diff --git a/go/LICENSE.txt b/go/LICENSE.txt deleted file mode 100644 index 57310329835da..0000000000000 --- a/go/LICENSE.txt +++ /dev/null @@ -1,1791 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. 
Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative 
Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- - -src/arrow/util (some portions): Apache 2.0, and 3-clause BSD - -Some portions of this module are derived from code in the Chromium project, -copyright (c) Google inc and (c) The Chromium Authors and licensed under the -Apache 2.0 License or the under the 3-clause BSD license: - - Copyright (c) 2013 The Chromium Authors. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - * Neither the name of Google Inc. nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -This project includes code from Daniel Lemire's FrameOfReference project. - -https://github.com/lemire/FrameOfReference/blob/6ccaf9e97160f9a3b299e23a8ef739e711ef0c71/src/bpacking.cpp - -Copyright: 2013 Daniel Lemire -Home page: http://lemire.me/en/ -Project page: https://github.com/lemire/FrameOfReference -License: Apache License Version 2.0 http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This project includes code from the TensorFlow project - -Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - --------------------------------------------------------------------------------- - -This project includes code from the NumPy project. - -https://github.com/numpy/numpy/blob/e1f191c46f2eebd6cb892a4bfe14d9dd43a06c4e/numpy/core/src/multiarray/multiarraymodule.c#L2910 - -https://github.com/numpy/numpy/blob/68fd82271b9ea5a9e50d4e761061dfcca851382a/numpy/core/src/multiarray/datetime.c - -Copyright (c) 2005-2017, NumPy Developers. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the NumPy Developers nor the names of any - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -This project includes code from the Boost project - -Boost Software License - Version 1.0 - August 17th, 2003 - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. - --------------------------------------------------------------------------------- - -This project includes code from the FlatBuffers project - -Copyright 2014 Google Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - --------------------------------------------------------------------------------- - -This project includes code from the tslib project - -Copyright 2015 Microsoft Corporation. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - --------------------------------------------------------------------------------- - -This project includes code from the jemalloc project - -https://github.com/jemalloc/jemalloc - -Copyright (C) 2002-2017 Jason Evans . -All rights reserved. -Copyright (C) 2007-2012 Mozilla Foundation. 
All rights reserved. -Copyright (C) 2009-2017 Facebook, Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice(s), - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice(s), - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --------------------------------------------------------------------------------- - -This project includes code from the Go project, BSD 3-clause license + PATENTS -weak patent termination clause -(https://github.com/golang/go/blob/master/PATENTS). - -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -This project includes code from the hs2client - -https://github.com/cloudera/hs2client - -Copyright 2016 Cloudera Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- --------------------------------------------------------------------------------- - -The script ci/scripts/util_wait_for_it.sh has the following license - -Copyright (c) 2016 Giles Hall - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - --------------------------------------------------------------------------------- - -The script r/configure has the following license (MIT) - -Copyright (c) 2017, Jeroen Ooms and Jim Hester - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - --------------------------------------------------------------------------------- - -cpp/src/arrow/util/logging.cc, cpp/src/arrow/util/logging.h and -cpp/src/arrow/util/logging-test.cc are adapted from -Ray Project (https://github.com/ray-project/ray) (Apache 2.0). - -Copyright (c) 2016 Ray Project (https://github.com/ray-project/ray) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - --------------------------------------------------------------------------------- -The files cpp/src/arrow/vendored/datetime/date.h, cpp/src/arrow/vendored/datetime/tz.h, -cpp/src/arrow/vendored/datetime/tz_private.h, cpp/src/arrow/vendored/datetime/ios.h, -cpp/src/arrow/vendored/datetime/ios.mm, -cpp/src/arrow/vendored/datetime/tz.cpp are adapted from -Howard Hinnant's date library (https://github.com/HowardHinnant/date) -It is licensed under MIT license. 
- -The MIT License (MIT) -Copyright (c) 2015, 2016, 2017 Howard Hinnant -Copyright (c) 2016 Adrian Colomitchi -Copyright (c) 2017 Florian Dang -Copyright (c) 2017 Paul Thompson -Copyright (c) 2018 Tomasz Kamiński - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
- --------------------------------------------------------------------------------- - -The file cpp/src/arrow/util/utf8.h includes code adapted from the page - https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ -with the following license (MIT) - -Copyright (c) 2008-2009 Bjoern Hoehrmann - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
- --------------------------------------------------------------------------------- - -The file cpp/src/arrow/vendored/string_view.hpp has the following license - -Boost Software License - Version 1.0 - August 17th, 2003 - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. - --------------------------------------------------------------------------------- - -The files in cpp/src/arrow/vendored/xxhash/ have the following license -(BSD 2-Clause License) - -xxHash Library -Copyright (c) 2012-2014, Yann Collet -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -You can contact the author at : -- xxHash homepage: http://www.xxhash.com -- xxHash source repository : https://github.com/Cyan4973/xxHash - --------------------------------------------------------------------------------- - -The files in cpp/src/arrow/vendored/double-conversion/ have the following license -(BSD 3-Clause License) - -Copyright 2006-2011, the V8 project authors. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - * Neither the name of Google Inc. nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -The files in cpp/src/arrow/vendored/uriparser/ have the following license -(BSD 3-Clause License) - -uriparser - RFC 3986 URI parsing library - -Copyright (C) 2007, Weijia Song -Copyright (C) 2007, Sebastian Pipping -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - - * Redistributions of source code must retain the above - copyright notice, this list of conditions and the following - disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. 
- - * Neither the name of the nor the names of its - contributors may be used to endorse or promote products - derived from this software without specific prior written - permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -The files under dev/tasks/conda-recipes have the following license - -BSD 3-clause license -Copyright (c) 2015-2018, conda-forge -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software without - specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR -TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -The files in cpp/src/arrow/vendored/utf8cpp/ have the following license - -Copyright 2006 Nemanja Trifunovic - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. - --------------------------------------------------------------------------------- - -This project includes code from Apache Kudu. - - * cpp/cmake_modules/CompilerInfo.cmake is based on Kudu's cmake_modules/CompilerInfo.cmake - -Copyright: 2016 The Apache Software Foundation. -Home page: https://kudu.apache.org/ -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This project includes code from Apache Impala (incubating), formerly -Impala. The Impala code and rights were donated to the ASF as part of the -Incubator process after the initial code imports into Apache Parquet. - -Copyright: 2012 Cloudera, Inc. -Copyright: 2016 The Apache Software Foundation. -Home page: http://impala.apache.org/ -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This project includes code from Apache Aurora. - -* dev/release/{release,changelog,release-candidate} are based on the scripts from - Apache Aurora - -Copyright: 2016 The Apache Software Foundation. -Home page: https://aurora.apache.org/ -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This project includes code from the Google styleguide. - -* cpp/build-support/cpplint.py is based on the scripts from the Google styleguide. - -Copyright: 2009 Google Inc. All rights reserved. -Homepage: https://github.com/google/styleguide -License: 3-clause BSD - --------------------------------------------------------------------------------- - -This project includes code from Snappy. 
- -* cpp/cmake_modules/{SnappyCMakeLists.txt,SnappyConfig.h} are based on code - from Google's Snappy project. - -Copyright: 2009 Google Inc. All rights reserved. -Homepage: https://github.com/google/snappy -License: 3-clause BSD - --------------------------------------------------------------------------------- - -This project includes code from the manylinux project. - -* python/manylinux1/scripts/{build_python.sh,python-tag-abi-tag.py, - requirements.txt} are based on code from the manylinux project. - -Copyright: 2016 manylinux -Homepage: https://github.com/pypa/manylinux -License: The MIT License (MIT) - --------------------------------------------------------------------------------- - -This project includes code from the cymove project: - -* python/pyarrow/includes/common.pxd includes code from the cymove project - -The MIT License (MIT) -Copyright (c) 2019 Omer Ozarslan - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -OR OTHER DEALINGS IN THE SOFTWARE. 
- --------------------------------------------------------------------------------- - -The projects includes code from the Ursabot project under the dev/archery -directory. - -License: BSD 2-Clause - -Copyright 2019 RStudio, Inc. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -This project include code from CMake. - -* cpp/cmake_modules/FindGTest.cmake is based on code from CMake. - -Copyright: Copyright 2000-2019 Kitware, Inc. and Contributors -Homepage: https://gitlab.kitware.com/cmake/cmake -License: 3-clause BSD - --------------------------------------------------------------------------------- - -This project include code from mingw-w64. 
- -* cpp/src/arrow/util/cpu-info.cc has a polyfill for mingw-w64 < 5 - -Copyright (c) 2009 - 2013 by the mingw-w64 project -Homepage: https://mingw-w64.org -License: Zope Public License (ZPL) Version 2.1. - ---------------------------------------------------------------------------------- - -This project include code from Google's Asylo project. - -* cpp/src/arrow/result.h is based on status_or.h - -Copyright (c) Copyright 2017 Asylo authors -Homepage: https://asylo.dev/ -License: Apache 2.0 - --------------------------------------------------------------------------------- - -This project includes code from Google's protobuf project - -* cpp/src/arrow/result.h ARROW_ASSIGN_OR_RAISE is based off ASSIGN_OR_RETURN - -Copyright 2008 Google Inc. All rights reserved. -Homepage: https://developers.google.com/protocol-buffers/ -License: - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Code generated by the Protocol Buffer compiler is owned by the owner -of the input file used when generating it. This code is not -standalone and requires a support library to be linked with it. This -support library is itself covered by the above license. - --------------------------------------------------------------------------------- - -3rdparty dependency LLVM is statically linked in certain binary distributions. -Additionally some sections of source code have been derived from sources in LLVM -and have been clearly labeled as such. LLVM has the following license: - -============================================================================== -LLVM Release License -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2003-2018 University of Illinois at Urbana-Champaign. -All rights reserved. 
- -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - -============================================================================== -Copyrights and Licenses for Third Party Software Distributed with LLVM: -============================================================================== -The LLVM software contains code written by third parties. Such software will -have its own individual LICENSE.TXT file in the directory in which it appears. 
-This file will describe the copyrights, license, and restrictions which apply -to that code. - -The disclaimer of warranty in the University of Illinois Open Source License -applies to all code in the LLVM Distribution, and nothing in any of the -other licenses gives permission to use the names of the LLVM Team or the -University of Illinois to endorse or promote products derived from this -Software. - -The following pieces of software have additional or alternate copyrights, -licenses, and/or restrictions: - -Program Directory -------- --------- -Google Test llvm/utils/unittest/googletest -OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex} -pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT} -ARM contributions llvm/lib/Target/ARM/LICENSE.TXT -md5 contributions llvm/lib/Support/MD5.cpp llvm/include/llvm/Support/MD5.h - --------------------------------------------------------------------------------- - -3rdparty dependency gRPC is statically linked in certain binary -distributions, like the python wheels. gRPC has the following license: - -Copyright 2014 gRPC authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - --------------------------------------------------------------------------------- - -3rdparty dependency Apache Thrift is statically linked in certain binary -distributions, like the python wheels. 
Apache Thrift has the following license: - -Apache Thrift -Copyright (C) 2006 - 2019, The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - --------------------------------------------------------------------------------- - -3rdparty dependency Apache ORC is statically linked in certain binary -distributions, like the python wheels. Apache ORC has the following license: - -Apache ORC -Copyright 2013-2019 The Apache Software Foundation - -This product includes software developed by The Apache Software -Foundation (http://www.apache.org/). - -This product includes software developed by Hewlett-Packard: -(c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- --------------------------------------------------------------------------------- - -3rdparty dependency zstd is statically linked in certain binary -distributions, like the python wheels. ZSTD has the following license: - -BSD License - -For Zstandard software - -Copyright (c) 2016-present, Facebook, Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - * Neither the name Facebook nor the names of its contributors may be used to - endorse or promote products derived from this software without specific - prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -3rdparty dependency lz4 is statically linked in certain binary -distributions, like the python wheels. 
lz4 has the following license: - -LZ4 Library -Copyright (c) 2011-2016, Yann Collet -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -3rdparty dependency Brotli is statically linked in certain binary -distributions, like the python wheels. Brotli has the following license: - -Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - --------------------------------------------------------------------------------- - -3rdparty dependency snappy is statically linked in certain binary -distributions, like the python wheels. snappy has the following license: - -Copyright 2011, Google Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Google Inc. 
nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -=== - -Some of the benchmark data in testdata/ is licensed differently: - - - fireworks.jpeg is Copyright 2013 Steinar H. Gunderson, and - is licensed under the Creative Commons Attribution 3.0 license - (CC-BY-3.0). See https://creativecommons.org/licenses/by/3.0/ - for more information. - - - kppkn.gtb is taken from the Gaviota chess tablebase set, and - is licensed under the MIT License. See - https://sites.google.com/site/gaviotachessengine/Home/endgame-tablebases-1 - for more information. - - - paper-100k.pdf is an excerpt (bytes 92160 to 194560) from the paper - “Combinatorial Modeling of Chromatin Features Quantitatively Predicts DNA - Replication Timing in _Drosophila_” by Federico Comoglio and Renato Paro, - which is licensed under the CC-BY license. See - http://www.ploscompbiol.org/static/license for more ifnormation. - - - alice29.txt, asyoulik.txt, plrabn12.txt and lcet10.txt are from Project - Gutenberg. 
The first three have expired copyrights and are in the public - domain; the latter does not have expired copyright, but is still in the - public domain according to the license information - (http://www.gutenberg.org/ebooks/53). - --------------------------------------------------------------------------------- - -3rdparty dependency gflags is statically linked in certain binary -distributions, like the python wheels. gflags has the following license: - -Copyright (c) 2006, Google Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- --------------------------------------------------------------------------------- - -3rdparty dependency glog is statically linked in certain binary -distributions, like the python wheels. glog has the following license: - -Copyright (c) 2008, Google Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -A function gettimeofday in utilities.cc is based on - -http://www.google.com/codesearch/p?hl=en#dR3YEbitojA/COPYING&q=GetSystemTimeAsFileTime%20license:bsd - -The license of this code is: - -Copyright (c) 2003-2008, Jouni Malinen and contributors -All Rights Reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the name(s) of the above-listed copyright holder(s) nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -3rdparty dependency re2 is statically linked in certain binary -distributions, like the python wheels. re2 has the following license: - -Copyright (c) 2009 The RE2 Authors. All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - * Neither the name of Google Inc. nor the names of its contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -3rdparty dependency c-ares is statically linked in certain binary -distributions, like the python wheels. c-ares has the following license: - -# c-ares license - -Copyright (c) 2007 - 2018, Daniel Stenberg with many contributors, see AUTHORS -file. - -Copyright 1998 by the Massachusetts Institute of Technology. 
- -Permission to use, copy, modify, and distribute this software and its -documentation for any purpose and without fee is hereby granted, provided that -the above copyright notice appear in all copies and that both that copyright -notice and this permission notice appear in supporting documentation, and that -the name of M.I.T. not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior permission. -M.I.T. makes no representations about the suitability of this software for any -purpose. It is provided "as is" without express or implied warranty. - --------------------------------------------------------------------------------- - -3rdparty dependency zlib is redistributed as a dynamically linked shared -library in certain binary distributions, like the python wheels. In the future -this will likely change to static linkage. zlib has the following license: - -zlib.h -- interface of the 'zlib' general purpose compression library - version 1.2.11, January 15th, 2017 - - Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. 
- - Jean-loup Gailly Mark Adler - jloup@gzip.org madler@alumni.caltech.edu - --------------------------------------------------------------------------------- - -3rdparty dependency openssl is redistributed as a dynamically linked shared -library in certain binary distributions, like the python wheels. openssl -preceding version 3 has the following license: - - LICENSE ISSUES - ============== - - The OpenSSL toolkit stays under a double license, i.e. both the conditions of - the OpenSSL License and the original SSLeay license apply to the toolkit. - See below for the actual license texts. - - OpenSSL License - --------------- - -/* ==================================================================== - * Copyright (c) 1998-2019 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. 
- * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ - - Original SSLeay License - ----------------------- - -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. 
The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - --------------------------------------------------------------------------------- - -This project includes code from the rtools-backports project. - -* ci/scripts/PKGBUILD and ci/scripts/r_windows_build.sh are based on code - from the rtools-backports project. - -Copyright: Copyright (c) 2013 - 2019, Алексей and Jeroen Ooms. -All rights reserved. -Homepage: https://github.com/r-windows/rtools-backports -License: 3-clause BSD - --------------------------------------------------------------------------------- - -Some code from pandas has been adapted for the pyarrow codebase. pandas is -available under the 3-clause BSD license, which follows: - -pandas license -============== - -Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team -All rights reserved. - -Copyright (c) 2008-2011 AQR Capital Management, LLC -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the copyright holder nor the names of any - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -Some bits from DyND, in particular aspects of the build system, have been -adapted from libdynd and dynd-python under the terms of the BSD 2-clause -license - -The BSD 2-Clause License - - Copyright (C) 2011-12, Dynamic NDArray Developers - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Dynamic NDArray Developers list: - - * Mark Wiebe - * Continuum Analytics - --------------------------------------------------------------------------------- - -Some source code from Ibis (https://github.com/cloudera/ibis) has been adapted -for PyArrow. Ibis is released under the Apache License, Version 2.0. - --------------------------------------------------------------------------------- - -dev/tasks/homebrew-formulae/apache-arrow.rb has the following license: - -BSD 2-Clause License - -Copyright (c) 2009-present, Homebrew contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - ----------------------------------------------------------------------- - -cpp/src/arrow/vendored/base64.cpp has the following license - -ZLIB License - -Copyright (C) 2004-2017 René Nyffenegger - -This source code is provided 'as-is', without any express or implied -warranty. In no event will the author be held liable for any damages arising -from the use of this software. - -Permission is granted to anyone to use this software for any purpose, including -commercial applications, and to alter it and redistribute it freely, subject to -the following restrictions: - -1. The origin of this source code must not be misrepresented; you must not - claim that you wrote the original source code. If you use this source code - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - -2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original source code. - -3. 
This notice may not be removed or altered from any source distribution. - -René Nyffenegger rene.nyffenegger@adp-gmbh.ch - --------------------------------------------------------------------------------- - -The file cpp/src/arrow/vendored/optional.hpp has the following license - -Boost Software License - Version 1.0 - August 17th, 2003 - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. - --------------------------------------------------------------------------------- - -The file cpp/src/arrow/vendored/musl/strptime.c has the following license - -Copyright © 2005-2020 Rich Felker, et al. 
- -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/go/README.md b/go/README.md deleted file mode 100644 index ec824229729a0..0000000000000 --- a/go/README.md +++ /dev/null @@ -1,147 +0,0 @@ - - -Apache Arrow for Go -=================== - -[![Go Reference](https://pkg.go.dev/badge/github.com/apache/arrow/go/v18.svg)](https://pkg.go.dev/github.com/apache/arrow/go/v18) - -[Apache Arrow][arrow] is a cross-language development platform for in-memory -data. It specifies a standardized language-independent columnar memory format -for flat and hierarchical data, organized for efficient analytic operations on -modern hardware. It also provides computational libraries and zero-copy -streaming messaging and inter-process communication. - -### A note about FlightSQL drivers - -Go FlightSQL drivers live in the -[ADBC repository](https://github.com/apache/arrow-adbc/tree/main/go/adbc). 
-In particular, to use the Golang `database/sql` interface: -```golang -import ( - "database/sql" - _ "github.com/apache/arrow-adbc/go/adbc/sqldriver/flightsql" -) - -func main() { - dsn := "uri=grpc://localhost:12345;username=mickeymouse;password=p@55w0RD" - db, err := sql.Open("flightsql", dsn) - ... -} -``` - -DSN option keys are expressed as `k=v`, delimited with `;`. -Some options keys are defined in ADBC, others are defined in the FlightSQL ADBC driver. -- Arrow ADBC [developer doc](https://arrow.apache.org/adbc/main/driver/flight_sql.html#client-options) -- ADBC [source code](https://github.com/apache/arrow-adbc/blob/3d12fad1bae21029a8ff25604d6e65760c3f65bd/go/adbc/adbc.go#L149-L158) -- FlightSQL driver option keys [source code](https://github.com/apache/arrow-adbc/blob/3d12fad1bae21029a8ff25604d6e65760c3f65bd/go/adbc/driver/flightsql/flightsql_adbc.go#L70-L81) - -Reference Counting ------------------- - -The library makes use of reference counting so that it can track when memory -buffers are no longer used. This allows Arrow to update resource accounting, -pool memory such and track overall memory usage as objects are created and -released. Types expose two methods to deal with this pattern. The `Retain` -method will increase the reference count by 1 and `Release` method will reduce -the count by 1. Once the reference count of an object is zero, any associated -object will be freed. `Retain` and `Release` are safe to call from multiple -goroutines. - -### When to call `Retain` / `Release`? - -* If you are passed an object and wish to take ownership of it, you must call - `Retain`. You must later pair this with a call to `Release` when you no - longer need the object. "Taking ownership" typically means you wish to - access the object outside the scope of the current function call. - -* You own any object you create via functions whose name begins with `New` or - `Copy` or when receiving an object over a channel. 
Therefore you must call - `Release` once you no longer need the object. - -* If you send an object over a channel, you must call `Retain` before sending - it as the receiver is assumed to own the object and will later call `Release` - when it no longer needs the object. - -Performance ------------ - -The arrow package makes extensive use of [c2goasm][] to leverage LLVM's -advanced optimizer and generate PLAN9 assembly functions from C/C++ code. The -arrow package can be compiled without these optimizations using the `noasm` -build tag. Alternatively, by configuring an environment variable, it is -possible to dynamically configure which architecture optimizations are used at -runtime. We use the (cpu)[https://pkg.go.dev/golang.org/x/sys/cpu] package to -check dynamically for these features. - -### Example Usage - -The following benchmarks demonstrate summing an array of 8192 values using -various optimizations. - -Disable no architecture optimizations (thus using AVX2): - -```sh -$ INTEL_DISABLE_EXT=NONE go test -bench=8192 -run=. ./math -goos: darwin -goarch: amd64 -pkg: github.com/apache/arrow/go/arrow/math -BenchmarkFloat64Funcs_Sum_8192-8 2000000 687 ns/op 95375.41 MB/s -BenchmarkInt64Funcs_Sum_8192-8 2000000 719 ns/op 91061.06 MB/s -BenchmarkUint64Funcs_Sum_8192-8 2000000 691 ns/op 94797.29 MB/s -PASS -ok github.com/apache/arrow/go/arrow/math 6.444s -``` - -**NOTE:** `NONE` is simply ignored, thus enabling optimizations for AVX2 and SSE4 - ----- - -Disable AVX2 architecture optimizations: - -```sh -$ INTEL_DISABLE_EXT=AVX2 go test -bench=8192 -run=. 
./math -goos: darwin -goarch: amd64 -pkg: github.com/apache/arrow/go/arrow/math -BenchmarkFloat64Funcs_Sum_8192-8 1000000 1912 ns/op 34263.63 MB/s -BenchmarkInt64Funcs_Sum_8192-8 1000000 1392 ns/op 47065.57 MB/s -BenchmarkUint64Funcs_Sum_8192-8 1000000 1405 ns/op 46636.41 MB/s -PASS -ok github.com/apache/arrow/go/arrow/math 4.786s -``` - ----- - -Disable ALL architecture optimizations, thus using pure Go implementation: - -```sh -$ INTEL_DISABLE_EXT=ALL go test -bench=8192 -run=. ./math -goos: darwin -goarch: amd64 -pkg: github.com/apache/arrow/go/arrow/math -BenchmarkFloat64Funcs_Sum_8192-8 200000 10285 ns/op 6371.41 MB/s -BenchmarkInt64Funcs_Sum_8192-8 500000 3892 ns/op 16837.37 MB/s -BenchmarkUint64Funcs_Sum_8192-8 500000 3929 ns/op 16680.00 MB/s -PASS -ok github.com/apache/arrow/go/arrow/math 6.179s -``` - -[arrow]: https://arrow.apache.org -[c2goasm]: https://github.com/minio/c2goasm diff --git a/go/arrow/.editorconfig b/go/arrow/.editorconfig deleted file mode 100644 index a7ceaf938f92c..0000000000000 --- a/go/arrow/.editorconfig +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -root = true - -[*.tmpl] -indent_style = tab -indent_size = 4 \ No newline at end of file diff --git a/go/arrow/.gitignore b/go/arrow/.gitignore deleted file mode 100644 index d4b831ae811da..0000000000000 --- a/go/arrow/.gitignore +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -### Go template -# Binaries for programs and plugins -*.exe -*.dll -*.so -*.dylib -*.o - -# Test binary, build with `go test -c` -*.test - -# Output of the go coverage tool, specifically when used with LiteIDE -*.out - -# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 -.glide/ - -bin/ -vendor/ \ No newline at end of file diff --git a/go/arrow/Gopkg.lock b/go/arrow/Gopkg.lock deleted file mode 100644 index 143e4f93b5eea..0000000000000 --- a/go/arrow/Gopkg.lock +++ /dev/null @@ -1,44 +0,0 @@ -# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. 
- - -[[projects]] - digest = "1:56c130d885a4aacae1dd9c7b71cfe39912c7ebc1ff7d2b46083c8812996dc43b" - name = "github.com/davecgh/go-spew" - packages = ["spew"] - pruneopts = "" - revision = "346938d642f2ec3594ed81d874461961cd0faa76" - version = "v1.1.0" - -[[projects]] - digest = "1:1d7e1867c49a6dd9856598ef7c3123604ea3daabf5b83f303ff457bcbc410b1d" - name = "github.com/pkg/errors" - packages = ["."] - pruneopts = "" - revision = "ba968bfe8b2f7e042a574c888954fccecfa385b4" - version = "v0.8.1" - -[[projects]] - digest = "1:256484dbbcd271f9ecebc6795b2df8cad4c458dd0f5fd82a8c2fa0c29f233411" - name = "github.com/pmezard/go-difflib" - packages = ["difflib"] - pruneopts = "" - revision = "792786c7400a136282c1664665ae0a8db921c6c2" - version = "v1.0.0" - -[[projects]] - digest = "1:2d0dc026c4aef5e2f3a0e06a4dabe268b840d8f63190cf6894e02134a03f52c5" - name = "github.com/stretchr/testify" - packages = ["assert"] - pruneopts = "" - revision = "b91bfb9ebec76498946beb6af7c0230c7cc7ba6c" - version = "v1.2.0" - -[solve-meta] - analyzer-name = "dep" - analyzer-version = 1 - input-imports = [ - "github.com/pkg/errors", - "github.com/stretchr/testify/assert", - ] - solver-name = "gps-cdcl" - solver-version = 1 diff --git a/go/arrow/Gopkg.toml b/go/arrow/Gopkg.toml deleted file mode 100644 index b27807d69f951..0000000000000 --- a/go/arrow/Gopkg.toml +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -[[constraint]] - name = "github.com/stretchr/testify" - version = "1.2.0" - -[[constraint]] - name = "github.com/pkg/errors" - version = "0.8.1" \ No newline at end of file diff --git a/go/arrow/Makefile b/go/arrow/Makefile deleted file mode 100644 index 9c4a23262d0bd..0000000000000 --- a/go/arrow/Makefile +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -GO_BUILD=go build -GO_GEN=go generate -GO_TEST?=go test -GOPATH=$(realpath ../../../../../..) - -GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go') -ALL_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -name '*.s' -not -name '*_test.go') -SOURCES_NO_VENDOR := $(shell find . 
-path ./vendor -prune -o -name "*.go" -not -name '*_test.go' -print) - -.PHONEY: test bench assembly generate - -assembly: - @$(MAKE) -C memory assembly - @$(MAKE) -C math assembly - -generate: bin/tmpl - bin/tmpl -i -data=numeric.tmpldata type_traits_numeric.gen.go.tmpl type_traits_numeric.gen_test.go.tmpl array/numeric.gen.go.tmpl array/numericbuilder.gen_test.go.tmpl array/numericbuilder.gen.go.tmpl array/bufferbuilder_numeric.gen.go.tmpl - bin/tmpl -i -data=datatype_numeric.gen.go.tmpldata datatype_numeric.gen.go.tmpl - @$(MAKE) -C math generate - -fmt: $(SOURCES_NO_VENDOR) - goimports -w $^ - -bench: $(GO_SOURCES) | assembly - $(GO_TEST) $(GO_TEST_ARGS) -bench=. -run=- ./... - -bench-noasm: $(GO_SOURCES) - $(GO_TEST) $(GO_TEST_ARGS) -tags='noasm' -bench=. -run=- ./... - -test: $(GO_SOURCES) | assembly - $(GO_TEST) $(GO_TEST_ARGS) ./... - -test-noasm: $(GO_SOURCES) - $(GO_TEST) $(GO_TEST_ARGS) -tags='noasm' ./... - -bin/tmpl: _tools/tmpl/main.go - $(GO_BUILD) -o $@ ./_tools/tmpl - diff --git a/go/arrow/_examples/helloworld/main.go b/go/arrow/_examples/helloworld/main.go deleted file mode 100644 index 7f932801917a4..0000000000000 --- a/go/arrow/_examples/helloworld/main.go +++ /dev/null @@ -1,52 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "os" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/math" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -func main() { - schema := arrow.NewSchema([]arrow.Field{ - {Name: "intField", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, - {Name: "stringField", Type: arrow.BinaryTypes.String, Nullable: false}, - {Name: "floatField", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, nil) - - builder := array.NewRecordBuilder(memory.DefaultAllocator, schema) - defer builder.Release() - - builder.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2, 3, 4, 5}, nil) - builder.Field(1).(*array.StringBuilder).AppendValues([]string{"a", "b", "c", "d", "e"}, nil) - builder.Field(2).(*array.Float64Builder).AppendValues([]float64{1, 0, 3, 0, 5}, []bool{true, false, true, false, true}) - - rec := builder.NewRecord() - defer rec.Release() - - tbl := array.NewTableFromRecords(schema, []arrow.Record{rec}) - defer tbl.Release() - - sum := math.Float64.Sum(tbl.Column(2).Data().Chunk(0).(*array.Float64)) - if sum != 9 { - defer os.Exit(1) - } -} diff --git a/go/arrow/_tools/tmpl/main.go b/go/arrow/_tools/tmpl/main.go deleted file mode 100644 index 33cb1686981f4..0000000000000 --- a/go/arrow/_tools/tmpl/main.go +++ /dev/null @@ -1,268 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "bytes" - "flag" - "fmt" - "go/format" - "io/ioutil" - "os" - "os/exec" - "path/filepath" - "strings" - "text/template" - - "github.com/apache/arrow/go/v18/internal/json" -) - -const Ext = ".tmpl" - -type pathSpec struct { - in, out string -} - -func (p *pathSpec) String() string { return p.in + " → " + p.out } -func (p *pathSpec) IsGoFile() bool { return filepath.Ext(p.out) == ".go" } - -func parsePath(path string) (string, string) { - p := strings.IndexByte(path, '=') - if p == -1 { - if filepath.Ext(path) != Ext { - errExit("template file '%s' must have .tmpl extension", path) - } - return path, path[:len(path)-len(Ext)] - } - - return path[:p], path[p+1:] -} - -type data struct { - In interface{} - D listValue -} - -func errExit(format string, a ...interface{}) { - fmt.Fprintf(os.Stderr, format, a...) 
- fmt.Fprintln(os.Stderr) - os.Exit(1) -} - -type listValue map[string]string - -func (l listValue) String() string { - res := make([]string, 0, len(l)) - for k, v := range l { - res = append(res, fmt.Sprintf("%s=%s", k, v)) - } - return strings.Join(res, ", ") -} - -func (l listValue) Set(v string) error { - nv := strings.Split(v, "=") - if len(nv) != 2 { - return fmt.Errorf("expected NAME=VALUE, got %s", v) - } - l[nv[0]] = nv[1] - return nil -} - -func main() { - var ( - dataArg = flag.String("data", "", "input JSON data") - gi = flag.Bool("i", false, "run goimports") - in = &data{D: make(listValue)} - ) - - flag.Var(&in.D, "d", "-d NAME=VALUE") - - flag.Parse() - if *dataArg == "" { - errExit("data option is required") - } - - if *gi { - if _, err := exec.LookPath("goimports"); err != nil { - errExit("failed to find goimports: %s", err.Error()) - } - formatter = formatSource - } else { - formatter = format.Source - } - - paths := flag.Args() - if len(paths) == 0 { - errExit("no tmpl files specified") - } - - specs := make([]pathSpec, len(paths)) - for i, p := range paths { - in, out := parsePath(p) - specs[i] = pathSpec{in: in, out: out} - } - - in.In = readData(*dataArg) - process(in, specs) -} - -func mustReadAll(path string) []byte { - data, err := ioutil.ReadFile(path) - if err != nil { - errExit(err.Error()) - } - - return data -} - -func readData(path string) interface{} { - data := mustReadAll(path) - var v interface{} - if err := json.Unmarshal(StripComments(data), &v); err != nil { - errExit("invalid JSON data: %s", err.Error()) - } - return v -} - -func fileMode(path string) os.FileMode { - stat, err := os.Stat(path) - if err != nil { - errExit(err.Error()) - } - return stat.Mode() -} - -var funcs = template.FuncMap{ - "lower": strings.ToLower, - "upper": strings.ToUpper, -} - -func process(data interface{}, specs []pathSpec) { - for _, spec := range specs { - var ( - t *template.Template - err error - ) - t, err = 
template.New("gen").Funcs(funcs).Parse(string(mustReadAll(spec.in))) - if err != nil { - errExit("error processing template '%s': %s", spec.in, err.Error()) - } - - var buf bytes.Buffer - if spec.IsGoFile() { - // preamble - fmt.Fprintf(&buf, "// Code generated by %s. DO NOT EDIT.\n", spec.in) - fmt.Fprintln(&buf) - } - err = t.Execute(&buf, data) - if err != nil { - errExit("error executing template '%s': %s", spec.in, err.Error()) - } - - generated := buf.Bytes() - if spec.IsGoFile() { - generated, err = formatter(generated) - if err != nil { - errExit("error formatting '%s': %s", spec.in, err.Error()) - } - } - - os.WriteFile(spec.out, generated, fileMode(spec.in)) - } -} - -var ( - formatter func([]byte) ([]byte, error) -) - -func formatSource(in []byte) ([]byte, error) { - r := bytes.NewReader(in) - cmd := exec.Command("goimports") - cmd.Stdin = r - out, err := cmd.Output() - if err != nil { - if ee, ok := err.(*exec.ExitError); ok { - return nil, fmt.Errorf("error running goimports: %s", string(ee.Stderr)) - } - return nil, fmt.Errorf("error running goimports: %s", string(out)) - } - - return out, nil -} - -func StripComments(raw []byte) []byte { - var ( - quoted, esc bool - comment bool - ) - - buf := bytes.Buffer{} - - for i := 0; i < len(raw); i++ { - b := raw[i] - - if comment { - switch b { - case '/': - comment = false - j := bytes.IndexByte(raw[i+1:], '\n') - if j == -1 { - i = len(raw) - } else { - i += j // keep new line - } - case '*': - j := bytes.Index(raw[i+1:], []byte("*/")) - if j == -1 { - i = len(raw) - } else { - i += j + 2 - comment = false - } - } - continue - } - - if esc { - esc = false - continue - } - - if b == '\\' && quoted { - esc = true - continue - } - - if b == '"' || b == '\'' { - quoted = !quoted - } - - if b == '/' && !quoted { - comment = true - continue - } - - buf.WriteByte(b) - } - - if quoted || esc || comment { - // unexpected state, so return raw bytes - return raw - } - - return buf.Bytes() -} diff --git 
a/go/arrow/_tools/tmpl/main_test.go b/go/arrow/_tools/tmpl/main_test.go deleted file mode 100644 index 831cf791e3a0b..0000000000000 --- a/go/arrow/_tools/tmpl/main_test.go +++ /dev/null @@ -1,73 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "testing" -) - -func TestStripComments(t *testing.T) { - tests := []struct { - name string - in string - exp string - }{ - {name: "none", in: `[1,2,3]`, exp: `[1,2,3]`}, - {name: "single-line, line comment at end", in: `[1,2,3] // foo bar`, exp: `[1,2,3] `}, - {name: "single-line, block comment at end", in: `[1,2,3] /* foo bar */ `, exp: `[1,2,3] `}, - {name: "single-line, block comment at end", in: `[1,2,3] /* /* // */`, exp: `[1,2,3] `}, - {name: "single-line, block comment in middle", in: `[1,/* foo bar */2,3]`, exp: `[1,2,3]`}, - {name: "single-line, block comment in string", in: `[1,"/* foo bar */"]`, exp: `[1,"/* foo bar */"]`}, - {name: "single-line, malformed block comment", in: `[1,2,/*]`, exp: `[1,2,/*]`}, - {name: "single-line, malformed JSON", in: `[1,2,/]`, exp: `[1,2,/]`}, - - { - name: "multi-line", - in: `[ - 1, - 2, - 3 -]`, - exp: `[ - 1, - 2, - 3 -]`, - }, - { - name: "multi-line, multiple line comments", - in: `[ // foo - 1, // bar - 2, - 3 -] // fit`, - exp: `[ - 1, - 2, - 3 -] `, - }, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - got := string(StripComments([]byte(test.in))) - if got != test.exp { - t.Errorf("got:\n%s\nexp:\n%s", got, test.exp) - } - }) - } -} diff --git a/go/arrow/_tools/tools.go b/go/arrow/_tools/tools.go deleted file mode 100644 index 262880bca8fe4..0000000000000 --- a/go/arrow/_tools/tools.go +++ /dev/null @@ -1,25 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build tools -// +build tools - -package _tools - -import ( - _ "golang.org/x/tools/cmd/goimports" - _ "golang.org/x/tools/cmd/stringer" -) diff --git a/go/arrow/array.go b/go/arrow/array.go deleted file mode 100644 index 768b30f8e0690..0000000000000 --- a/go/arrow/array.go +++ /dev/null @@ -1,129 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package arrow - -import ( - "fmt" - - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// ArrayData is the underlying memory and metadata of an Arrow array, corresponding -// to the same-named object in the C++ implementation. -// -// The Array interface and subsequent typed objects provide strongly typed -// accessors which support marshalling and other patterns to the data. 
-// This interface allows direct access to the underlying raw byte buffers -// which allows for manipulating the internal data and casting. For example, -// one could cast the raw bytes from int64 to float64 like so: -// -// arrdata := GetMyInt64Data().Data() -// newdata := array.NewData(arrow.PrimitiveTypes.Float64, arrdata.Len(), -// arrdata.Buffers(), nil, arrdata.NullN(), arrdata.Offset()) -// defer newdata.Release() -// float64arr := array.NewFloat64Data(newdata) -// defer float64arr.Release() -// -// This is also useful in an analytics setting where memory may be reused. For -// example, if we had a group of operations all returning float64 such as: -// -// Log(Sqrt(Expr(arr))) -// -// The low-level implementations could have signatures such as: -// -// func Log(values arrow.ArrayData) arrow.ArrayData -// -// Another example would be a function that consumes one or more memory buffers -// in an input array and replaces them with newly-allocated data, changing the -// output data type as well. -type ArrayData interface { - // Retain increases the reference count by 1, it is safe to call - // in multiple goroutines simultaneously. - Retain() - // Release decreases the reference count by 1, it is safe to call - // in multiple goroutines simultaneously. Data is removed when reference - // count is 0. - Release() - // DataType returns the current datatype stored in the object. - DataType() DataType - // NullN returns the number of nulls for this data instance. - NullN() int - // Len returns the length of this data instance - Len() int - // Offset returns the offset into the raw buffers where this data begins - Offset() int - // Buffers returns the slice of raw data buffers for this data instance. Their - // meaning depends on the context of the data type. - Buffers() []*memory.Buffer - // Children returns the slice of children data instances, only relevant for - // nested data types. 
For instance, List data will have a single child containing - // elements of all the rows and Struct data will contain numfields children which - // are the arrays for each field of the struct. - Children() []ArrayData - // Reset allows reusing this ArrayData object by replacing the data in this ArrayData - // object without changing the reference count. - Reset(newtype DataType, newlength int, newbuffers []*memory.Buffer, newchildren []ArrayData, newnulls int, newoffset int) - // Dictionary returns the ArrayData object for the dictionary if this is a - // dictionary array, otherwise it will be nil. - Dictionary() ArrayData - // SizeInBytes returns the size of the ArrayData buffers and any children and/or dictionary in bytes. - SizeInBytes() uint64 -} - -// Array represents an immutable sequence of values using the Arrow in-memory format. -type Array interface { - json.Marshaler - - fmt.Stringer - - // DataType returns the type metadata for this instance. - DataType() DataType - - // NullN returns the number of null values in the array. - NullN() int - - // NullBitmapBytes returns a byte slice of the validity bitmap. - NullBitmapBytes() []byte - - // IsNull returns true if value at index is null. - // NOTE: IsNull will panic if NullBitmapBytes is not empty and 0 > i ≥ Len. - IsNull(i int) bool - - // IsValid returns true if value at index is not null. - // NOTE: IsValid will panic if NullBitmapBytes is not empty and 0 > i ≥ Len. - IsValid(i int) bool - // ValueStr returns the value at index as a string. - ValueStr(i int) string - - // Get single value to be marshalled with `json.Marshal` - GetOneForMarshal(i int) interface{} - - Data() ArrayData - - // Len returns the number of elements in the array. - Len() int - - // Retain increases the reference count by 1. - // Retain may be called simultaneously from multiple goroutines. - Retain() - - // Release decreases the reference count by 1. - // Release may be called simultaneously from multiple goroutines. 
- // When the reference count goes to zero, the memory is freed. - Release() -} diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go deleted file mode 100644 index ae33ca5417db0..0000000000000 --- a/go/arrow/array/array.go +++ /dev/null @@ -1,186 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" -) - -const ( - // UnknownNullCount specifies the NullN should be calculated from the null bitmap buffer. - UnknownNullCount = -1 - - // NullValueStr represents a null value in arrow.Array.ValueStr and in Builder.AppendValueFromString. - // It should be returned from the arrow.Array.ValueStr implementations. - // Using it as the value in Builder.AppendValueFromString should be equivalent to Builder.AppendNull. - NullValueStr = "(null)" -) - -type array struct { - refCount int64 - data *Data - nullBitmapBytes []byte -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. 
-func (a *array) Retain() { - atomic.AddInt64(&a.refCount, 1) -} - -// Release decreases the reference count by 1. -// Release may be called simultaneously from multiple goroutines. -// When the reference count goes to zero, the memory is freed. -func (a *array) Release() { - debug.Assert(atomic.LoadInt64(&a.refCount) > 0, "too many releases") - - if atomic.AddInt64(&a.refCount, -1) == 0 { - a.data.Release() - a.data, a.nullBitmapBytes = nil, nil - } -} - -// DataType returns the type metadata for this instance. -func (a *array) DataType() arrow.DataType { return a.data.dtype } - -// NullN returns the number of null values in the array. -func (a *array) NullN() int { - if a.data.nulls < 0 { - a.data.nulls = a.data.length - bitutil.CountSetBits(a.nullBitmapBytes, a.data.offset, a.data.length) - } - return a.data.nulls -} - -// NullBitmapBytes returns a byte slice of the validity bitmap. -func (a *array) NullBitmapBytes() []byte { return a.nullBitmapBytes } - -func (a *array) Data() arrow.ArrayData { return a.data } - -// Len returns the number of elements in the array. -func (a *array) Len() int { return a.data.length } - -// IsNull returns true if value at index is null. -// NOTE: IsNull will panic if NullBitmapBytes is not empty and 0 > i ≥ Len. -func (a *array) IsNull(i int) bool { - return len(a.nullBitmapBytes) != 0 && bitutil.BitIsNotSet(a.nullBitmapBytes, a.data.offset+i) -} - -// IsValid returns true if value at index is not null. -// NOTE: IsValid will panic if NullBitmapBytes is not empty and 0 > i ≥ Len. -func (a *array) IsValid(i int) bool { - return len(a.nullBitmapBytes) == 0 || bitutil.BitIsSet(a.nullBitmapBytes, a.data.offset+i) -} - -func (a *array) setData(data *Data) { - // Retain before releasing in case a.data is the same as data. 
- data.Retain() - - if a.data != nil { - a.data.Release() - } - - if len(data.buffers) > 0 && data.buffers[0] != nil { - a.nullBitmapBytes = data.buffers[0].Bytes() - } - a.data = data -} - -func (a *array) Offset() int { - return a.data.Offset() -} - -type arrayConstructorFn func(arrow.ArrayData) arrow.Array - -var ( - makeArrayFn [64]arrayConstructorFn -) - -func invalidDataType(data arrow.ArrayData) arrow.Array { - panic("invalid data type: " + data.DataType().ID().String()) -} - -// MakeFromData constructs a strongly-typed array instance from generic Data. -func MakeFromData(data arrow.ArrayData) arrow.Array { - return makeArrayFn[byte(data.DataType().ID()&0x3f)](data) -} - -// NewSlice constructs a zero-copy slice of the array with the indicated -// indices i and j, corresponding to array[i:j]. -// The returned array must be Release()'d after use. -// -// NewSlice panics if the slice is outside the valid range of the input array. -// NewSlice panics if j < i. -func NewSlice(arr arrow.Array, i, j int64) arrow.Array { - data := NewSliceData(arr.Data(), i, j) - slice := MakeFromData(data) - data.Release() - return slice -} - -func init() { - makeArrayFn = [...]arrayConstructorFn{ - arrow.NULL: func(data arrow.ArrayData) arrow.Array { return NewNullData(data) }, - arrow.BOOL: func(data arrow.ArrayData) arrow.Array { return NewBooleanData(data) }, - arrow.UINT8: func(data arrow.ArrayData) arrow.Array { return NewUint8Data(data) }, - arrow.INT8: func(data arrow.ArrayData) arrow.Array { return NewInt8Data(data) }, - arrow.UINT16: func(data arrow.ArrayData) arrow.Array { return NewUint16Data(data) }, - arrow.INT16: func(data arrow.ArrayData) arrow.Array { return NewInt16Data(data) }, - arrow.UINT32: func(data arrow.ArrayData) arrow.Array { return NewUint32Data(data) }, - arrow.INT32: func(data arrow.ArrayData) arrow.Array { return NewInt32Data(data) }, - arrow.UINT64: func(data arrow.ArrayData) arrow.Array { return NewUint64Data(data) }, - arrow.INT64: func(data 
arrow.ArrayData) arrow.Array { return NewInt64Data(data) }, - arrow.FLOAT16: func(data arrow.ArrayData) arrow.Array { return NewFloat16Data(data) }, - arrow.FLOAT32: func(data arrow.ArrayData) arrow.Array { return NewFloat32Data(data) }, - arrow.FLOAT64: func(data arrow.ArrayData) arrow.Array { return NewFloat64Data(data) }, - arrow.STRING: func(data arrow.ArrayData) arrow.Array { return NewStringData(data) }, - arrow.BINARY: func(data arrow.ArrayData) arrow.Array { return NewBinaryData(data) }, - arrow.FIXED_SIZE_BINARY: func(data arrow.ArrayData) arrow.Array { return NewFixedSizeBinaryData(data) }, - arrow.DATE32: func(data arrow.ArrayData) arrow.Array { return NewDate32Data(data) }, - arrow.DATE64: func(data arrow.ArrayData) arrow.Array { return NewDate64Data(data) }, - arrow.TIMESTAMP: func(data arrow.ArrayData) arrow.Array { return NewTimestampData(data) }, - arrow.TIME32: func(data arrow.ArrayData) arrow.Array { return NewTime32Data(data) }, - arrow.TIME64: func(data arrow.ArrayData) arrow.Array { return NewTime64Data(data) }, - arrow.INTERVAL_MONTHS: func(data arrow.ArrayData) arrow.Array { return NewMonthIntervalData(data) }, - arrow.INTERVAL_DAY_TIME: func(data arrow.ArrayData) arrow.Array { return NewDayTimeIntervalData(data) }, - arrow.DECIMAL128: func(data arrow.ArrayData) arrow.Array { return NewDecimal128Data(data) }, - arrow.DECIMAL256: func(data arrow.ArrayData) arrow.Array { return NewDecimal256Data(data) }, - arrow.LIST: func(data arrow.ArrayData) arrow.Array { return NewListData(data) }, - arrow.STRUCT: func(data arrow.ArrayData) arrow.Array { return NewStructData(data) }, - arrow.SPARSE_UNION: func(data arrow.ArrayData) arrow.Array { return NewSparseUnionData(data) }, - arrow.DENSE_UNION: func(data arrow.ArrayData) arrow.Array { return NewDenseUnionData(data) }, - arrow.DICTIONARY: func(data arrow.ArrayData) arrow.Array { return NewDictionaryData(data) }, - arrow.MAP: func(data arrow.ArrayData) arrow.Array { return NewMapData(data) }, - 
arrow.EXTENSION: func(data arrow.ArrayData) arrow.Array { return NewExtensionData(data) }, - arrow.FIXED_SIZE_LIST: func(data arrow.ArrayData) arrow.Array { return NewFixedSizeListData(data) }, - arrow.DURATION: func(data arrow.ArrayData) arrow.Array { return NewDurationData(data) }, - arrow.LARGE_STRING: func(data arrow.ArrayData) arrow.Array { return NewLargeStringData(data) }, - arrow.LARGE_BINARY: func(data arrow.ArrayData) arrow.Array { return NewLargeBinaryData(data) }, - arrow.LARGE_LIST: func(data arrow.ArrayData) arrow.Array { return NewLargeListData(data) }, - arrow.INTERVAL_MONTH_DAY_NANO: func(data arrow.ArrayData) arrow.Array { return NewMonthDayNanoIntervalData(data) }, - arrow.RUN_END_ENCODED: func(data arrow.ArrayData) arrow.Array { return NewRunEndEncodedData(data) }, - arrow.LIST_VIEW: func(data arrow.ArrayData) arrow.Array { return NewListViewData(data) }, - arrow.LARGE_LIST_VIEW: func(data arrow.ArrayData) arrow.Array { return NewLargeListViewData(data) }, - arrow.BINARY_VIEW: func(data arrow.ArrayData) arrow.Array { return NewBinaryViewData(data) }, - arrow.STRING_VIEW: func(data arrow.ArrayData) arrow.Array { return NewStringViewData(data) }, - // invalid data types to fill out array to size 2^6 - 1 - 63: invalidDataType, - } -} diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go deleted file mode 100644 index 4f0627c600078..0000000000000 --- a/go/arrow/array/array_test.go +++ /dev/null @@ -1,346 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/extensions" - "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -type testDataType struct { - id arrow.Type -} - -func (d *testDataType) ID() arrow.Type { return d.id } -func (d *testDataType) Name() string { panic("implement me") } -func (d *testDataType) BitWidth() int { return 8 } -func (d *testDataType) Bytes() int { return 1 } -func (d *testDataType) Fingerprint() string { return "" } -func (testDataType) Layout() arrow.DataTypeLayout { return arrow.DataTypeLayout{} } -func (testDataType) String() string { return "" } - -func TestMakeFromData(t *testing.T) { - tests := []struct { - name string - d arrow.DataType - size int - child []arrow.ArrayData - dict *array.Data - expPanic bool - expError string - }{ - // supported types - {name: "null", d: &testDataType{arrow.NULL}}, - {name: "bool", d: &testDataType{arrow.BOOL}}, - {name: "uint8", d: &testDataType{arrow.UINT8}}, - {name: "uint16", d: &testDataType{arrow.UINT16}}, - {name: "uint32", d: &testDataType{arrow.UINT32}}, - {name: "uint64", d: &testDataType{arrow.UINT64}}, - {name: "int8", d: &testDataType{arrow.INT8}}, - {name: "int16", d: &testDataType{arrow.INT16}}, - {name: "int32", d: &testDataType{arrow.INT32}}, - {name: "int64", d: &testDataType{arrow.INT64}}, - {name: "float16", d: &testDataType{arrow.FLOAT16}}, - 
{name: "float32", d: &testDataType{arrow.FLOAT32}}, - {name: "float64", d: &testDataType{arrow.FLOAT64}}, - {name: "string", d: &testDataType{arrow.STRING}, size: 3}, - {name: "binary", d: &testDataType{arrow.BINARY}, size: 3}, - {name: "large_string", d: &testDataType{arrow.LARGE_STRING}, size: 3}, - {name: "large_binary", d: &testDataType{arrow.LARGE_BINARY}, size: 3}, - {name: "fixed_size_binary", d: &testDataType{arrow.FIXED_SIZE_BINARY}}, - {name: "date32", d: &testDataType{arrow.DATE32}}, - {name: "date64", d: &testDataType{arrow.DATE64}}, - {name: "timestamp", d: &testDataType{arrow.TIMESTAMP}}, - {name: "time32", d: &testDataType{arrow.TIME32}}, - {name: "time64", d: &testDataType{arrow.TIME64}}, - {name: "month_interval", d: arrow.FixedWidthTypes.MonthInterval}, - {name: "day_time_interval", d: arrow.FixedWidthTypes.DayTimeInterval}, - {name: "decimal128", d: &testDataType{arrow.DECIMAL128}}, - {name: "decimal256", d: &testDataType{arrow.DECIMAL256}}, - {name: "month_day_nano_interval", d: arrow.FixedWidthTypes.MonthDayNanoInterval}, - - {name: "list", d: &testDataType{arrow.LIST}, child: []arrow.ArrayData{ - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - }}, - - {name: "large list", d: &testDataType{arrow.LARGE_LIST}, child: []arrow.ArrayData{ - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - }}, - - {name: "struct", d: &testDataType{arrow.STRUCT}}, - {name: "struct", d: &testDataType{arrow.STRUCT}, 
child: []arrow.ArrayData{ - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - }}, - - {name: "fixed_size_list", d: arrow.FixedSizeListOf(4, arrow.PrimitiveTypes.Int64), child: []arrow.ArrayData{ - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - }}, - {name: "duration", d: &testDataType{arrow.DURATION}}, - - {name: "map", d: &testDataType{arrow.MAP}, child: []arrow.ArrayData{ - array.NewData(&testDataType{arrow.STRUCT}, 0 /* length */, make([]*memory.Buffer, 3 /*null bitmap, values, offsets*/), []arrow.ArrayData{ - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - }, 0 /* nulls */, 0 /* offset */)}, - }, - - {name: "sparse union", d: arrow.SparseUnionOf(nil, nil), child: []arrow.ArrayData{}, size: 2}, - {name: "dense union", d: arrow.DenseUnionOf(nil, nil), child: []arrow.ArrayData{}, size: 3}, - - // various dictionary index types and value types - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: &testDataType{arrow.INT64}}, dict: array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - {name: 
"dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: &testDataType{arrow.INT32}}, dict: array.NewData(&testDataType{arrow.INT32}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: &testDataType{arrow.UINT16}}, dict: array.NewData(&testDataType{arrow.UINT16}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: &testDataType{arrow.INT64}}, dict: array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: &testDataType{arrow.UINT32}}, dict: array.NewData(&testDataType{arrow.UINT32}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint32, ValueType: &testDataType{arrow.TIMESTAMP}}, dict: array.NewData(&testDataType{arrow.TIMESTAMP}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int64, ValueType: &testDataType{arrow.UINT32}}, dict: array.NewData(&testDataType{arrow.UINT32}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: &testDataType{arrow.TIMESTAMP}}, dict: array.NewData(&testDataType{arrow.TIMESTAMP}, 0 /* length */, 
make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - - {name: "extension", d: &testDataType{arrow.EXTENSION}, expPanic: true, expError: "arrow/array: DataType for ExtensionArray must implement arrow.ExtensionType"}, - {name: "extension", d: extensions.NewUUIDType()}, - - {name: "run end encoded", d: arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int64), child: []arrow.ArrayData{ - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - }}, - - // invalid types - {name: "invalid(-1)", d: &testDataType{arrow.Type(-1)}, expPanic: true, expError: "invalid data type: Type(-1)"}, - {name: "invalid(63)", d: &testDataType{arrow.Type(63)}, expPanic: true, expError: "invalid data type: Type(63)"}, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - var ( - b [4]*memory.Buffer - n = 4 - data arrow.ArrayData - ) - if test.size != 0 { - n = test.size - } - if test.dict != nil { - data = array.NewDataWithDictionary(test.d, 0, b[:n], 0, 0, test.dict) - } else { - data = array.NewData(test.d, 0, b[:n], test.child, 0, 0) - } - - if test.expPanic { - assert.PanicsWithValue(t, test.expError, func() { - array.MakeFromData(data) - }) - } else { - assert.NotNil(t, array.MakeFromData(data)) - } - }) - } -} - -func bbits(v ...int32) []byte { - return tools.IntsToBitsLSB(v...) 
-} - -func TestArray_NullN(t *testing.T) { - tests := []struct { - name string - l int - bm []byte - n int - exp int - }{ - {name: "unknown,l16", l: 16, bm: bbits(0x11001010, 0x00110011), n: array.UnknownNullCount, exp: 8}, - {name: "unknown,l12,ignores last nibble", l: 12, bm: bbits(0x11001010, 0x00111111), n: array.UnknownNullCount, exp: 6}, - {name: "unknown,l12,12 nulls", l: 12, bm: bbits(0x00000000, 0x00000000), n: array.UnknownNullCount, exp: 12}, - {name: "unknown,l12,00 nulls", l: 12, bm: bbits(0x11111111, 0x11111111), n: array.UnknownNullCount, exp: 0}, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - buf := memory.NewBufferBytes(test.bm) - data := array.NewData(arrow.FixedWidthTypes.Boolean, test.l, []*memory.Buffer{buf, nil}, nil, test.n, 0) - buf.Release() - ar := array.MakeFromData(data) - data.Release() - got := ar.NullN() - ar.Release() - assert.Equal(t, test.exp, got) - }) - } -} - -func TestArraySlice(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - valids = []bool{true, true, true, false, true, true} - vs = []float64{1, 2, 3, 0, 4, 5} - ) - - b := array.NewFloat64Builder(pool) - defer b.Release() - - for _, tc := range []struct { - i, j int - panics bool - len int - }{ - {i: 0, j: len(valids), panics: false, len: len(valids)}, - {i: len(valids), j: len(valids), panics: false, len: 0}, - {i: 0, j: 1, panics: false, len: 1}, - {i: 1, j: 1, panics: false, len: 0}, - {i: 0, j: len(valids) + 1, panics: true}, - {i: 2, j: 1, panics: true}, - {i: len(valids) + 1, j: len(valids) + 1, panics: true}, - } { - t.Run("", func(t *testing.T) { - b.AppendValues(vs, valids) - - arr := b.NewFloat64Array() - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if tc.panics { - defer func() { - e := recover() - if e == nil { - t.Fatalf("this should have panicked, but did not") - } - }() - } - - 
slice := array.NewSlice(arr, int64(tc.i), int64(tc.j)).(*array.Float64) - defer slice.Release() - - if got, want := slice.Len(), tc.len; got != want { - t.Fatalf("invalid slice length: got=%d, want=%d", got, want) - } - }) - } -} - -func TestArraySliceTypes(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - valids := []bool{true, true, true, false, true, true} - - for _, tc := range []struct { - values interface{} - builder array.Builder - append func(b array.Builder, vs interface{}) - }{ - { - values: []bool{true, false, true, false, true, false}, - builder: array.NewBooleanBuilder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.BooleanBuilder).AppendValues(vs.([]bool), valids) }, - }, - { - values: []uint8{1, 2, 3, 0, 4, 5}, - builder: array.NewUint8Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Uint8Builder).AppendValues(vs.([]uint8), valids) }, - }, - { - values: []uint16{1, 2, 3, 0, 4, 5}, - builder: array.NewUint16Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Uint16Builder).AppendValues(vs.([]uint16), valids) }, - }, - { - values: []uint32{1, 2, 3, 0, 4, 5}, - builder: array.NewUint32Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Uint32Builder).AppendValues(vs.([]uint32), valids) }, - }, - { - values: []uint64{1, 2, 3, 0, 4, 5}, - builder: array.NewUint64Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Uint64Builder).AppendValues(vs.([]uint64), valids) }, - }, - { - values: []int8{1, 2, 3, 0, 4, 5}, - builder: array.NewInt8Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Int8Builder).AppendValues(vs.([]int8), valids) }, - }, - { - values: []int16{1, 2, 3, 0, 4, 5}, - builder: array.NewInt16Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Int16Builder).AppendValues(vs.([]int16), valids) }, - }, - { - values: []int32{1, 2, 
3, 0, 4, 5}, - builder: array.NewInt32Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Int32Builder).AppendValues(vs.([]int32), valids) }, - }, - { - values: []int64{1, 2, 3, 0, 4, 5}, - builder: array.NewInt64Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Int64Builder).AppendValues(vs.([]int64), valids) }, - }, - { - values: []float32{1, 2, 3, 0, 4, 5}, - builder: array.NewFloat32Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Float32Builder).AppendValues(vs.([]float32), valids) }, - }, - { - values: []float64{1, 2, 3, 0, 4, 5}, - builder: array.NewFloat64Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Float64Builder).AppendValues(vs.([]float64), valids) }, - }, - } { - t.Run("", func(t *testing.T) { - defer tc.builder.Release() - - b := tc.builder - tc.append(b, tc.values) - - arr := b.NewArray() - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("invalid length: got=%d, want=%d", got, want) - } - - slice := array.NewSlice(arr, 2, 5) - defer slice.Release() - - if got, want := slice.Len(), 3; got != want { - t.Fatalf("invalid slice length: got=%d, want=%d", got, want) - } - - shortSlice := array.NewSlice(arr, 2, 3) - defer shortSlice.Release() - - sliceOfShortSlice := array.NewSlice(shortSlice, 0, 1) - defer sliceOfShortSlice.Release() - - if got, want := sliceOfShortSlice.Len(), 1; got != want { - t.Fatalf("invalid short slice length: got=%d, want=%d", got, want) - } - }) - } -} diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go deleted file mode 100644 index 99764270bf39d..0000000000000 --- a/go/arrow/array/binary.go +++ /dev/null @@ -1,453 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "encoding/base64" - "fmt" - "strings" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type BinaryLike interface { - arrow.Array - ValueLen(int) int - ValueBytes() []byte - ValueOffset64(int) int64 -} - -// A type which represents an immutable sequence of variable-length binary strings. -type Binary struct { - array - valueOffsets []int32 - valueBytes []byte -} - -// NewBinaryData constructs a new Binary array from data. -func NewBinaryData(data arrow.ArrayData) *Binary { - a := &Binary{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Value returns the slice at index i. This value should not be mutated. -func (a *Binary) Value(i int) []byte { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - idx := a.array.data.offset + i - return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]] -} - -// ValueStr returns a copy of the base64-encoded string value or NullValueStr -func (a *Binary) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return base64.StdEncoding.EncodeToString(a.Value(i)) -} - -// ValueString returns the string at index i without performing additional allocations. -// The string is only valid for the lifetime of the Binary array. 
-func (a *Binary) ValueString(i int) string { - b := a.Value(i) - return *(*string)(unsafe.Pointer(&b)) -} - -func (a *Binary) ValueOffset(i int) int { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - return int(a.valueOffsets[a.array.data.offset+i]) -} - -func (a *Binary) ValueOffset64(i int) int64 { - return int64(a.ValueOffset(i)) -} - -func (a *Binary) ValueLen(i int) int { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - beg := a.array.data.offset + i - return int(a.valueOffsets[beg+1] - a.valueOffsets[beg]) -} - -func (a *Binary) ValueOffsets() []int32 { - beg := a.array.data.offset - end := beg + a.array.data.length + 1 - return a.valueOffsets[beg:end] -} - -func (a *Binary) ValueBytes() []byte { - beg := a.array.data.offset - end := beg + a.array.data.length - return a.valueBytes[a.valueOffsets[beg]:a.valueOffsets[end]] -} - -func (a *Binary) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%q", a.ValueString(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Binary) setData(data *Data) { - if len(data.buffers) != 3 { - panic("len(data.buffers) != 3") - } - - a.array.setData(data) - - if valueData := data.buffers[2]; valueData != nil { - a.valueBytes = valueData.Bytes() - } - - if valueOffsets := data.buffers[1]; valueOffsets != nil { - a.valueOffsets = arrow.Int32Traits.CastFromBytes(valueOffsets.Bytes()) - } - - if a.array.data.length < 1 { - return - } - - expNumOffsets := a.array.data.offset + a.array.data.length + 1 - if len(a.valueOffsets) < expNumOffsets { - panic(fmt.Errorf("arrow/array: binary offset buffer must have at least %d values", expNumOffsets)) - } - - if int(a.valueOffsets[expNumOffsets-1]) > len(a.valueBytes) { - panic("arrow/array: binary offsets out of bounds of 
data buffer") - } -} - -func (a *Binary) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.Value(i) -} - -func (a *Binary) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - vals[i] = a.GetOneForMarshal(i) - } - // golang marshal standard says that []byte will be marshalled - // as a base64-encoded string - return json.Marshal(vals) -} - -func arrayEqualBinary(left, right *Binary) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !bytes.Equal(left.Value(i), right.Value(i)) { - return false - } - } - return true -} - -type LargeBinary struct { - array - valueOffsets []int64 - valueBytes []byte -} - -func NewLargeBinaryData(data arrow.ArrayData) *LargeBinary { - a := &LargeBinary{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *LargeBinary) Value(i int) []byte { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - idx := a.array.data.offset + i - return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]] -} - -func (a *LargeBinary) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return base64.StdEncoding.EncodeToString(a.Value(i)) -} -func (a *LargeBinary) ValueString(i int) string { - b := a.Value(i) - return *(*string)(unsafe.Pointer(&b)) -} - -func (a *LargeBinary) ValueOffset(i int) int64 { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - return a.valueOffsets[a.array.data.offset+i] -} - -func (a *LargeBinary) ValueOffset64(i int) int64 { - return a.ValueOffset(i) -} - -func (a *LargeBinary) ValueLen(i int) int { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - beg := a.array.data.offset + i - return int(a.valueOffsets[beg+1] - a.valueOffsets[beg]) -} - -func (a *LargeBinary) ValueOffsets() []int64 { - beg := a.array.data.offset - end := beg + a.array.data.length + 1 - 
return a.valueOffsets[beg:end] -} - -func (a *LargeBinary) ValueBytes() []byte { - beg := a.array.data.offset - end := beg + a.array.data.length - return a.valueBytes[a.valueOffsets[beg]:a.valueOffsets[end]] -} - -func (a *LargeBinary) String() string { - var o strings.Builder - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(&o, "%q", a.ValueString(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *LargeBinary) setData(data *Data) { - if len(data.buffers) != 3 { - panic("len(data.buffers) != 3") - } - - a.array.setData(data) - - if valueData := data.buffers[2]; valueData != nil { - a.valueBytes = valueData.Bytes() - } - - if valueOffsets := data.buffers[1]; valueOffsets != nil { - a.valueOffsets = arrow.Int64Traits.CastFromBytes(valueOffsets.Bytes()) - } - - if a.array.data.length < 1 { - return - } - - expNumOffsets := a.array.data.offset + a.array.data.length + 1 - if len(a.valueOffsets) < expNumOffsets { - panic(fmt.Errorf("arrow/array: large binary offset buffer must have at least %d values", expNumOffsets)) - } - - if int(a.valueOffsets[expNumOffsets-1]) > len(a.valueBytes) { - panic("arrow/array: large binary offsets out of bounds of data buffer") - } -} - -func (a *LargeBinary) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.Value(i) -} - -func (a *LargeBinary) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - vals[i] = a.GetOneForMarshal(i) - } - // golang marshal standard says that []byte will be marshalled - // as a base64-encoded string - return json.Marshal(vals) -} - -func arrayEqualLargeBinary(left, right *LargeBinary) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !bytes.Equal(left.Value(i), right.Value(i)) { - return false - } - } - return true -} - -type ViewLike interface { - 
arrow.Array - ValueHeader(int) *arrow.ViewHeader -} - -type BinaryView struct { - array - values []arrow.ViewHeader - dataBuffers []*memory.Buffer -} - -func NewBinaryViewData(data arrow.ArrayData) *BinaryView { - a := &BinaryView{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *BinaryView) setData(data *Data) { - if len(data.buffers) < 2 { - panic("len(data.buffers) < 2") - } - a.array.setData(data) - - if valueData := data.buffers[1]; valueData != nil { - a.values = arrow.ViewHeaderTraits.CastFromBytes(valueData.Bytes()) - } - - a.dataBuffers = data.buffers[2:] -} - -func (a *BinaryView) ValueHeader(i int) *arrow.ViewHeader { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - return &a.values[a.array.data.offset+i] -} - -func (a *BinaryView) Value(i int) []byte { - s := a.ValueHeader(i) - if s.IsInline() { - return s.InlineBytes() - } - start := s.BufferOffset() - buf := a.dataBuffers[s.BufferIndex()] - return buf.Bytes()[start : start+int32(s.Len())] -} - -func (a *BinaryView) ValueLen(i int) int { - s := a.ValueHeader(i) - return s.Len() -} - -// ValueString returns the value at index i as a string instead of -// a byte slice, without copying the underlying data. -func (a *BinaryView) ValueString(i int) string { - b := a.Value(i) - return *(*string)(unsafe.Pointer(&b)) -} - -func (a *BinaryView) String() string { - var o strings.Builder - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(&o, "%q", a.ValueString(i)) - } - } - o.WriteString("]") - return o.String() -} - -// ValueStr is paired with AppendValueFromString in that it returns -// the value at index i as a string: Semantically this means that for -// a null value it will return the string "(null)", otherwise it will -// return the value as a base64 encoded string suitable for CSV/JSON. 
-// -// This is always going to be less performant than just using ValueString -// and exists to fulfill the Array interface to provide a method which -// can produce a human readable string for a given index. -func (a *BinaryView) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return base64.StdEncoding.EncodeToString(a.Value(i)) -} - -func (a *BinaryView) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.Value(i) -} - -func (a *BinaryView) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - vals[i] = a.GetOneForMarshal(i) - } - // golang marshal standard says that []byte will be marshalled - // as a base64-encoded string - return json.Marshal(vals) -} - -func arrayEqualBinaryView(left, right *BinaryView) bool { - leftBufs, rightBufs := left.dataBuffers, right.dataBuffers - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !left.ValueHeader(i).Equals(leftBufs, right.ValueHeader(i), rightBufs) { - return false - } - } - return true -} - -var ( - _ arrow.Array = (*Binary)(nil) - _ arrow.Array = (*LargeBinary)(nil) - _ arrow.Array = (*BinaryView)(nil) - - _ BinaryLike = (*Binary)(nil) - _ BinaryLike = (*LargeBinary)(nil) -) diff --git a/go/arrow/array/binary_test.go b/go/arrow/array/binary_test.go deleted file mode 100644 index 919fff7b5e5e8..0000000000000 --- a/go/arrow/array/binary_test.go +++ /dev/null @@ -1,726 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestBinary(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - - values := [][]byte{ - []byte("AAA"), - nil, - []byte("BBBB"), - } - valid := []bool{true, false, true} - b.AppendValues(values, valid) - - b.Retain() - b.Release() - - a := b.NewBinaryArray() - assert.Equal(t, 3, a.Len()) - assert.Equal(t, 1, a.NullN()) - assert.Equal(t, []byte("AAA"), a.Value(0)) - assert.Equal(t, []byte{}, a.Value(1)) - assert.Equal(t, []byte("BBBB"), a.Value(2)) - assert.Equal(t, "QUFB", a.ValueStr(0)) - assert.Equal(t, NullValueStr, a.ValueStr(1)) - a.Release() - - // Test builder reset and NewArray API. 
- b.AppendValues(values, valid) - a = b.NewArray().(*Binary) - assert.Equal(t, 3, a.Len()) - assert.Equal(t, 1, a.NullN()) - assert.Equal(t, []byte("AAA"), a.Value(0)) - assert.Equal(t, []byte{}, a.Value(1)) - assert.Equal(t, []byte("BBBB"), a.Value(2)) - assert.Equal(t, "QUFB", a.ValueStr(0)) - assert.Equal(t, NullValueStr, a.ValueStr(1)) - a.Release() - - b.Release() -} - -func TestLargeBinary(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - - values := [][]byte{ - []byte("AAA"), - nil, - []byte("BBBB"), - } - valid := []bool{true, false, true} - b.AppendValues(values, valid) - - b.Retain() - b.Release() - - assert.Panics(t, func() { - b.NewBinaryArray() - }) - - a := b.NewLargeBinaryArray() - assert.Equal(t, 3, a.Len()) - assert.Equal(t, 1, a.NullN()) - assert.Equal(t, []byte("AAA"), a.Value(0)) - assert.Equal(t, []byte{}, a.Value(1)) - assert.Equal(t, []byte("BBBB"), a.Value(2)) - assert.Equal(t, "QUFB", a.ValueStr(0)) - assert.Equal(t, NullValueStr, a.ValueStr(1)) - a.Release() - - // Test builder reset and NewArray API. 
- b.AppendValues(values, valid) - a = b.NewArray().(*LargeBinary) - assert.Equal(t, 3, a.Len()) - assert.Equal(t, 1, a.NullN()) - assert.Equal(t, []byte("AAA"), a.Value(0)) - assert.Equal(t, []byte{}, a.Value(1)) - assert.Equal(t, []byte("BBBB"), a.Value(2)) - assert.Equal(t, "QUFB", a.ValueStr(0)) - assert.Equal(t, NullValueStr, a.ValueStr(1)) - a.Release() - - b.Release() -} - -func TestBinarySliceData(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - for _, v := range values { - b.AppendString(v) - } - - arr := b.NewArray().(*Binary) - defer arr.Release() - - if got, want := arr.Len(), len(values); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - vs := make([]string, arr.Len()) - - for i := range vs { - vs[i] = arr.ValueString(i) - } - - if got, want := vs, values; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - tests := []struct { - interval [2]int64 - want []string - }{ - { - interval: [2]int64{0, 0}, - want: []string{}, - }, - { - interval: [2]int64{0, 5}, - want: []string{"a", "bc", "def", "g", "hijk"}, - }, - { - interval: [2]int64{0, 10}, - want: []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"}, - }, - { - interval: [2]int64{5, 10}, - want: []string{"lm", "n", "opq", "rs", "tu"}, - }, - { - interval: [2]int64{10, 10}, - want: []string{}, - }, - { - interval: [2]int64{2, 7}, - want: []string{"def", "g", "hijk", "lm", "n"}, - }, - } - - for _, tc := range tests { - t.Run("", func(t *testing.T) { - - slice := NewSlice(arr, tc.interval[0], tc.interval[1]).(*Binary) - defer slice.Release() - - if got, want := slice.Len(), len(tc.want); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - vs := make([]string, slice.Len()) - - for i := range vs { - vs[i] = 
slice.ValueString(i) - } - - if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - }) - } -} - -func TestBinarySliceDataWithNull(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*Binary) - defer arr.Release() - - if got, want := arr.Len(), len(values); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 3; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - vs := make([]string, arr.Len()) - - for i := range vs { - vs[i] = arr.ValueString(i) - } - - if got, want := vs, values; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - tests := []struct { - interval [2]int64 - nulls int - want []string - }{ - { - interval: [2]int64{0, 2}, - nulls: 0, - want: []string{"a", "bc"}, - }, - { - interval: [2]int64{0, 3}, - nulls: 1, - want: []string{"a", "bc", ""}, - }, - { - interval: [2]int64{0, 4}, - nulls: 2, - want: []string{"a", "bc", "", ""}, - }, - { - interval: [2]int64{4, 8}, - nulls: 0, - want: []string{"hijk", "lm", "", "opq"}, - }, - { - interval: [2]int64{2, 9}, - nulls: 3, - want: []string{"", "", "hijk", "lm", "", "opq", ""}, - }, - } - - for _, tc := range tests { - t.Run("", func(t *testing.T) { - - slice := NewSlice(arr, tc.interval[0], tc.interval[1]).(*Binary) - defer slice.Release() - - if got, want := slice.Len(), len(tc.want); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.NullN(), tc.nulls; got != want { - t.Errorf("got=%d, want=%d", got, want) - } - - vs := make([]string, slice.Len()) - - for i := range vs { - vs[i] = 
slice.ValueString(i) - } - - if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - }) - } -} - -func TestBinarySliceOutOfBounds(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - for _, v := range values { - b.AppendString(v) - } - - arr := b.NewArray().(*Binary) - defer arr.Release() - - slice := NewSlice(arr, 3, 8).(*Binary) - defer slice.Release() - - tests := []struct { - index int - panic bool - }{ - { - index: -1, - panic: true, - }, - { - index: 5, - panic: true, - }, - { - index: 0, - panic: false, - }, - { - index: 4, - panic: false, - }, - } - - for _, tc := range tests { - t.Run("", func(t *testing.T) { - - var val string - - if tc.panic { - defer func() { - e := recover() - if e == nil { - t.Fatalf("this should have panicked, but did not; slice value %q", val) - } - if got, want := e.(string), "arrow/array: index out of range"; got != want { - t.Fatalf("invalid error. 
got=%q, want=%q", got, want) - } - }() - } else { - defer func() { - if e := recover(); e != nil { - t.Fatalf("unexpected panic: %v", e) - } - }() - } - - val = slice.ValueString(tc.index) - }) - } -} - -func TestBinaryValueOffset(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*Binary) - defer arr.Release() - - slice := NewSlice(arr, 2, 9).(*Binary) - defer slice.Release() - - offset := 3 - vs := values[2:9] - - for i, v := range vs { - assert.Equal(t, offset, slice.ValueOffset(i)) - offset += len(v) - } -} - -func TestLargeBinaryValueOffset(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*LargeBinary) - defer arr.Release() - - slice := NewSlice(arr, 2, 9).(*LargeBinary) - defer slice.Release() - - offset := 3 - vs := values[2:9] - - for i, v := range vs { - assert.EqualValues(t, offset, slice.ValueOffset(i)) - offset += len(v) - } -} - -func TestBinaryValueLen(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := 
b.NewArray().(*Binary) - defer arr.Release() - - slice := NewSlice(arr, 2, 9).(*Binary) - defer slice.Release() - - vs := values[2:9] - - for i, v := range vs { - assert.Equal(t, len(v), slice.ValueLen(i)) - } -} - -func TestLargeBinaryValueLen(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*LargeBinary) - defer arr.Release() - - slice := NewSlice(arr, 2, 9).(*LargeBinary) - defer slice.Release() - - vs := values[2:9] - - for i, v := range vs { - assert.Equal(t, len(v), slice.ValueLen(i)) - } -} - -func TestBinaryValueOffsets(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*Binary) - defer arr.Release() - - assert.Equal(t, []int32{0, 1, 3, 3, 3, 7, 9, 9, 12, 12, 14}, arr.ValueOffsets()) - - slice := NewSlice(arr, 2, 9).(*Binary) - defer slice.Release() - - assert.Equal(t, []int32{3, 3, 3, 7, 9, 9, 12, 12}, slice.ValueOffsets()) -} - -func TestLargeBinaryValueOffsets(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - defer b.Release() - - b.AppendStringValues(values, valids) - - 
arr := b.NewArray().(*LargeBinary) - defer arr.Release() - - assert.Equal(t, []int64{0, 1, 3, 3, 3, 7, 9, 9, 12, 12, 14}, arr.ValueOffsets()) - - slice := NewSlice(arr, 2, 9).(*LargeBinary) - defer slice.Release() - - assert.Equal(t, []int64{3, 3, 3, 7, 9, 9, 12, 12}, slice.ValueOffsets()) -} - -func TestBinaryValueBytes(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*Binary) - defer arr.Release() - - assert.Equal(t, []byte{'a', 'b', 'c', 'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q', 't', 'u'}, arr.ValueBytes()) - - slice := NewSlice(arr, 2, 9).(*Binary) - defer slice.Release() - - assert.Equal(t, []byte{'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q'}, slice.ValueBytes()) -} - -func TestLargeBinaryValueBytes(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*LargeBinary) - defer arr.Release() - - assert.Equal(t, []byte{'a', 'b', 'c', 'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q', 't', 'u'}, arr.ValueBytes()) - - slice := NewSlice(arr, 2, 9).(*LargeBinary) - defer slice.Release() - - assert.Equal(t, []byte{'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q'}, slice.ValueBytes()) -} - -func TestBinaryStringer(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "é", "", "hijk", 
"lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, true, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*Binary) - defer arr.Release() - - got := arr.String() - want := `["a" "bc" (null) "é" (null) "hijk" "lm" "" "opq" (null) "tu"]` - - if got != want { - t.Fatalf("invalid stringer:\ngot= %s\nwant=%s\n", got, want) - } -} - -func TestLargeBinaryStringer(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "é", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, true, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*LargeBinary) - defer arr.Release() - - got := arr.String() - want := `["a" "bc" (null) "é" (null) "hijk" "lm" "" "opq" (null) "tu"]` - - if got != want { - t.Fatalf("invalid stringer:\ngot= %s\nwant=%s\n", got, want) - } -} - -func TestBinaryInvalidOffsets(t *testing.T) { - const expectedPanic = "arrow/array: binary offsets out of bounds of data buffer" - - makeBuffers := func(valids []bool, offsets []int32, data string) []*memory.Buffer { - offsetBuf := memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets)) - var nullBufBytes []byte - var nullBuf *memory.Buffer - if valids != nil { - nullBufBytes = make([]byte, bitutil.BytesForBits(int64(len(valids)))) - for i, v := range valids { - bitutil.SetBitTo(nullBufBytes, i, v) - } - nullBuf = memory.NewBufferBytes(nullBufBytes) - } - return []*memory.Buffer{nullBuf, offsetBuf, memory.NewBufferBytes([]byte(data))} - } - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{}, "") - NewBinaryData(NewData(arrow.BinaryTypes.Binary, 0, buffers, nil, 0, 0)) - }, "empty array with no 
offsets") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{0, 5}, "") - NewBinaryData(NewData(arrow.BinaryTypes.Binary, 0, buffers, nil, 0, 0)) - }, "empty array, offsets ignored") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{0, 3, 4, 9}, "oooabcdef") - NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, nil, 0, 2)) - }, "data has offset and value offsets are valid") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{0, 3, 6, 9, 9}, "012345678") - arr := NewBinaryData(NewData(arrow.BinaryTypes.Binary, 4, buffers, nil, 0, 0)) - if assert.Equal(t, 4, arr.Len()) && assert.Zero(t, arr.NullN()) { - assert.EqualValues(t, "012", arr.Value(0)) - assert.EqualValues(t, "345", arr.Value(1)) - assert.EqualValues(t, "678", arr.Value(2)) - assert.EqualValues(t, "", arr.Value(3), "trailing empty binary value will have offset past end") - } - }, "simple valid case") - - assert.NotPanics(t, func() { - buffers := makeBuffers([]bool{true, false, true, false}, []int32{0, 3, 4, 9, 9}, "oooabcdef") - arr := NewBinaryData(NewData(arrow.BinaryTypes.Binary, 4, buffers, nil, 2, 0)) - if assert.Equal(t, 4, arr.Len()) && assert.Equal(t, 2, arr.NullN()) { - assert.EqualValues(t, "ooo", arr.Value(0)) - assert.True(t, arr.IsNull(1)) - assert.EqualValues(t, "bcdef", arr.Value(2)) - assert.True(t, arr.IsNull(3)) - } - }, "simple valid case with nulls") - - assert.PanicsWithValue(t, expectedPanic, func() { - buffers := makeBuffers(nil, []int32{0, 5}, "abc") - NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, nil, 0, 0)) - }, "last offset is overflowing") - - assert.PanicsWithError(t, "arrow/array: binary offset buffer must have at least 2 values", func() { - buffers := makeBuffers(nil, []int32{0}, "abc") - NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, nil, 0, 0)) - }, "last offset is missing") - - assert.PanicsWithValue(t, expectedPanic, func() { - buffers := makeBuffers(nil, []int32{0, 3, 10, 15}, 
"oooabcdef") - NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, nil, 0, 2)) - }, "data has offset and value offset is overflowing") -} - -func TestBinaryStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valid := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - b.AppendStringValues(values, valid) - - arr := b.NewArray().(*Binary) - defer arr.Release() - - // 2. create array via AppendValueFromString - - b1 := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*Binary) - defer arr1.Release() - - assert.True(t, Equal(arr, arr1)) -} - -func TestBinaryViewStringRoundTrip(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "supercalifragilistic", "", "expialidocious"} - valid := []bool{true, true, false, false, true, true, true} - - b := NewBinaryViewBuilder(mem) - defer b.Release() - - b.AppendStringValues(values, valid) - arr := b.NewArray().(*BinaryView) - defer arr.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b.NewArray().(*BinaryView) - defer arr1.Release() - - assert.True(t, Equal(arr, arr1)) -} diff --git a/go/arrow/array/binarybuilder.go b/go/arrow/array/binarybuilder.go deleted file mode 100644 index 6fcc4eaf46479..0000000000000 --- a/go/arrow/array/binarybuilder.go +++ /dev/null @@ -1,704 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "encoding/base64" - "fmt" - "math" - "reflect" - "sync/atomic" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// A BinaryBuilder is used to build a Binary array using the Append methods. 
-type BinaryBuilder struct { - builder - - dtype arrow.BinaryDataType - offsets bufBuilder - values *byteBufferBuilder - - appendOffsetVal func(int) - getOffsetVal func(int) int - maxCapacity uint64 - offsetByteWidth int -} - -// NewBinaryBuilder can be used for any of the variable length binary types, -// Binary, LargeBinary, String, LargeString by passing the appropriate data type -func NewBinaryBuilder(mem memory.Allocator, dtype arrow.BinaryDataType) *BinaryBuilder { - var ( - offsets bufBuilder - offsetValFn func(int) - maxCapacity uint64 - offsetByteWidth int - getOffsetVal func(int) int - ) - switch dtype.Layout().Buffers[1].ByteWidth { - case 4: - b := newInt32BufferBuilder(mem) - offsetValFn = func(v int) { b.AppendValue(int32(v)) } - getOffsetVal = func(i int) int { return int(b.Value(i)) } - offsets = b - maxCapacity = math.MaxInt32 - offsetByteWidth = arrow.Int32SizeBytes - case 8: - b := newInt64BufferBuilder(mem) - offsetValFn = func(v int) { b.AppendValue(int64(v)) } - getOffsetVal = func(i int) int { return int(b.Value(i)) } - offsets = b - maxCapacity = math.MaxInt64 - offsetByteWidth = arrow.Int64SizeBytes - } - - b := &BinaryBuilder{ - builder: builder{refCount: 1, mem: mem}, - dtype: dtype, - offsets: offsets, - values: newByteBufferBuilder(mem), - appendOffsetVal: offsetValFn, - maxCapacity: maxCapacity, - offsetByteWidth: offsetByteWidth, - getOffsetVal: getOffsetVal, - } - return b -} - -func (b *BinaryBuilder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. 
-func (b *BinaryBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.offsets != nil { - b.offsets.Release() - b.offsets = nil - } - if b.values != nil { - b.values.Release() - b.values = nil - } - } -} - -func (b *BinaryBuilder) Append(v []byte) { - b.Reserve(1) - b.appendNextOffset() - b.values.Append(v) - b.UnsafeAppendBoolToBitmap(true) -} - -func (b *BinaryBuilder) AppendString(v string) { - b.Append([]byte(v)) -} - -func (b *BinaryBuilder) AppendNull() { - b.Reserve(1) - b.appendNextOffset() - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *BinaryBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *BinaryBuilder) AppendEmptyValue() { - b.Reserve(1) - b.appendNextOffset() - b.UnsafeAppendBoolToBitmap(true) -} - -func (b *BinaryBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *BinaryBuilder) AppendValues(v [][]byte, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - for _, vv := range v { - b.appendNextOffset() - b.values.Append(vv) - } - - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -// AppendStringValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *BinaryBuilder) AppendStringValues(v []string, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - for _, vv := range v { - b.appendNextOffset() - b.values.Append([]byte(vv)) - } - - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *BinaryBuilder) UnsafeAppend(v []byte) { - b.appendNextOffset() - b.values.unsafeAppend(v) - b.UnsafeAppendBoolToBitmap(true) -} - -func (b *BinaryBuilder) Value(i int) []byte { - start := b.getOffsetVal(i) - var end int - if i == (b.length - 1) { - end = b.values.Len() - } else { - end = b.getOffsetVal(i + 1) - } - return b.values.Bytes()[start:end] -} - -func (b *BinaryBuilder) init(capacity int) { - b.builder.init(capacity) - b.offsets.resize((capacity + 1) * b.offsetByteWidth) -} - -// DataLen returns the number of bytes in the data array. -func (b *BinaryBuilder) DataLen() int { return b.values.length } - -// DataCap returns the total number of bytes that can be stored -// without allocating additional memory. -func (b *BinaryBuilder) DataCap() int { return b.values.capacity } - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *BinaryBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// ReserveData ensures there is enough space for appending n bytes -// by checking the capacity and resizing the data buffer if necessary. -func (b *BinaryBuilder) ReserveData(n int) { - if b.values.capacity < b.values.length+n { - b.values.resize(b.values.Len() + n) - } -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may be reduced. 
-func (b *BinaryBuilder) Resize(n int) { - b.offsets.resize((n + 1) * b.offsetByteWidth) - if (n * b.offsetByteWidth) < b.offsets.Len() { - b.offsets.SetLength(n * b.offsetByteWidth) - } - b.builder.resize(n, b.init) -} - -func (b *BinaryBuilder) ResizeData(n int) { - b.values.length = n -} - -// NewArray creates a Binary array from the memory buffers used by the builder and resets the BinaryBuilder -// so it can be used to build a new array. -// -// Builds the appropriate Binary or LargeBinary array based on the datatype -// it was initialized with. -func (b *BinaryBuilder) NewArray() arrow.Array { - if b.offsetByteWidth == arrow.Int32SizeBytes { - return b.NewBinaryArray() - } - return b.NewLargeBinaryArray() -} - -// NewBinaryArray creates a Binary array from the memory buffers used by the builder and resets the BinaryBuilder -// so it can be used to build a new array. -func (b *BinaryBuilder) NewBinaryArray() (a *Binary) { - if b.offsetByteWidth != arrow.Int32SizeBytes { - panic("arrow/array: invalid call to NewBinaryArray when building a LargeBinary array") - } - - data := b.newData() - a = NewBinaryData(data) - data.Release() - return -} - -func (b *BinaryBuilder) NewLargeBinaryArray() (a *LargeBinary) { - if b.offsetByteWidth != arrow.Int64SizeBytes { - panic("arrow/array: invalid call to NewLargeBinaryArray when building a Binary array") - } - - data := b.newData() - a = NewLargeBinaryData(data) - data.Release() - return -} - -func (b *BinaryBuilder) newData() (data *Data) { - b.appendNextOffset() - offsets, values := b.offsets.Finish(), b.values.Finish() - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, offsets, values}, nil, b.nulls, 0) - if offsets != nil { - offsets.Release() - } - - if values != nil { - values.Release() - } - - b.builder.reset() - - return -} - -func (b *BinaryBuilder) appendNextOffset() { - numBytes := b.values.Len() - debug.Assert(uint64(numBytes) <= b.maxCapacity, "exceeded maximum capacity of binary array") - 
b.appendOffsetVal(numBytes) -} - -func (b *BinaryBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - - if b.dtype.IsUtf8() { - b.Append([]byte(s)) - return nil - } - - decodedVal, err := base64.StdEncoding.DecodeString(s) - if err != nil { - return fmt.Errorf("could not decode base64 string: %w", err) - } - b.Append(decodedVal) - return nil -} - -func (b *BinaryBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case string: - data, err := base64.StdEncoding.DecodeString(v) - if err != nil { - return err - } - b.Append(data) - case []byte: - b.Append(v) - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf([]byte{}), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *BinaryBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *BinaryBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -const ( - dfltBlockSize = 32 << 10 // 32 KB - viewValueSizeLimit int32 = math.MaxInt32 -) - -type BinaryViewBuilder struct { - builder - dtype arrow.BinaryDataType - - data *memory.Buffer - rawData []arrow.ViewHeader - - blockBuilder multiBufferBuilder -} - -func NewBinaryViewBuilder(mem memory.Allocator) *BinaryViewBuilder { - return &BinaryViewBuilder{ - dtype: arrow.BinaryTypes.BinaryView, - builder: builder{ - refCount: 1, - mem: mem, - }, - blockBuilder: multiBufferBuilder{ - refCount: 1, - blockSize: dfltBlockSize, - mem: mem, - }, - } -} - -func (b *BinaryViewBuilder) 
SetBlockSize(sz uint) { - b.blockBuilder.blockSize = int(sz) -} - -func (b *BinaryViewBuilder) Type() arrow.DataType { return b.dtype } - -func (b *BinaryViewBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) != 0 { - return - } - - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } -} - -func (b *BinaryViewBuilder) init(capacity int) { - b.builder.init(capacity) - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.ViewHeaderTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.ViewHeaderTraits.CastFromBytes(b.data.Bytes()) -} - -func (b *BinaryViewBuilder) Resize(n int) { - nbuild := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - return - } - - b.builder.resize(nbuild, b.init) - b.data.Resize(arrow.ViewHeaderTraits.BytesRequired(n)) - b.rawData = arrow.ViewHeaderTraits.CastFromBytes(b.data.Bytes()) -} - -func (b *BinaryViewBuilder) ReserveData(length int) { - if int32(length) > viewValueSizeLimit { - panic(fmt.Errorf("%w: BinaryView or StringView elements cannot reference strings larger than 2GB", - arrow.ErrInvalid)) - } - b.blockBuilder.Reserve(int(length)) -} - -func (b *BinaryViewBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -func (b *BinaryViewBuilder) Append(v []byte) { - if int32(len(v)) > viewValueSizeLimit { - panic(fmt.Errorf("%w: BinaryView or StringView elements cannot reference strings larger than 2GB", arrow.ErrInvalid)) - } - - if !arrow.IsViewInline(len(v)) { - b.ReserveData(len(v)) - } - - b.Reserve(1) - b.UnsafeAppend(v) -} - -// AppendString is identical to Append, only accepting a string instead -// of a byte slice, avoiding the extra copy that would occur if you simply -// did []byte(v). 
-// -// This is different than AppendValueFromString which exists for the -// Builder interface, in that this expects raw binary data which is -// appended unmodified. AppendValueFromString expects base64 encoded binary -// data instead. -func (b *BinaryViewBuilder) AppendString(v string) { - // create a []byte without copying the bytes - // in go1.20 this would be unsafe.StringData - val := *(*[]byte)(unsafe.Pointer(&struct { - string - int - }{v, len(v)})) - b.Append(val) -} - -func (b *BinaryViewBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *BinaryViewBuilder) AppendNulls(n int) { - b.Reserve(n) - for i := 0; i < n; i++ { - b.UnsafeAppendBoolToBitmap(false) - } -} - -func (b *BinaryViewBuilder) AppendEmptyValue() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(true) -} - -func (b *BinaryViewBuilder) AppendEmptyValues(n int) { - b.Reserve(n) - b.unsafeAppendBoolsToBitmap(nil, n) -} - -func (b *BinaryViewBuilder) UnsafeAppend(v []byte) { - hdr := &b.rawData[b.length] - hdr.SetBytes(v) - if !hdr.IsInline() { - b.blockBuilder.UnsafeAppend(hdr, v) - } - b.UnsafeAppendBoolToBitmap(true) -} - -func (b *BinaryViewBuilder) AppendValues(v [][]byte, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - outOfLineTotal := 0 - for i, vv := range v { - if len(valid) == 0 || valid[i] { - if !arrow.IsViewInline(len(vv)) { - outOfLineTotal += len(vv) - } - } - } - - b.ReserveData(outOfLineTotal) - for i, vv := range v { - if len(valid) == 0 || valid[i] { - hdr := &b.rawData[b.length+i] - hdr.SetBytes(vv) - if !hdr.IsInline() { - b.blockBuilder.UnsafeAppend(hdr, vv) - } - } - } - - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *BinaryViewBuilder) AppendStringValues(v []string, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if 
len(v) == 0 { - return - } - - b.Reserve(len(v)) - outOfLineTotal := 0 - for i, vv := range v { - if len(valid) == 0 || valid[i] { - if !arrow.IsViewInline(len(vv)) { - outOfLineTotal += len(vv) - } - } - } - - b.ReserveData(outOfLineTotal) - for i, vv := range v { - if len(valid) == 0 || valid[i] { - hdr := &b.rawData[b.length+i] - hdr.SetString(vv) - if !hdr.IsInline() { - b.blockBuilder.UnsafeAppendString(hdr, vv) - } - } - } - - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -// AppendValueFromString is paired with ValueStr for fulfilling the -// base Builder interface. This is intended to read in a human-readable -// string such as from CSV or JSON and append it to the array. -// -// For Binary values are expected to be base64 encoded (and will be -// decoded as such before being appended). -func (b *BinaryViewBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - - if b.dtype.IsUtf8() { - b.Append([]byte(s)) - return nil - } - - decodedVal, err := base64.StdEncoding.DecodeString(s) - if err != nil { - return fmt.Errorf("could not decode base64 string: %w", err) - } - b.Append(decodedVal) - return nil -} - -func (b *BinaryViewBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case string: - data, err := base64.StdEncoding.DecodeString(v) - if err != nil { - return err - } - b.Append(data) - case []byte: - b.Append(v) - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf([]byte{}), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *BinaryViewBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *BinaryViewBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != 
nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary view builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -func (b *BinaryViewBuilder) newData() (data *Data) { - bytesRequired := arrow.ViewHeaderTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - - dataBuffers := b.blockBuilder.Finish() - data = NewData(b.dtype, b.length, append([]*memory.Buffer{ - b.nullBitmap, b.data}, dataBuffers...), nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - for _, buf := range dataBuffers { - buf.Release() - } - } - return -} - -func (b *BinaryViewBuilder) NewBinaryViewArray() (a *BinaryView) { - data := b.newData() - a = NewBinaryViewData(data) - data.Release() - return -} - -func (b *BinaryViewBuilder) NewArray() arrow.Array { - return b.NewBinaryViewArray() -} - -var ( - _ Builder = (*BinaryBuilder)(nil) - _ Builder = (*BinaryViewBuilder)(nil) -) diff --git a/go/arrow/array/binarybuilder_test.go b/go/arrow/array/binarybuilder_test.go deleted file mode 100644 index 65d5c7385df4c..0000000000000 --- a/go/arrow/array/binarybuilder_test.go +++ /dev/null @@ -1,151 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "bytes" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestBinaryBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - - exp := [][]byte{[]byte("foo"), []byte("bar"), nil, []byte("sydney"), []byte("cameron")} - for _, v := range exp { - if v == nil { - ab.AppendNull() - } else { - ab.Append(v) - } - } - - assert.Equal(t, len(exp), ab.Len(), "unexpected Len()") - assert.Equal(t, 1, ab.NullN(), "unexpected NullN()") - - for i, v := range exp { - if v == nil { - v = []byte{} - } - assert.Equal(t, v, ab.Value(i), "unexpected BinaryArrayBuilder.Value(%d)", i) - } - // Zm9v is foo in base64 - assert.NoError(t, ab.AppendValueFromString("Zm9v")) - - ar := ab.NewBinaryArray() - assert.Equal(t, "Zm9v", ar.ValueStr(5)) - - ab.Release() - ar.Release() - - // check state of builder after NewBinaryArray - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewBinaryArray did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") -} - -func TestBinaryBuilder_ReserveData(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := 
array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - - // call ReserveData and ensure the capacity doesn't change - // when appending entries until that count. - ab.ReserveData(256) - expCap := ab.DataCap() - for i := 0; i < 256/8; i++ { - ab.Append(bytes.Repeat([]byte("a"), 8)) - } - assert.Equal(t, expCap, ab.DataCap(), "unexpected BinaryArrayBuilder.DataCap()") - - ar := ab.NewBinaryArray() - ab.Release() - ar.Release() - - // check state of builder after NewBinaryArray - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewBinaryArray did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") -} - -func TestBinaryBuilderLarge(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - - exp := [][]byte{[]byte("foo"), []byte("bar"), nil, []byte("sydney"), []byte("cameron")} - for _, v := range exp { - if v == nil { - ab.AppendNull() - } else { - ab.Append(v) - } - } - - assert.Equal(t, len(exp), ab.Len(), "unexpected Len()") - assert.Equal(t, 1, ab.NullN(), "unexpected NullN()") - - for i, v := range exp { - if v == nil { - v = []byte{} - } - assert.Equal(t, v, ab.Value(i), "unexpected BinaryArrayBuilder.Value(%d)", i) - } - - ar := ab.NewLargeBinaryArray() - ab.Release() - ar.Release() - - // check state of builder after NewBinaryArray - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewBinaryArray did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") -} - -func TestBinaryBuilderLarge_ReserveData(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := 
array.NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - - // call ReserveData and ensure the capacity doesn't change - // when appending entries until that count. - ab.ReserveData(256) - expCap := ab.DataCap() - for i := 0; i < 256/8; i++ { - ab.Append(bytes.Repeat([]byte("a"), 8)) - } - assert.Equal(t, expCap, ab.DataCap(), "unexpected BinaryArrayBuilder.DataCap()") - - ar := ab.NewLargeBinaryArray() - ab.Release() - ar.Release() - - // check state of builder after NewBinaryArray - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewBinaryArray did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") -} diff --git a/go/arrow/array/boolean.go b/go/arrow/array/boolean.go deleted file mode 100644 index eab26d273dd96..0000000000000 --- a/go/arrow/array/boolean.go +++ /dev/null @@ -1,126 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "fmt" - "strconv" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// A type which represents an immutable sequence of boolean values. -type Boolean struct { - array - values []byte -} - -// NewBoolean creates a boolean array from the data memory.Buffer and contains length elements. -// The nullBitmap buffer can be nil of there are no null values. -// If nulls is not known, use UnknownNullCount to calculate the value of NullN at runtime from the nullBitmap buffer. -func NewBoolean(length int, data *memory.Buffer, nullBitmap *memory.Buffer, nulls int) *Boolean { - arrdata := NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nullBitmap, data}, nil, nulls, 0) - defer arrdata.Release() - return NewBooleanData(arrdata) -} - -func NewBooleanData(data arrow.ArrayData) *Boolean { - a := &Boolean{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *Boolean) Value(i int) bool { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - return bitutil.BitIsSet(a.values, a.array.data.offset+i) -} - -func (a *Boolean) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } else { - return strconv.FormatBool(a.Value(i)) - } -} - -func (a *Boolean) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", a.Value(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Boolean) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = vals.Bytes() - } -} - -func (a *Boolean) GetOneForMarshal(i int) interface{} { - if a.IsValid(i) { - return a.Value(i) - } - return nil -} - -func (a 
*Boolean) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.Value(i) - } else { - vals[i] = nil - } - } - return json.Marshal(vals) -} - -func arrayEqualBoolean(left, right *Boolean) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -var ( - _ arrow.Array = (*Boolean)(nil) -) diff --git a/go/arrow/array/boolean_test.go b/go/arrow/array/boolean_test.go deleted file mode 100644 index f980497d54521..0000000000000 --- a/go/arrow/array/boolean_test.go +++ /dev/null @@ -1,322 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "fmt" - "reflect" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestBooleanSliceData(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - values := []bool{true, false, true, true, true, true, true, false, true, false} - - b := array.NewBooleanBuilder(pool) - defer b.Release() - - for _, v := range values { - b.Append(v) - } - - arr := b.NewArray().(*array.Boolean) - defer arr.Release() - - if got, want := arr.Len(), len(values); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - vs := make([]bool, arr.Len()) - - for i := range vs { - vs[i] = arr.Value(i) - } - - if got, want := vs, values; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - tests := []struct { - interval [2]int64 - want []bool - }{ - { - interval: [2]int64{0, 0}, - want: []bool{}, - }, - { - interval: [2]int64{10, 10}, - want: []bool{}, - }, - { - interval: [2]int64{0, 5}, - want: []bool{true, false, true, true, true}, - }, - { - interval: [2]int64{5, 10}, - want: []bool{true, true, false, true, false}, - }, - { - interval: [2]int64{2, 7}, - want: []bool{true, true, true, true, true}, - }, - } - - for _, tc := range tests { - t.Run("", func(t *testing.T) { - - slice := array.NewSlice(arr, tc.interval[0], tc.interval[1]).(*array.Boolean) - defer slice.Release() - - if got, want := slice.Len(), len(tc.want); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - vs := make([]bool, slice.Len()) - - for i := range vs { - vs[i] = slice.Value(i) - } - - if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - }) - } -} - -func TestBooleanSliceDataWithNull(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - values := []bool{true, 
false, true, false, false, false, true, false, true, false} - valids := []bool{true, false, true, true, true, true, true, false, true, true} - - b := array.NewBooleanBuilder(pool) - defer b.Release() - - b.AppendValues(values, valids) - - arr := b.NewArray().(*array.Boolean) - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 2; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - vs := make([]bool, arr.Len()) - - for i := range vs { - vs[i] = arr.Value(i) - } - - if got, want := vs, values; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - tests := []struct { - interval [2]int64 - nulls int - want []bool - }{ - { - interval: [2]int64{2, 9}, - nulls: 1, - want: []bool{true, false, false, false, true, false, true}, - }, - { - interval: [2]int64{0, 7}, - nulls: 1, - want: []bool{true, false, true, false, false, false, true}, - }, - { - interval: [2]int64{1, 8}, - nulls: 2, - want: []bool{false, true, false, false, false, true, false}, - }, - { - interval: [2]int64{2, 7}, - nulls: 0, - want: []bool{true, false, false, false, true}, - }, - } - - for _, tc := range tests { - t.Run("", func(t *testing.T) { - - slice := array.NewSlice(arr, tc.interval[0], tc.interval[1]).(*array.Boolean) - defer slice.Release() - - if got, want := slice.NullN(), tc.nulls; got != want { - t.Errorf("got=%d, want=%d", got, want) - } - - if got, want := slice.Len(), len(tc.want); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - vs := make([]bool, slice.Len()) - - for i := range vs { - vs[i] = slice.Value(i) - } - - if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - }) - } -} - -func TestBooleanSliceOutOfBounds(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - values := []bool{true, false, true, false, true, false, 
true, false, true, false} - - b := array.NewBooleanBuilder(pool) - defer b.Release() - - for _, v := range values { - b.Append(v) - } - - arr := b.NewArray().(*array.Boolean) - defer arr.Release() - - slice := array.NewSlice(arr, 3, 8).(*array.Boolean) - defer slice.Release() - - tests := []struct { - index int - panic bool - }{ - { - index: -1, - panic: true, - }, - { - index: 5, - panic: true, - }, - { - index: 0, - panic: false, - }, - { - index: 4, - panic: false, - }, - } - - for _, tc := range tests { - t.Run("", func(t *testing.T) { - - var val bool - - if tc.panic { - defer func() { - e := recover() - if e == nil { - t.Fatalf("this should have panicked, but did not; slice value %v", val) - } - if got, want := e.(string), "arrow/array: index out of range"; got != want { - t.Fatalf("invalid error. got=%q, want=%q", got, want) - } - }() - } else { - defer func() { - if e := recover(); e != nil { - t.Fatalf("unexpected panic: %v", e) - } - }() - } - - val = slice.Value(tc.index) - }) - } -} - -func TestBooleanStringer(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - values = []bool{true, false, true, false, true, false, true, false, true, false} - valids = []bool{true, true, false, true, true, true, false, true, true, true} - ) - - b := array.NewBooleanBuilder(pool) - defer b.Release() - - b.AppendValues(values, valids) - - arr := b.NewArray().(*array.Boolean) - defer arr.Release() - - out := new(strings.Builder) - fmt.Fprintf(out, "%v", arr) - - const want = "[true false (null) false true false (null) false true false]" - if got := out.String(); got != want { - t.Fatalf("invalid stringer:\ngot= %q\nwant=%q", got, want) - } - assert.Equal(t, "true", arr.ValueStr(0)) - assert.Equal(t, "false", arr.ValueStr(1)) - assert.Equal(t, array.NullValueStr, arr.ValueStr(2)) -} - -func TestBooleanStringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []bool{true, false, true, true, true, true, true, false, true, false} - valid := []bool{true, false, false, true, false, true, true, false, true, false} - - b := array.NewBooleanBuilder(mem) - defer b.Release() - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.Boolean) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewBooleanBuilder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Boolean) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} diff --git a/go/arrow/array/booleanbuilder.go b/go/arrow/array/booleanbuilder.go deleted file mode 100644 index 44d33018f94ea..0000000000000 --- a/go/arrow/array/booleanbuilder.go +++ /dev/null @@ -1,263 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "fmt" - "reflect" - "strconv" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type BooleanBuilder struct { - builder - - data *memory.Buffer - rawData []byte -} - -func NewBooleanBuilder(mem memory.Allocator) *BooleanBuilder { - return &BooleanBuilder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *BooleanBuilder) Type() arrow.DataType { return arrow.FixedWidthTypes.Boolean } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (b *BooleanBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *BooleanBuilder) Append(v bool) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *BooleanBuilder) AppendByte(v byte) { - b.Reserve(1) - b.UnsafeAppend(v != 0) -} - -func (b *BooleanBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *BooleanBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *BooleanBuilder) AppendEmptyValue() { - b.Reserve(1) - b.UnsafeAppend(false) -} - -func (b *BooleanBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *BooleanBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - val, err := strconv.ParseBool(s) - if err != nil { - return err - } - b.Append(val) - return nil -} - -func (b *BooleanBuilder) UnsafeAppend(v bool) { 
- bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - if v { - bitutil.SetBit(b.rawData, b.length) - } else { - bitutil.ClearBit(b.rawData, b.length) - } - b.length++ -} - -func (b *BooleanBuilder) AppendValues(v []bool, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - for i, vv := range v { - bitutil.SetBitTo(b.rawData, b.length+i, vv) - } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *BooleanBuilder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.BooleanTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = b.data.Bytes() -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *BooleanBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *BooleanBuilder) Resize(n int) { - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(n, b.init) - b.data.Resize(arrow.BooleanTraits.BytesRequired(n)) - b.rawData = b.data.Bytes() - } -} - -// NewArray creates a Boolean array from the memory buffers used by the builder and resets the BooleanBuilder -// so it can be used to build a new array. -func (b *BooleanBuilder) NewArray() arrow.Array { - return b.NewBooleanArray() -} - -// NewBooleanArray creates a Boolean array from the memory buffers used by the builder and resets the BooleanBuilder -// so it can be used to build a new array. 
-func (b *BooleanBuilder) NewBooleanArray() (a *Boolean) { - data := b.newData() - a = NewBooleanData(data) - data.Release() - return -} - -func (b *BooleanBuilder) newData() *Data { - bytesRequired := arrow.BooleanTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - res := NewData(arrow.FixedWidthTypes.Boolean, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return res -} - -func (b *BooleanBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case bool: - b.Append(v) - case string: - val, err := strconv.ParseBool(v) - if err != nil { - return err - } - b.Append(val) - case json.Number: - val, err := strconv.ParseBool(v.String()) - if err != nil { - return err - } - b.Append(val) - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(true), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *BooleanBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *BooleanBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - dec.UseNumber() - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("boolean builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -func (b *BooleanBuilder) Value(i int) bool { - return bitutil.BitIsSet(b.rawData, i) -} - -var ( - _ Builder = (*BooleanBuilder)(nil) -) diff --git a/go/arrow/array/booleanbuilder_test.go b/go/arrow/array/booleanbuilder_test.go deleted file mode 100644 index 42e49f95a2f3e..0000000000000 --- 
a/go/arrow/array/booleanbuilder_test.go +++ /dev/null @@ -1,103 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestBooleanBuilder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewBooleanBuilder(mem) - - exp := tools.Bools(1, 1, 0, 1, 1, 0) - - b.AppendValues(exp, nil) - assert.NoError(t, b.AppendValueFromString("true")) - assert.NoError(t, b.AppendValueFromString("false")) - exp = tools.Bools(1, 1, 0, 1, 1, 0, 1, 0) - - got := make([]bool, len(exp)) - // make sure we can read the values directly from the builder. 
- for i := 0; i < b.Len(); i++ { - got[i] = b.Value(i) - } - assert.Equal(t, exp, got) - - got = make([]bool, len(exp)) // reset - - a := b.NewBooleanArray() - b.Release() - for i := 0; i < a.Len(); i++ { - got[i] = a.Value(i) - } - assert.Equal(t, exp, got) - - a.Release() -} - -func TestBooleanBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewBooleanBuilder(mem) - defer ab.Release() - - want := tools.Bools(1, 1, 0, 1, 1, 0, 1, 0) - - boolValues := func(a *array.Boolean) []bool { - vs := make([]bool, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - ab.AppendValues([]bool{}, nil) - a := ab.NewBooleanArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewBooleanArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(want, nil) - a = ab.NewBooleanArray() - assert.Equal(t, want, boolValues(a)) - a.Release() - - ab.AppendValues([]bool{}, nil) - ab.AppendValues(want, nil) - a = ab.NewBooleanArray() - assert.Equal(t, want, boolValues(a)) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([]bool{}, nil) - a = ab.NewBooleanArray() - assert.Equal(t, want, boolValues(a)) - a.Release() -} diff --git a/go/arrow/array/bufferbuilder.go b/go/arrow/array/bufferbuilder.go deleted file mode 100644 index 037d220f0b141..0000000000000 --- a/go/arrow/array/bufferbuilder.go +++ /dev/null @@ -1,261 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "sync/atomic" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -type bufBuilder interface { - Retain() - Release() - Len() int - Cap() int - Bytes() []byte - resize(int) - Advance(int) - SetLength(int) - Append([]byte) - Reset() - Finish() *memory.Buffer -} - -// A bufferBuilder provides common functionality for populating memory with a sequence of type-specific values. -// Specialized implementations provide type-safe APIs for appending and accessing the memory. -type bufferBuilder struct { - refCount int64 - mem memory.Allocator - buffer *memory.Buffer - length int - capacity int - - bytes []byte -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (b *bufferBuilder) Retain() { - atomic.AddInt64(&b.refCount, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (b *bufferBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.buffer != nil { - b.buffer.Release() - b.buffer, b.bytes = nil, nil - } - } -} - -// Len returns the length of the memory buffer in bytes. 
-func (b *bufferBuilder) Len() int { return b.length } - -// Cap returns the total number of bytes that can be stored without allocating additional memory. -func (b *bufferBuilder) Cap() int { return b.capacity } - -// Bytes returns a slice of length b.Len(). -// The slice is only valid for use until the next buffer modification. That is, until the next call -// to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next -// buffer modification. -func (b *bufferBuilder) Bytes() []byte { return b.bytes[:b.length] } - -func (b *bufferBuilder) resize(elements int) { - if b.buffer == nil { - b.buffer = memory.NewResizableBuffer(b.mem) - } - - b.buffer.ResizeNoShrink(elements) - oldCapacity := b.capacity - b.capacity = b.buffer.Cap() - b.bytes = b.buffer.Buf() - - if b.capacity > oldCapacity { - memory.Set(b.bytes[oldCapacity:], 0) - } -} - -func (b *bufferBuilder) SetLength(length int) { - if length > b.length { - b.Advance(length) - return - } - - b.length = length -} - -// Advance increases the buffer by length and initializes the skipped bytes to zero. -func (b *bufferBuilder) Advance(length int) { - if b.capacity < b.length+length { - newCapacity := bitutil.NextPowerOf2(b.length + length) - b.resize(newCapacity) - } - b.length += length -} - -// Append appends the contents of v to the buffer, resizing it if necessary. -func (b *bufferBuilder) Append(v []byte) { - if b.capacity < b.length+len(v) { - newCapacity := bitutil.NextPowerOf2(b.length + len(v)) - b.resize(newCapacity) - } - b.unsafeAppend(v) -} - -// Reset returns the buffer to an empty state. Reset releases the memory and sets the length and capacity to zero. 
-func (b *bufferBuilder) Reset() { - if b.buffer != nil { - b.buffer.Release() - } - b.buffer, b.bytes = nil, nil - b.capacity, b.length = 0, 0 -} - -// Finish TODO(sgc) -func (b *bufferBuilder) Finish() (buffer *memory.Buffer) { - if b.length > 0 { - b.buffer.ResizeNoShrink(b.length) - } - buffer = b.buffer - b.buffer = nil - b.Reset() - if buffer == nil { - buffer = memory.NewBufferBytes(nil) - } - return -} - -func (b *bufferBuilder) unsafeAppend(data []byte) { - copy(b.bytes[b.length:], data) - b.length += len(data) -} - -type multiBufferBuilder struct { - refCount int64 - blockSize int - - mem memory.Allocator - blocks []*memory.Buffer - currentOutBuffer int -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (b *multiBufferBuilder) Retain() { - atomic.AddInt64(&b.refCount, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (b *multiBufferBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - b.Reset() - } -} - -func (b *multiBufferBuilder) Reserve(nbytes int) { - if len(b.blocks) == 0 { - out := memory.NewResizableBuffer(b.mem) - if nbytes < b.blockSize { - nbytes = b.blockSize - } - out.Reserve(nbytes) - b.currentOutBuffer = 0 - b.blocks = []*memory.Buffer{out} - return - } - - curBuf := b.blocks[b.currentOutBuffer] - remain := curBuf.Cap() - curBuf.Len() - if nbytes <= remain { - return - } - - // search for underfull block that has enough bytes - for i, block := range b.blocks { - remaining := block.Cap() - block.Len() - if nbytes <= remaining { - b.currentOutBuffer = i - return - } - } - - // current buffer doesn't have enough space, no underfull buffers - // make new buffer and set that as our current. 
- newBuf := memory.NewResizableBuffer(b.mem) - if nbytes < b.blockSize { - nbytes = b.blockSize - } - - newBuf.Reserve(nbytes) - b.currentOutBuffer = len(b.blocks) - b.blocks = append(b.blocks, newBuf) -} - -func (b *multiBufferBuilder) RemainingBytes() int { - if len(b.blocks) == 0 { - return 0 - } - - buf := b.blocks[b.currentOutBuffer] - return buf.Cap() - buf.Len() -} - -func (b *multiBufferBuilder) Reset() { - b.currentOutBuffer = 0 - for _, block := range b.Finish() { - block.Release() - } -} - -func (b *multiBufferBuilder) UnsafeAppend(hdr *arrow.ViewHeader, val []byte) { - buf := b.blocks[b.currentOutBuffer] - idx, offset := b.currentOutBuffer, buf.Len() - hdr.SetIndexOffset(int32(idx), int32(offset)) - - n := copy(buf.Buf()[offset:], val) - buf.ResizeNoShrink(offset + n) -} - -func (b *multiBufferBuilder) UnsafeAppendString(hdr *arrow.ViewHeader, val string) { - // create a byte slice with zero-copies - // in go1.20 this would be equivalent to unsafe.StringData - v := *(*[]byte)(unsafe.Pointer(&struct { - string - int - }{val, len(val)})) - b.UnsafeAppend(hdr, v) -} - -func (b *multiBufferBuilder) Finish() (out []*memory.Buffer) { - b.currentOutBuffer = 0 - out, b.blocks = b.blocks, nil - return -} diff --git a/go/arrow/array/bufferbuilder_byte.go b/go/arrow/array/bufferbuilder_byte.go deleted file mode 100644 index 2ac7ec703b579..0000000000000 --- a/go/arrow/array/bufferbuilder_byte.go +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import "github.com/apache/arrow/go/v18/arrow/memory" - -type byteBufferBuilder struct { - bufferBuilder -} - -func newByteBufferBuilder(mem memory.Allocator) *byteBufferBuilder { - return &byteBufferBuilder{bufferBuilder: bufferBuilder{refCount: 1, mem: mem}} -} - -func (b *byteBufferBuilder) Values() []byte { return b.Bytes() } -func (b *byteBufferBuilder) Value(i int) byte { return b.bytes[i] } diff --git a/go/arrow/array/bufferbuilder_numeric.gen.go b/go/arrow/array/bufferbuilder_numeric.gen.go deleted file mode 100644 index 5215ecf65a312..0000000000000 --- a/go/arrow/array/bufferbuilder_numeric.gen.go +++ /dev/null @@ -1,124 +0,0 @@ -// Code generated by array/bufferbuilder_numeric.gen.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -type int64BufferBuilder struct { - bufferBuilder -} - -func newInt64BufferBuilder(mem memory.Allocator) *int64BufferBuilder { - return &int64BufferBuilder{bufferBuilder: bufferBuilder{refCount: 1, mem: mem}} -} - -// AppendValues appends the contents of v to the buffer, growing the buffer as needed. -func (b *int64BufferBuilder) AppendValues(v []int64) { b.Append(arrow.Int64Traits.CastToBytes(v)) } - -// Values returns a slice of length b.Len(). -// The slice is only valid for use until the next buffer modification. That is, until the next call -// to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next -// buffer modification. -func (b *int64BufferBuilder) Values() []int64 { return arrow.Int64Traits.CastFromBytes(b.Bytes()) } - -// Value returns the int64 element at the index i. Value will panic if i is negative or ≥ Len. -func (b *int64BufferBuilder) Value(i int) int64 { return b.Values()[i] } - -// Len returns the number of int64 elements in the buffer. -func (b *int64BufferBuilder) Len() int { return b.length / arrow.Int64SizeBytes } - -// AppendValue appends v to the buffer, growing the buffer as needed. -func (b *int64BufferBuilder) AppendValue(v int64) { - if b.capacity < b.length+arrow.Int64SizeBytes { - newCapacity := bitutil.NextPowerOf2(b.length + arrow.Int64SizeBytes) - b.resize(newCapacity) - } - arrow.Int64Traits.PutValue(b.bytes[b.length:], v) - b.length += arrow.Int64SizeBytes -} - -type int32BufferBuilder struct { - bufferBuilder -} - -func newInt32BufferBuilder(mem memory.Allocator) *int32BufferBuilder { - return &int32BufferBuilder{bufferBuilder: bufferBuilder{refCount: 1, mem: mem}} -} - -// AppendValues appends the contents of v to the buffer, growing the buffer as needed. 
-func (b *int32BufferBuilder) AppendValues(v []int32) { b.Append(arrow.Int32Traits.CastToBytes(v)) } - -// Values returns a slice of length b.Len(). -// The slice is only valid for use until the next buffer modification. That is, until the next call -// to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next -// buffer modification. -func (b *int32BufferBuilder) Values() []int32 { return arrow.Int32Traits.CastFromBytes(b.Bytes()) } - -// Value returns the int32 element at the index i. Value will panic if i is negative or ≥ Len. -func (b *int32BufferBuilder) Value(i int) int32 { return b.Values()[i] } - -// Len returns the number of int32 elements in the buffer. -func (b *int32BufferBuilder) Len() int { return b.length / arrow.Int32SizeBytes } - -// AppendValue appends v to the buffer, growing the buffer as needed. -func (b *int32BufferBuilder) AppendValue(v int32) { - if b.capacity < b.length+arrow.Int32SizeBytes { - newCapacity := bitutil.NextPowerOf2(b.length + arrow.Int32SizeBytes) - b.resize(newCapacity) - } - arrow.Int32Traits.PutValue(b.bytes[b.length:], v) - b.length += arrow.Int32SizeBytes -} - -type int8BufferBuilder struct { - bufferBuilder -} - -func newInt8BufferBuilder(mem memory.Allocator) *int8BufferBuilder { - return &int8BufferBuilder{bufferBuilder: bufferBuilder{refCount: 1, mem: mem}} -} - -// AppendValues appends the contents of v to the buffer, growing the buffer as needed. -func (b *int8BufferBuilder) AppendValues(v []int8) { b.Append(arrow.Int8Traits.CastToBytes(v)) } - -// Values returns a slice of length b.Len(). -// The slice is only valid for use until the next buffer modification. That is, until the next call -// to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next -// buffer modification. 
-func (b *int8BufferBuilder) Values() []int8 { return arrow.Int8Traits.CastFromBytes(b.Bytes()) } - -// Value returns the int8 element at the index i. Value will panic if i is negative or ≥ Len. -func (b *int8BufferBuilder) Value(i int) int8 { return b.Values()[i] } - -// Len returns the number of int8 elements in the buffer. -func (b *int8BufferBuilder) Len() int { return b.length / arrow.Int8SizeBytes } - -// AppendValue appends v to the buffer, growing the buffer as needed. -func (b *int8BufferBuilder) AppendValue(v int8) { - if b.capacity < b.length+arrow.Int8SizeBytes { - newCapacity := bitutil.NextPowerOf2(b.length + arrow.Int8SizeBytes) - b.resize(newCapacity) - } - arrow.Int8Traits.PutValue(b.bytes[b.length:], v) - b.length += arrow.Int8SizeBytes -} diff --git a/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl b/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl deleted file mode 100644 index 2b7fcaefcdeb2..0000000000000 --- a/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -{{range .In}} -{{$TypeNamePrefix := .name}} -{{if .Opt.BufferBuilder}} -type {{$TypeNamePrefix}}BufferBuilder struct { - bufferBuilder -} - -func new{{.Name}}BufferBuilder(mem memory.Allocator) *{{$TypeNamePrefix}}BufferBuilder { - return &{{$TypeNamePrefix}}BufferBuilder{bufferBuilder:bufferBuilder{refCount: 1, mem:mem}} -} - -// AppendValues appends the contents of v to the buffer, growing the buffer as needed. -func (b *{{$TypeNamePrefix}}BufferBuilder) AppendValues(v []{{.Type}}) { b.Append(arrow.{{.Name}}Traits.CastToBytes(v)) } - -// Values returns a slice of length b.Len(). -// The slice is only valid for use until the next buffer modification. That is, until the next call -// to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next -// buffer modification. -func (b *{{$TypeNamePrefix}}BufferBuilder) Values() []{{.Type}} { return arrow.{{.Name}}Traits.CastFromBytes(b.Bytes()) } - -// Value returns the {{.Type}} element at the index i. Value will panic if i is negative or ≥ Len. -func (b *{{$TypeNamePrefix}}BufferBuilder) Value(i int) {{.Type}} { return b.Values()[i] } - -// Len returns the number of {{.Type}} elements in the buffer. -func (b *{{$TypeNamePrefix}}BufferBuilder) Len() int { return b.length/arrow.{{.Name}}SizeBytes } - -// AppendValue appends v to the buffer, growing the buffer as needed. 
-func (b *{{$TypeNamePrefix}}BufferBuilder) AppendValue(v {{.Type}}) { - if b.capacity < b.length+arrow.{{.Name}}SizeBytes { - newCapacity := bitutil.NextPowerOf2(b.length + arrow.{{.Name}}SizeBytes) - b.resize(newCapacity) - } - arrow.{{.Name}}Traits.PutValue(b.bytes[b.length:], v) - b.length+=arrow.{{.Name}}SizeBytes -} -{{end}} -{{end}} diff --git a/go/arrow/array/bufferbuilder_numeric_test.go b/go/arrow/array/bufferbuilder_numeric_test.go deleted file mode 100644 index 3c947c87eeaac..0000000000000 --- a/go/arrow/array/bufferbuilder_numeric_test.go +++ /dev/null @@ -1,106 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "testing" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow/endian" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestInt32BufferBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - bb := newInt32BufferBuilder(mem) - exp := []int32{0x01020304, 0x05060708, 0x090a0b0c, 0x0d0e0f01, 0x02030405, 0x06070809} - bb.AppendValues(exp[:3]) - bb.AppendValues(exp[3:]) - - var expBuf []byte - if endian.IsBigEndian { - expBuf = []byte{ - 0x01, 0x02, 0x03, 0x04, - 0x05, 0x06, 0x07, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, - 0x0d, 0x0e, 0x0f, 0x01, - 0x02, 0x03, 0x04, 0x05, - 0x06, 0x07, 0x08, 0x09, - } - } else { - expBuf = []byte{ - 0x04, 0x03, 0x02, 0x01, - 0x08, 0x07, 0x06, 0x05, - 0x0c, 0x0b, 0x0a, 0x09, - 0x01, 0x0f, 0x0e, 0x0d, - 0x05, 0x04, 0x03, 0x02, - 0x09, 0x08, 0x07, 0x06, - } - } - assert.Equal(t, expBuf, bb.Bytes(), "unexpected byte values") - assert.Equal(t, exp, bb.Values(), "unexpected int32 values") - assert.Equal(t, len(exp), bb.Len(), "unexpected Len()") - - buflen := bb.Len() - bfr := bb.Finish() - assert.Equal(t, buflen*int(unsafe.Sizeof(int32(0))), bfr.Len(), "Buffer was not resized") - assert.Len(t, bfr.Bytes(), bfr.Len(), "Buffer.Bytes() != Buffer.Len()") - bfr.Release() - - assert.Len(t, bb.Bytes(), 0, "BufferBuilder was not reset after Finish") - assert.Zero(t, bb.Len(), "BufferBuilder was not reset after Finish") - bb.Release() -} - -func TestInt32BufferBuilder_AppendValue(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - bb := newInt32BufferBuilder(mem) - exp := []int32{0x01020304, 0x05060708, 0x090a0b0c, 0x0d0e0f01, 0x02030405, 0x06070809} - for _, v := range exp { - bb.AppendValue(v) - } - - var expBuf []byte - if endian.IsBigEndian { - expBuf = []byte{ - 0x01, 0x02, 0x03, 0x04, - 0x05, 0x06, 0x07, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, - 0x0d, 0x0e, 0x0f, 
0x01, - 0x02, 0x03, 0x04, 0x05, - 0x06, 0x07, 0x08, 0x09, - } - } else { - expBuf = []byte{ - 0x04, 0x03, 0x02, 0x01, - 0x08, 0x07, 0x06, 0x05, - 0x0c, 0x0b, 0x0a, 0x09, - 0x01, 0x0f, 0x0e, 0x0d, - 0x05, 0x04, 0x03, 0x02, - 0x09, 0x08, 0x07, 0x06, - } - } - assert.Equal(t, expBuf, bb.Bytes(), "unexpected byte values") - assert.Equal(t, exp, bb.Values(), "unexpected int32 values") - assert.Equal(t, len(exp), bb.Len(), "unexpected Len()") - bb.Release() -} diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go deleted file mode 100644 index 1f4d0ea963509..0000000000000 --- a/go/arrow/array/builder.go +++ /dev/null @@ -1,374 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "fmt" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -const ( - minBuilderCapacity = 1 << 5 -) - -// Builder provides an interface to build arrow arrays. 
-type Builder interface { - // you can unmarshal a json array to add the values to a builder - json.Unmarshaler - - // Type returns the datatype that this is building - Type() arrow.DataType - - // Retain increases the reference count by 1. - // Retain may be called simultaneously from multiple goroutines. - Retain() - - // Release decreases the reference count by 1. - Release() - - // Len returns the number of elements in the array builder. - Len() int - - // Cap returns the total number of elements that can be stored - // without allocating additional memory. - Cap() int - - // NullN returns the number of null values in the array builder. - NullN() int - - // AppendNull adds a new null value to the array being built. - AppendNull() - - // AppendNulls adds new n null values to the array being built. - AppendNulls(n int) - - // AppendEmptyValue adds a new zero value of the appropriate type - AppendEmptyValue() - - // AppendEmptyValues adds new n zero values of the appropriate type - AppendEmptyValues(n int) - - // AppendValueFromString adds a new value from a string. Inverse of array.ValueStr(i int) string - AppendValueFromString(string) error - - // Reserve ensures there is enough space for appending n elements - // by checking the capacity and calling Resize if necessary. - Reserve(n int) - - // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), - // additional memory will be allocated. If n is smaller, the allocated memory may reduced. - Resize(n int) - - // NewArray creates a new array from the memory buffers used - // by the builder and resets the Builder so it can be used to build - // a new array. - NewArray() arrow.Array - - // IsNull returns if a previously appended value at a given index is null or not. - IsNull(i int) bool - - // SetNull sets the value at index i to null. 
- SetNull(i int) - - UnsafeAppendBoolToBitmap(bool) - - init(capacity int) - resize(newBits int, init func(int)) - - UnmarshalOne(*json.Decoder) error - Unmarshal(*json.Decoder) error - - newData() *Data -} - -// builder provides common functionality for managing the validity bitmap (nulls) when building arrays. -type builder struct { - refCount int64 - mem memory.Allocator - nullBitmap *memory.Buffer - nulls int - length int - capacity int -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (b *builder) Retain() { - atomic.AddInt64(&b.refCount, 1) -} - -// Len returns the number of elements in the array builder. -func (b *builder) Len() int { return b.length } - -// Cap returns the total number of elements that can be stored without allocating additional memory. -func (b *builder) Cap() int { return b.capacity } - -// NullN returns the number of null values in the array builder. -func (b *builder) NullN() int { return b.nulls } - -func (b *builder) IsNull(i int) bool { - return b.nullBitmap.Len() != 0 && bitutil.BitIsNotSet(b.nullBitmap.Bytes(), i) -} - -func (b *builder) SetNull(i int) { - if i < 0 || i >= b.length { - panic("arrow/array: index out of range") - } - bitutil.ClearBit(b.nullBitmap.Bytes(), i) -} - -func (b *builder) init(capacity int) { - toAlloc := bitutil.CeilByte(capacity) / 8 - b.nullBitmap = memory.NewResizableBuffer(b.mem) - b.nullBitmap.Resize(toAlloc) - b.capacity = capacity - memory.Set(b.nullBitmap.Buf(), 0) -} - -func (b *builder) reset() { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - - b.nulls = 0 - b.length = 0 - b.capacity = 0 -} - -func (b *builder) resize(newBits int, init func(int)) { - if b.nullBitmap == nil { - init(newBits) - return - } - - newBytesN := bitutil.CeilByte(newBits) / 8 - oldBytesN := b.nullBitmap.Len() - b.nullBitmap.Resize(newBytesN) - b.capacity = newBits - if oldBytesN < newBytesN { - // TODO(sgc): necessary? 
- memory.Set(b.nullBitmap.Buf()[oldBytesN:], 0) - } - if newBits < b.length { - b.length = newBits - b.nulls = newBits - bitutil.CountSetBits(b.nullBitmap.Buf(), 0, newBits) - } -} - -func (b *builder) reserve(elements int, resize func(int)) { - if b.nullBitmap == nil { - b.nullBitmap = memory.NewResizableBuffer(b.mem) - } - if b.length+elements > b.capacity { - newCap := bitutil.NextPowerOf2(b.length + elements) - resize(newCap) - } -} - -// unsafeAppendBoolsToBitmap appends the contents of valid to the validity bitmap. -// As an optimization, if the valid slice is empty, the next length bits will be set to valid (not null). -func (b *builder) unsafeAppendBoolsToBitmap(valid []bool, length int) { - if len(valid) == 0 { - b.unsafeSetValid(length) - return - } - - byteOffset := b.length / 8 - bitOffset := byte(b.length % 8) - nullBitmap := b.nullBitmap.Bytes() - bitSet := nullBitmap[byteOffset] - - for _, v := range valid { - if bitOffset == 8 { - bitOffset = 0 - nullBitmap[byteOffset] = bitSet - byteOffset++ - bitSet = nullBitmap[byteOffset] - } - - if v { - bitSet |= bitutil.BitMask[bitOffset] - } else { - bitSet &= bitutil.FlippedBitMask[bitOffset] - b.nulls++ - } - bitOffset++ - } - - if bitOffset != 0 { - nullBitmap[byteOffset] = bitSet - } - b.length += len(valid) -} - -// unsafeSetValid sets the next length bits to valid in the validity bitmap. 
-func (b *builder) unsafeSetValid(length int) { - padToByte := min(8-(b.length%8), length) - if padToByte == 8 { - padToByte = 0 - } - bits := b.nullBitmap.Bytes() - for i := b.length; i < b.length+padToByte; i++ { - bitutil.SetBit(bits, i) - } - - start := (b.length + padToByte) / 8 - fastLength := (length - padToByte) / 8 - memory.Set(bits[start:start+fastLength], 0xff) - - newLength := b.length + length - // trailing bytes - for i := b.length + padToByte + (fastLength * 8); i < newLength; i++ { - bitutil.SetBit(bits, i) - } - - b.length = newLength -} - -func (b *builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder { - // FIXME(sbinet): use a type switch on dtype instead? - switch dtype.ID() { - case arrow.NULL: - return NewNullBuilder(mem) - case arrow.BOOL: - return NewBooleanBuilder(mem) - case arrow.UINT8: - return NewUint8Builder(mem) - case arrow.INT8: - return NewInt8Builder(mem) - case arrow.UINT16: - return NewUint16Builder(mem) - case arrow.INT16: - return NewInt16Builder(mem) - case arrow.UINT32: - return NewUint32Builder(mem) - case arrow.INT32: - return NewInt32Builder(mem) - case arrow.UINT64: - return NewUint64Builder(mem) - case arrow.INT64: - return NewInt64Builder(mem) - case arrow.FLOAT16: - return NewFloat16Builder(mem) - case arrow.FLOAT32: - return NewFloat32Builder(mem) - case arrow.FLOAT64: - return NewFloat64Builder(mem) - case arrow.STRING: - return NewStringBuilder(mem) - case arrow.LARGE_STRING: - return NewLargeStringBuilder(mem) - case arrow.BINARY: - return NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - case arrow.LARGE_BINARY: - return NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - case arrow.FIXED_SIZE_BINARY: - typ := dtype.(*arrow.FixedSizeBinaryType) - return NewFixedSizeBinaryBuilder(mem, typ) - case arrow.DATE32: - return 
NewDate32Builder(mem) - case arrow.DATE64: - return NewDate64Builder(mem) - case arrow.TIMESTAMP: - typ := dtype.(*arrow.TimestampType) - return NewTimestampBuilder(mem, typ) - case arrow.TIME32: - typ := dtype.(*arrow.Time32Type) - return NewTime32Builder(mem, typ) - case arrow.TIME64: - typ := dtype.(*arrow.Time64Type) - return NewTime64Builder(mem, typ) - case arrow.INTERVAL_MONTHS: - return NewMonthIntervalBuilder(mem) - case arrow.INTERVAL_DAY_TIME: - return NewDayTimeIntervalBuilder(mem) - case arrow.INTERVAL_MONTH_DAY_NANO: - return NewMonthDayNanoIntervalBuilder(mem) - case arrow.DECIMAL128: - if typ, ok := dtype.(*arrow.Decimal128Type); ok { - return NewDecimal128Builder(mem, typ) - } - case arrow.DECIMAL256: - if typ, ok := dtype.(*arrow.Decimal256Type); ok { - return NewDecimal256Builder(mem, typ) - } - case arrow.LIST: - typ := dtype.(*arrow.ListType) - return NewListBuilderWithField(mem, typ.ElemField()) - case arrow.STRUCT: - typ := dtype.(*arrow.StructType) - return NewStructBuilder(mem, typ) - case arrow.SPARSE_UNION: - typ := dtype.(*arrow.SparseUnionType) - return NewSparseUnionBuilder(mem, typ) - case arrow.DENSE_UNION: - typ := dtype.(*arrow.DenseUnionType) - return NewDenseUnionBuilder(mem, typ) - case arrow.DICTIONARY: - typ := dtype.(*arrow.DictionaryType) - return NewDictionaryBuilder(mem, typ) - case arrow.LARGE_LIST: - typ := dtype.(*arrow.LargeListType) - return NewLargeListBuilderWithField(mem, typ.ElemField()) - case arrow.MAP: - typ := dtype.(*arrow.MapType) - return NewMapBuilderWithType(mem, typ) - case arrow.LIST_VIEW: - typ := dtype.(*arrow.ListViewType) - return NewListViewBuilderWithField(mem, typ.ElemField()) - case arrow.LARGE_LIST_VIEW: - typ := dtype.(*arrow.LargeListViewType) - return NewLargeListViewBuilderWithField(mem, typ.ElemField()) - case arrow.EXTENSION: - if custom, ok := dtype.(CustomExtensionBuilder); ok { - return custom.NewBuilder(mem) - } - if typ, ok := dtype.(arrow.ExtensionType); ok { - return 
NewExtensionBuilder(mem, typ) - } - panic(fmt.Errorf("arrow/array: invalid extension type: %T", dtype)) - case arrow.FIXED_SIZE_LIST: - typ := dtype.(*arrow.FixedSizeListType) - return NewFixedSizeListBuilderWithField(mem, typ.Len(), typ.ElemField()) - case arrow.DURATION: - typ := dtype.(*arrow.DurationType) - return NewDurationBuilder(mem, typ) - case arrow.RUN_END_ENCODED: - typ := dtype.(*arrow.RunEndEncodedType) - return NewRunEndEncodedBuilder(mem, typ.RunEnds(), typ.Encoded()) - case arrow.BINARY_VIEW: - return NewBinaryViewBuilder(mem) - case arrow.STRING_VIEW: - return NewStringViewBuilder(mem) - } - panic(fmt.Errorf("arrow/array: unsupported builder for %T", dtype)) -} diff --git a/go/arrow/array/builder_test.go b/go/arrow/array/builder_test.go deleted file mode 100644 index 7eb2b3f7cf9e3..0000000000000 --- a/go/arrow/array/builder_test.go +++ /dev/null @@ -1,123 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestBuilder_Init(t *testing.T) { - type exp struct{ size int } - tests := []struct { - name string - cap int - - exp exp - }{ - {"07 bits", 07, exp{size: 1}}, - {"19 bits", 19, exp{size: 3}}, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ab := &builder{mem: memory.NewGoAllocator()} - ab.init(test.cap) - assert.Equal(t, test.cap, ab.Cap(), "invalid capacity") - assert.Equal(t, test.exp.size, ab.nullBitmap.Len(), "invalid length") - }) - } -} - -func TestBuilder_UnsafeSetValid(t *testing.T) { - ab := &builder{mem: memory.NewGoAllocator()} - ab.init(32) - ab.unsafeAppendBoolsToBitmap(tools.Bools(0, 0, 0, 0, 0), 5) - assert.Equal(t, 5, ab.Len()) - assert.Equal(t, []byte{0, 0, 0, 0}, ab.nullBitmap.Bytes()) - - ab.unsafeSetValid(17) - assert.Equal(t, []byte{0xe0, 0xff, 0x3f, 0}, ab.nullBitmap.Bytes()) -} - -func TestBuilder_resize(t *testing.T) { - b := &builder{mem: memory.NewGoAllocator()} - n := 64 - - b.init(n) - assert.Equal(t, n, b.Cap()) - assert.Equal(t, 0, b.Len()) - - b.UnsafeAppendBoolToBitmap(true) - for i := 1; i < n; i++ { - b.UnsafeAppendBoolToBitmap(false) - } - assert.Equal(t, n, b.Cap()) - assert.Equal(t, n, b.Len()) - assert.Equal(t, n-1, b.NullN()) - - n = 5 - b.resize(n, b.init) - assert.Equal(t, n, b.Len()) - assert.Equal(t, n-1, b.NullN()) - - b.resize(32, b.init) - assert.Equal(t, n, b.Len()) - assert.Equal(t, n-1, b.NullN()) -} - -func TestBuilder_IsNull(t *testing.T) { - b := &builder{mem: memory.NewGoAllocator()} - n := 32 - b.init(n) - - assert.True(t, b.IsNull(0)) - assert.True(t, b.IsNull(1)) - - for i := 0; i < n; i++ { - b.UnsafeAppendBoolToBitmap(i%2 == 0) - } - for i := 0; i < n; i++ { - assert.Equal(t, i%2 != 0, b.IsNull(i)) - } -} - -func TestBuilder_SetNull(t *testing.T) { - b := &builder{mem: 
memory.NewGoAllocator()} - n := 32 - b.init(n) - - for i := 0; i < n; i++ { - // Set everything to true - b.UnsafeAppendBoolToBitmap(true) - } - for i := 0; i < n; i++ { - if i%2 == 0 { // Set all even numbers to null - b.SetNull(i) - } - } - - for i := 0; i < n; i++ { - if i%2 == 0 { - assert.True(t, b.IsNull(i)) - } else { - assert.False(t, b.IsNull(i)) - } - } -} diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go deleted file mode 100644 index a54c1e23c1e1c..0000000000000 --- a/go/arrow/array/compare.go +++ /dev/null @@ -1,854 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "fmt" - "math" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/float16" - "github.com/apache/arrow/go/v18/internal/bitutils" -) - -// RecordEqual reports whether the two provided records are equal. 
-func RecordEqual(left, right arrow.Record) bool { - switch { - case left.NumCols() != right.NumCols(): - return false - case left.NumRows() != right.NumRows(): - return false - } - - for i := range left.Columns() { - lc := left.Column(i) - rc := right.Column(i) - if !Equal(lc, rc) { - return false - } - } - return true -} - -// RecordApproxEqual reports whether the two provided records are approximately equal. -// For non-floating point columns, it is equivalent to RecordEqual. -func RecordApproxEqual(left, right arrow.Record, opts ...EqualOption) bool { - switch { - case left.NumCols() != right.NumCols(): - return false - case left.NumRows() != right.NumRows(): - return false - } - - opt := newEqualOption(opts...) - - for i := range left.Columns() { - lc := left.Column(i) - rc := right.Column(i) - if !arrayApproxEqual(lc, rc, opt) { - return false - } - } - return true -} - -// helper function to evaluate a function on two chunked object having possibly different -// chunk layouts. the function passed in will be called for each corresponding slice of the -// two chunked arrays and if the function returns false it will end the loop early. 
-func chunkedBinaryApply(left, right *arrow.Chunked, fn func(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64) bool) { - var ( - pos int64 - length int64 = int64(left.Len()) - leftIdx, rightIdx int - leftPos, rightPos int64 - ) - - for pos < length { - var cleft, cright arrow.Array - for { - cleft, cright = left.Chunk(leftIdx), right.Chunk(rightIdx) - if leftPos == int64(cleft.Len()) { - leftPos = 0 - leftIdx++ - continue - } - if rightPos == int64(cright.Len()) { - rightPos = 0 - rightIdx++ - continue - } - break - } - - sz := int64(min(cleft.Len()-int(leftPos), cright.Len()-int(rightPos))) - pos += sz - if !fn(cleft, leftPos, leftPos+sz, cright, rightPos, rightPos+sz) { - return - } - - leftPos += sz - rightPos += sz - } -} - -// ChunkedEqual reports whether two chunked arrays are equal regardless of their chunkings -func ChunkedEqual(left, right *arrow.Chunked) bool { - switch { - case left == right: - return true - case left.Len() != right.Len(): - return false - case left.NullN() != right.NullN(): - return false - case !arrow.TypeEqual(left.DataType(), right.DataType()): - return false - } - - var isequal bool = true - chunkedBinaryApply(left, right, func(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64) bool { - isequal = SliceEqual(left, lbeg, lend, right, rbeg, rend) - return isequal - }) - - return isequal -} - -// ChunkedApproxEqual reports whether two chunked arrays are approximately equal regardless of their chunkings -// for non-floating point arrays, this is equivalent to ChunkedEqual -func ChunkedApproxEqual(left, right *arrow.Chunked, opts ...EqualOption) bool { - switch { - case left == right: - return true - case left.Len() != right.Len(): - return false - case left.NullN() != right.NullN(): - return false - case !arrow.TypeEqual(left.DataType(), right.DataType()): - return false - } - - var isequal bool - chunkedBinaryApply(left, right, func(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, 
rend int64) bool { - isequal = SliceApproxEqual(left, lbeg, lend, right, rbeg, rend, opts...) - return isequal - }) - - return isequal -} - -// TableEqual returns if the two tables have the same data in the same schema -func TableEqual(left, right arrow.Table) bool { - switch { - case left.NumCols() != right.NumCols(): - return false - case left.NumRows() != right.NumRows(): - return false - } - - for i := 0; int64(i) < left.NumCols(); i++ { - lc := left.Column(i) - rc := right.Column(i) - if !lc.Field().Equal(rc.Field()) { - return false - } - - if !ChunkedEqual(lc.Data(), rc.Data()) { - return false - } - } - return true -} - -// TableEqual returns if the two tables have the approximately equal data in the same schema -func TableApproxEqual(left, right arrow.Table, opts ...EqualOption) bool { - switch { - case left.NumCols() != right.NumCols(): - return false - case left.NumRows() != right.NumRows(): - return false - } - - for i := 0; int64(i) < left.NumCols(); i++ { - lc := left.Column(i) - rc := right.Column(i) - if !lc.Field().Equal(rc.Field()) { - return false - } - - if !ChunkedApproxEqual(lc.Data(), rc.Data(), opts...) { - return false - } - } - return true -} - -// Equal reports whether the two provided arrays are equal. -func Equal(left, right arrow.Array) bool { - switch { - case !baseArrayEqual(left, right): - return false - case left.Len() == 0: - return true - case left.NullN() == left.Len(): - return true - } - - // at this point, we know both arrays have same type, same length, same number of nulls - // and nulls at the same place. - // compare the values. 
- - switch l := left.(type) { - case *Null: - return true - case *Boolean: - r := right.(*Boolean) - return arrayEqualBoolean(l, r) - case *FixedSizeBinary: - r := right.(*FixedSizeBinary) - return arrayEqualFixedSizeBinary(l, r) - case *Binary: - r := right.(*Binary) - return arrayEqualBinary(l, r) - case *String: - r := right.(*String) - return arrayEqualString(l, r) - case *LargeBinary: - r := right.(*LargeBinary) - return arrayEqualLargeBinary(l, r) - case *LargeString: - r := right.(*LargeString) - return arrayEqualLargeString(l, r) - case *BinaryView: - r := right.(*BinaryView) - return arrayEqualBinaryView(l, r) - case *StringView: - r := right.(*StringView) - return arrayEqualStringView(l, r) - case *Int8: - r := right.(*Int8) - return arrayEqualInt8(l, r) - case *Int16: - r := right.(*Int16) - return arrayEqualInt16(l, r) - case *Int32: - r := right.(*Int32) - return arrayEqualInt32(l, r) - case *Int64: - r := right.(*Int64) - return arrayEqualInt64(l, r) - case *Uint8: - r := right.(*Uint8) - return arrayEqualUint8(l, r) - case *Uint16: - r := right.(*Uint16) - return arrayEqualUint16(l, r) - case *Uint32: - r := right.(*Uint32) - return arrayEqualUint32(l, r) - case *Uint64: - r := right.(*Uint64) - return arrayEqualUint64(l, r) - case *Float16: - r := right.(*Float16) - return arrayEqualFloat16(l, r) - case *Float32: - r := right.(*Float32) - return arrayEqualFloat32(l, r) - case *Float64: - r := right.(*Float64) - return arrayEqualFloat64(l, r) - case *Decimal128: - r := right.(*Decimal128) - return arrayEqualDecimal128(l, r) - case *Decimal256: - r := right.(*Decimal256) - return arrayEqualDecimal256(l, r) - case *Date32: - r := right.(*Date32) - return arrayEqualDate32(l, r) - case *Date64: - r := right.(*Date64) - return arrayEqualDate64(l, r) - case *Time32: - r := right.(*Time32) - return arrayEqualTime32(l, r) - case *Time64: - r := right.(*Time64) - return arrayEqualTime64(l, r) - case *Timestamp: - r := right.(*Timestamp) - return 
arrayEqualTimestamp(l, r) - case *List: - r := right.(*List) - return arrayEqualList(l, r) - case *LargeList: - r := right.(*LargeList) - return arrayEqualLargeList(l, r) - case *ListView: - r := right.(*ListView) - return arrayEqualListView(l, r) - case *LargeListView: - r := right.(*LargeListView) - return arrayEqualLargeListView(l, r) - case *FixedSizeList: - r := right.(*FixedSizeList) - return arrayEqualFixedSizeList(l, r) - case *Struct: - r := right.(*Struct) - return arrayEqualStruct(l, r) - case *MonthInterval: - r := right.(*MonthInterval) - return arrayEqualMonthInterval(l, r) - case *DayTimeInterval: - r := right.(*DayTimeInterval) - return arrayEqualDayTimeInterval(l, r) - case *MonthDayNanoInterval: - r := right.(*MonthDayNanoInterval) - return arrayEqualMonthDayNanoInterval(l, r) - case *Duration: - r := right.(*Duration) - return arrayEqualDuration(l, r) - case *Map: - r := right.(*Map) - return arrayEqualMap(l, r) - case ExtensionArray: - r := right.(ExtensionArray) - return arrayEqualExtension(l, r) - case *Dictionary: - r := right.(*Dictionary) - return arrayEqualDict(l, r) - case *SparseUnion: - r := right.(*SparseUnion) - return arraySparseUnionEqual(l, r) - case *DenseUnion: - r := right.(*DenseUnion) - return arrayDenseUnionEqual(l, r) - case *RunEndEncoded: - r := right.(*RunEndEncoded) - return arrayRunEndEncodedEqual(l, r) - default: - panic(fmt.Errorf("arrow/array: unknown array type %T", l)) - } -} - -// SliceEqual reports whether slices left[lbeg:lend] and right[rbeg:rend] are equal. -func SliceEqual(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64) bool { - l := NewSlice(left, lbeg, lend) - defer l.Release() - r := NewSlice(right, rbeg, rend) - defer r.Release() - - return Equal(l, r) -} - -// SliceApproxEqual reports whether slices left[lbeg:lend] and right[rbeg:rend] are approximately equal. 
-func SliceApproxEqual(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64, opts ...EqualOption) bool { - opt := newEqualOption(opts...) - return sliceApproxEqual(left, lbeg, lend, right, rbeg, rend, opt) -} - -func sliceApproxEqual(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64, opt equalOption) bool { - l := NewSlice(left, lbeg, lend) - defer l.Release() - r := NewSlice(right, rbeg, rend) - defer r.Release() - - return arrayApproxEqual(l, r, opt) -} - -const defaultAbsoluteTolerance = 1e-5 - -type equalOption struct { - atol float64 // absolute tolerance - nansEq bool // whether NaNs are considered equal. - unorderedMapKeys bool // whether maps are allowed to have different entries order -} - -func (eq equalOption) f16(f1, f2 float16.Num) bool { - v1 := float64(f1.Float32()) - v2 := float64(f2.Float32()) - switch { - case eq.nansEq: - return math.Abs(v1-v2) <= eq.atol || (math.IsNaN(v1) && math.IsNaN(v2)) - default: - return math.Abs(v1-v2) <= eq.atol - } -} - -func (eq equalOption) f32(f1, f2 float32) bool { - v1 := float64(f1) - v2 := float64(f2) - switch { - case eq.nansEq: - return v1 == v2 || math.Abs(v1-v2) <= eq.atol || (math.IsNaN(v1) && math.IsNaN(v2)) - default: - return v1 == v2 || math.Abs(v1-v2) <= eq.atol - } -} - -func (eq equalOption) f64(v1, v2 float64) bool { - switch { - case eq.nansEq: - return v1 == v2 || math.Abs(v1-v2) <= eq.atol || (math.IsNaN(v1) && math.IsNaN(v2)) - default: - return v1 == v2 || math.Abs(v1-v2) <= eq.atol - } -} - -func newEqualOption(opts ...EqualOption) equalOption { - eq := equalOption{ - atol: defaultAbsoluteTolerance, - nansEq: false, - } - for _, opt := range opts { - opt(&eq) - } - - return eq -} - -// EqualOption is a functional option type used to configure how Records and Arrays are compared. -type EqualOption func(*equalOption) - -// WithNaNsEqual configures the comparison functions so that NaNs are considered equal. 
-func WithNaNsEqual(v bool) EqualOption { - return func(o *equalOption) { - o.nansEq = v - } -} - -// WithAbsTolerance configures the comparison functions so that 2 floating point values -// v1 and v2 are considered equal if |v1-v2| <= atol. -func WithAbsTolerance(atol float64) EqualOption { - return func(o *equalOption) { - o.atol = atol - } -} - -// WithUnorderedMapKeys configures the comparison functions so that Map with different entries order are considered equal. -func WithUnorderedMapKeys(v bool) EqualOption { - return func(o *equalOption) { - o.unorderedMapKeys = v - } -} - -// ApproxEqual reports whether the two provided arrays are approximately equal. -// For non-floating point arrays, it is equivalent to Equal. -func ApproxEqual(left, right arrow.Array, opts ...EqualOption) bool { - opt := newEqualOption(opts...) - return arrayApproxEqual(left, right, opt) -} - -func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool { - switch { - case !baseArrayEqual(left, right): - return false - case left.Len() == 0: - return true - case left.NullN() == left.Len(): - return true - } - - // at this point, we know both arrays have same type, same length, same number of nulls - // and nulls at the same place. - // compare the values. 
- - switch l := left.(type) { - case *Null: - return true - case *Boolean: - r := right.(*Boolean) - return arrayEqualBoolean(l, r) - case *FixedSizeBinary: - r := right.(*FixedSizeBinary) - return arrayEqualFixedSizeBinary(l, r) - case *Binary: - r := right.(*Binary) - return arrayEqualBinary(l, r) - case *String: - r := right.(*String) - return arrayEqualString(l, r) - case *LargeBinary: - r := right.(*LargeBinary) - return arrayEqualLargeBinary(l, r) - case *LargeString: - r := right.(*LargeString) - return arrayEqualLargeString(l, r) - case *BinaryView: - r := right.(*BinaryView) - return arrayEqualBinaryView(l, r) - case *StringView: - r := right.(*StringView) - return arrayEqualStringView(l, r) - case *Int8: - r := right.(*Int8) - return arrayEqualInt8(l, r) - case *Int16: - r := right.(*Int16) - return arrayEqualInt16(l, r) - case *Int32: - r := right.(*Int32) - return arrayEqualInt32(l, r) - case *Int64: - r := right.(*Int64) - return arrayEqualInt64(l, r) - case *Uint8: - r := right.(*Uint8) - return arrayEqualUint8(l, r) - case *Uint16: - r := right.(*Uint16) - return arrayEqualUint16(l, r) - case *Uint32: - r := right.(*Uint32) - return arrayEqualUint32(l, r) - case *Uint64: - r := right.(*Uint64) - return arrayEqualUint64(l, r) - case *Float16: - r := right.(*Float16) - return arrayApproxEqualFloat16(l, r, opt) - case *Float32: - r := right.(*Float32) - return arrayApproxEqualFloat32(l, r, opt) - case *Float64: - r := right.(*Float64) - return arrayApproxEqualFloat64(l, r, opt) - case *Decimal128: - r := right.(*Decimal128) - return arrayEqualDecimal128(l, r) - case *Decimal256: - r := right.(*Decimal256) - return arrayEqualDecimal256(l, r) - case *Date32: - r := right.(*Date32) - return arrayEqualDate32(l, r) - case *Date64: - r := right.(*Date64) - return arrayEqualDate64(l, r) - case *Time32: - r := right.(*Time32) - return arrayEqualTime32(l, r) - case *Time64: - r := right.(*Time64) - return arrayEqualTime64(l, r) - case *Timestamp: - r := 
right.(*Timestamp) - return arrayEqualTimestamp(l, r) - case *List: - r := right.(*List) - return arrayApproxEqualList(l, r, opt) - case *LargeList: - r := right.(*LargeList) - return arrayApproxEqualLargeList(l, r, opt) - case *ListView: - r := right.(*ListView) - return arrayApproxEqualListView(l, r, opt) - case *LargeListView: - r := right.(*LargeListView) - return arrayApproxEqualLargeListView(l, r, opt) - case *FixedSizeList: - r := right.(*FixedSizeList) - return arrayApproxEqualFixedSizeList(l, r, opt) - case *Struct: - r := right.(*Struct) - return arrayApproxEqualStruct(l, r, opt) - case *MonthInterval: - r := right.(*MonthInterval) - return arrayEqualMonthInterval(l, r) - case *DayTimeInterval: - r := right.(*DayTimeInterval) - return arrayEqualDayTimeInterval(l, r) - case *MonthDayNanoInterval: - r := right.(*MonthDayNanoInterval) - return arrayEqualMonthDayNanoInterval(l, r) - case *Duration: - r := right.(*Duration) - return arrayEqualDuration(l, r) - case *Map: - r := right.(*Map) - if opt.unorderedMapKeys { - return arrayApproxEqualMap(l, r, opt) - } - return arrayApproxEqualList(l.List, r.List, opt) - case *Dictionary: - r := right.(*Dictionary) - return arrayApproxEqualDict(l, r, opt) - case ExtensionArray: - r := right.(ExtensionArray) - return arrayApproxEqualExtension(l, r, opt) - case *SparseUnion: - r := right.(*SparseUnion) - return arraySparseUnionApproxEqual(l, r, opt) - case *DenseUnion: - r := right.(*DenseUnion) - return arrayDenseUnionApproxEqual(l, r, opt) - case *RunEndEncoded: - r := right.(*RunEndEncoded) - return arrayRunEndEncodedApproxEqual(l, r, opt) - default: - panic(fmt.Errorf("arrow/array: unknown array type %T", l)) - } -} - -func baseArrayEqual(left, right arrow.Array) bool { - switch { - case left.Len() != right.Len(): - return false - case left.NullN() != right.NullN(): - return false - case !arrow.TypeEqual(left.DataType(), right.DataType()): // We do not check for metadata as in the C++ implementation. 
- return false - case !validityBitmapEqual(left, right): - return false - } - return true -} - -func validityBitmapEqual(left, right arrow.Array) bool { - // TODO(alexandreyc): make it faster by comparing byte slices of the validity bitmap? - n := left.Len() - if n != right.Len() { - return false - } - for i := 0; i < n; i++ { - if left.IsNull(i) != right.IsNull(i) { - return false - } - } - return true -} - -func arrayApproxEqualFloat16(left, right *Float16, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !opt.f16(left.Value(i), right.Value(i)) { - return false - } - } - return true -} - -func arrayApproxEqualFloat32(left, right *Float32, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !opt.f32(left.Value(i), right.Value(i)) { - return false - } - } - return true -} - -func arrayApproxEqualFloat64(left, right *Float64, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !opt.f64(left.Value(i), right.Value(i)) { - return false - } - } - return true -} - -func arrayApproxEqualList(left, right *List, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return arrayApproxEqual(l, r, opt) - }() - if !o { - return false - } - } - return true -} - -func arrayApproxEqualLargeList(left, right *LargeList, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return arrayApproxEqual(l, r, opt) - }() - if !o { - return false - } - } - return true -} - -func arrayApproxEqualListView(left, right *ListView, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } 
- o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return arrayApproxEqual(l, r, opt) - }() - if !o { - return false - } - } - return true -} - -func arrayApproxEqualLargeListView(left, right *LargeListView, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return arrayApproxEqual(l, r, opt) - }() - if !o { - return false - } - } - return true -} - -func arrayApproxEqualFixedSizeList(left, right *FixedSizeList, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return arrayApproxEqual(l, r, opt) - }() - if !o { - return false - } - } - return true -} - -func arrayApproxEqualStruct(left, right *Struct, opt equalOption) bool { - return bitutils.VisitSetBitRuns( - left.NullBitmapBytes(), - int64(left.Offset()), int64(left.Len()), - approxEqualStructRun(left, right, opt), - ) == nil -} - -func approxEqualStructRun(left, right *Struct, opt equalOption) bitutils.VisitFn { - return func(pos int64, length int64) error { - for i := range left.fields { - if !sliceApproxEqual(left.fields[i], pos, pos+length, right.fields[i], pos, pos+length, opt) { - return arrow.ErrInvalid - } - } - return nil - } -} - -// arrayApproxEqualMap doesn't care about the order of keys (in Go map traversal order is undefined) -func arrayApproxEqualMap(left, right *Map, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !arrayApproxEqualSingleMapEntry(left.newListValue(i).(*Struct), right.newListValue(i).(*Struct), opt) { - return false - } - } - return true -} - -// arrayApproxEqualSingleMapEntry is a helper function that checks if a single entry pair is 
approx equal. -// Basically, it doesn't care about key order. -// structs passed will be released -func arrayApproxEqualSingleMapEntry(left, right *Struct, opt equalOption) bool { - defer left.Release() - defer right.Release() - - // we don't compare the validity bitmap, but we want other checks from baseArrayEqual - switch { - case left.Len() != right.Len(): - return false - case left.NullN() != right.NullN(): - return false - case !arrow.TypeEqual(left.DataType(), right.DataType()): // We do not check for metadata as in the C++ implementation. - return false - case left.NullN() == left.Len(): - return true - } - - used := make(map[int]bool, right.Len()) - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - - found := false - lBeg, lEnd := int64(i), int64(i+1) - for j := 0; j < right.Len(); j++ { - if used[j] { - continue - } - if right.IsNull(j) { - used[j] = true - continue - } - - rBeg, rEnd := int64(j), int64(j+1) - - // check keys (field 0) - if !sliceApproxEqual(left.Field(0), lBeg, lEnd, right.Field(0), rBeg, rEnd, opt) { - continue - } - - // only now check the values - if sliceApproxEqual(left.Field(1), lBeg, lEnd, right.Field(1), rBeg, rEnd, opt) { - found = true - used[j] = true - break - } - } - if !found { - return false - } - } - - return len(used) == right.Len() -} diff --git a/go/arrow/array/compare_test.go b/go/arrow/array/compare_test.go deleted file mode 100644 index f757ab9f25f07..0000000000000 --- a/go/arrow/array/compare_test.go +++ /dev/null @@ -1,728 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "fmt" - "math" - "sort" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/float16" - "github.com/apache/arrow/go/v18/arrow/internal/arrdata" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestArrayEqual(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - rec := recs[0] - schema := rec.Schema() - for i, col := range rec.Columns() { - t.Run(schema.Field(i).Name, func(t *testing.T) { - arr := col - if !array.Equal(arr, arr) { - t.Fatalf("identical arrays should compare equal:\narray=%v", arr) - } - sub1 := array.NewSlice(arr, 1, int64(arr.Len())) - defer sub1.Release() - - sub2 := array.NewSlice(arr, 0, int64(arr.Len()-1)) - defer sub2.Release() - - if array.Equal(sub1, sub2) && name != "nulls" { - t.Fatalf("non-identical arrays should not compare equal:\nsub1=%v\nsub2=%v\narrf=%v\n", sub1, sub2, arr) - } - }) - } - }) - } -} - -func TestArraySliceEqual(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - rec := recs[0] - schema := rec.Schema() - for i, col := range rec.Columns() { - t.Run(schema.Field(i).Name, func(t *testing.T) { - arr := col - if !array.SliceEqual( - arr, 0, int64(arr.Len()), - arr, 0, int64(arr.Len()), - ) { - t.Fatalf("identical slices should compare equal:\narray=%v", arr) - } - sub1 := array.NewSlice(arr, 1, int64(arr.Len())) - defer sub1.Release() - - sub2 := 
array.NewSlice(arr, 0, int64(arr.Len()-1)) - defer sub2.Release() - - if array.SliceEqual(sub1, 0, int64(sub1.Len()), sub2, 0, int64(sub2.Len())) && name != "nulls" { - t.Fatalf("non-identical slices should not compare equal:\nsub1=%v\nsub2=%v\narrf=%v\n", sub1, sub2, arr) - } - }) - } - }) - } -} - -func TestArrayApproxEqual(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - rec := recs[0] - schema := rec.Schema() - for i, col := range rec.Columns() { - t.Run(schema.Field(i).Name, func(t *testing.T) { - arr := col - if !array.ApproxEqual(arr, arr) { - t.Fatalf("identical arrays should compare equal:\narray=%v", arr) - } - sub1 := array.NewSlice(arr, 1, int64(arr.Len())) - defer sub1.Release() - - sub2 := array.NewSlice(arr, 0, int64(arr.Len()-1)) - defer sub2.Release() - - if array.ApproxEqual(sub1, sub2) && name != "nulls" { - t.Fatalf("non-identical arrays should not compare equal:\nsub1=%v\nsub2=%v\narrf=%v\n", sub1, sub2, arr) - } - }) - } - }) - } -} - -func TestArrayApproxEqualFloats(t *testing.T) { - f16sFrom := func(vs []float64) []float16.Num { - o := make([]float16.Num, len(vs)) - for i, v := range vs { - o[i] = float16.New(float32(v)) - } - return o - } - - for _, tc := range []struct { - name string - a1 interface{} - a2 interface{} - opts []array.EqualOption - want bool - }{ - { - name: "f16", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), - a2: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), - want: true, - }, - { - name: "f16-no-tol", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), - a2: f16sFrom([]float64{1, 2, 3, 4, 5, 7}), - want: false, - }, - { - name: "f16-tol-ok", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), - a2: f16sFrom([]float64{1, 2, 3, 4, 5, 7}), - opts: []array.EqualOption{array.WithAbsTolerance(1)}, - want: true, - }, - { - name: "f16-nan", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), - a2: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), - want: false, - }, - { - name: "f16-nan-not", - a1: 
f16sFrom([]float64{1, 2, 3, 4, 5, 6}), - a2: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: false, - }, - { - name: "f16-nan-ok", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), - a2: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: true, - }, - { - name: "f16-nan-no-tol", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), - a2: f16sFrom([]float64{1, 2, 3, 4, 6, math.NaN()}), - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: false, - }, - { - name: "f16-nan-tol", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), - a2: f16sFrom([]float64{1, 2, 3, 4, 6, math.NaN()}), - opts: []array.EqualOption{array.WithNaNsEqual(true), array.WithAbsTolerance(1)}, - want: true, - }, - { - name: "f32", - a1: []float32{1, 2, 3, 4, 5, 6}, - a2: []float32{1, 2, 3, 4, 5, 6}, - want: true, - }, - { - name: "f32-no-tol", - a1: []float32{1, 2, 3, 4, 5, 6}, - a2: []float32{1, 2, 3, 4, 5, 7}, - want: false, - }, - { - name: "f32-tol-ok", - a1: []float32{1, 2, 3, 4, 5, 6}, - a2: []float32{1, 2, 3, 4, 5, 7}, - opts: []array.EqualOption{array.WithAbsTolerance(1)}, - want: true, - }, - { - name: "f32-nan", - a1: []float32{1, 2, 3, 4, 5, 6}, - a2: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, - want: false, - }, - { - name: "f32-nan-not", - a1: []float32{1, 2, 3, 4, 5, 6}, - a2: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: false, - }, - { - name: "f32-nan-ok", - a1: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, - a2: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: true, - }, - { - name: "f32-nan-no-tol", - a1: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, - a2: []float32{1, 2, 3, 4, 6, float32(math.NaN())}, - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: false, - }, - { - name: "f32-nan-tol", - a1: 
[]float32{1, 2, 3, 4, 5, float32(math.NaN())}, - a2: []float32{1, 2, 3, 4, 6, float32(math.NaN())}, - opts: []array.EqualOption{array.WithNaNsEqual(true), array.WithAbsTolerance(1)}, - want: true, - }, - { - name: "f64", - a1: []float64{1, 2, 3, 4, 5, 6}, - a2: []float64{1, 2, 3, 4, 5, 6}, - want: true, - }, - { - name: "f64-no-tol", - a1: []float64{1, 2, 3, 4, 5, 6}, - a2: []float64{1, 2, 3, 4, 5, 7}, - want: false, - }, - { - name: "f64-tol-ok", - a1: []float64{1, 2, 3, 4, 5, 6}, - a2: []float64{1, 2, 3, 4, 5, 7}, - opts: []array.EqualOption{array.WithAbsTolerance(1)}, - want: true, - }, - { - name: "f64-nan", - a1: []float64{1, 2, 3, 4, 5, 6}, - a2: []float64{1, 2, 3, 4, 5, math.NaN()}, - want: false, - }, - { - name: "f64-nan-not", - a1: []float64{1, 2, 3, 4, 5, 6}, - a2: []float64{1, 2, 3, 4, 5, math.NaN()}, - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: false, - }, - { - name: "f64-nan-ok", - a1: []float64{1, 2, 3, 4, 5, math.NaN()}, - a2: []float64{1, 2, 3, 4, 5, math.NaN()}, - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: true, - }, - { - name: "f64-nan-no-tol", - a1: []float64{1, 2, 3, 4, 5, math.NaN()}, - a2: []float64{1, 2, 3, 4, 6, math.NaN()}, - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: false, - }, - { - name: "f64-nan-tol", - a1: []float64{1, 2, 3, 4, 5, math.NaN()}, - a2: []float64{1, 2, 3, 4, 6, math.NaN()}, - opts: []array.EqualOption{array.WithNaNsEqual(true), array.WithAbsTolerance(1)}, - want: true, - }, - } { - t.Run(tc.name, func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - a1 := arrayOf(mem, tc.a1, nil) - defer a1.Release() - a2 := arrayOf(mem, tc.a2, nil) - defer a2.Release() - - if got, want := array.ApproxEqual(a1, a2, tc.opts...), tc.want; got != want { - t.Fatalf("invalid comparison: got=%v, want=%v\na1: %v\na2: %v\n", got, want, a1, a2) - } - }) - } -} - -func testStringMap(mem memory.Allocator, m map[string]string, keys 
[]string) *array.Map { - dt := arrow.MapOf(arrow.BinaryTypes.String, arrow.BinaryTypes.String) - builder := array.NewMapBuilderWithType(mem, dt) - defer builder.Release() - key, item := builder.KeyBuilder().(*array.StringBuilder), builder.ItemBuilder().(*array.StringBuilder) - - builder.AppendNull() - builder.Append(true) - - for _, k := range keys { - key.Append(k) - - v, ok := m[k] - if !ok { - item.AppendNull() - continue - } - - item.Append(v) - } - - return builder.NewMapArray() -} - -func TestArrayApproxEqualMaps(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - t.Run("different order", func(t *testing.T) { - m := map[string]string{"x": "x", "y": "y", "z": "z"} - - keys := []string{"z", "y", "x", "null"} - a := testStringMap(mem, m, keys) - defer a.Release() - - asc := make([]string, len(keys)) - copy(asc, keys) - sort.Strings(asc) - assert.NotEqual(t, keys, asc) - - b := testStringMap(mem, m, asc) - defer b.Release() - - assert.False(t, array.ApproxEqual(a, b)) - assert.True(t, array.ApproxEqual(a, b, array.WithUnorderedMapKeys(true))) - }) - - t.Run("extra left value", func(t *testing.T) { - m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": "extra"} - - aKeys := []string{"z", "y", "x", "extra"} - a := testStringMap(mem, m, aKeys) - defer a.Release() - - bKeys := []string{"z", "y", "x"} - b := testStringMap(mem, m, bKeys) - defer b.Release() - - assert.NotEqual(t, aKeys, bKeys) - assert.Equal(t, a.NullN(), b.NullN()) - assert.False(t, array.ApproxEqual(a, b)) - assert.False(t, array.ApproxEqual(a, b, array.WithUnorderedMapKeys(true))) - }) - - t.Run("extra right value", func(t *testing.T) { - m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": "extra"} - - aKeys := []string{"z", "y", "x"} - a := testStringMap(mem, m, aKeys) - defer a.Release() - - bKeys := []string{"z", "y", "x", "extra"} - b := testStringMap(mem, m, bKeys) - defer b.Release() - - assert.NotEqual(t, aKeys, 
bKeys) - assert.Equal(t, a.NullN(), b.NullN()) - assert.False(t, array.ApproxEqual(a, b)) - assert.False(t, array.ApproxEqual(a, b, array.WithUnorderedMapKeys(true))) - }) - - t.Run("unmatched value", func(t *testing.T) { - m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": "extra", "extra2": "extra"} - - aKeys := []string{"z", "y", "x", "extra"} - a := testStringMap(mem, m, aKeys) - defer a.Release() - - bKeys := []string{"z", "y", "x", "extra2"} - b := testStringMap(mem, m, bKeys) - defer b.Release() - - assert.NotEqual(t, aKeys, bKeys) - assert.Equal(t, a.NullN(), b.NullN()) - assert.False(t, array.ApproxEqual(a, b)) - assert.False(t, array.ApproxEqual(a, b, array.WithUnorderedMapKeys(true))) - }) - - t.Run("different value", func(t *testing.T) { - m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": "extra"} - - keys := []string{"z", "y", "x", "extra"} - a := testStringMap(mem, m, keys) - defer a.Release() - - m["extra"] = "different" - b := testStringMap(mem, m, keys) - defer b.Release() - - assert.Equal(t, a.NullN(), b.NullN()) - assert.False(t, array.ApproxEqual(a, b)) - assert.False(t, array.ApproxEqual(a, b, array.WithUnorderedMapKeys(true))) - }) -} - -func arrayOf(mem memory.Allocator, a interface{}, valids []bool) arrow.Array { - if mem == nil { - mem = memory.NewGoAllocator() - } - - switch a := a.(type) { - case []float16.Num: - bldr := array.NewFloat16Builder(mem) - defer bldr.Release() - - bldr.AppendValues(a, valids) - return bldr.NewFloat16Array() - - case []float32: - bldr := array.NewFloat32Builder(mem) - defer bldr.Release() - - bldr.AppendValues(a, valids) - return bldr.NewFloat32Array() - - case []float64: - bldr := array.NewFloat64Builder(mem) - defer bldr.Release() - - bldr.AppendValues(a, valids) - return bldr.NewFloat64Array() - - default: - panic(fmt.Errorf("arrdata: invalid data slice type %T", a)) - } -} - -func TestArrayEqualBaseArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - 
defer mem.AssertSize(t, 0) - - b1 := array.NewBooleanBuilder(mem) - defer b1.Release() - b1.Append(true) - a1 := b1.NewBooleanArray() - defer a1.Release() - - b2 := array.NewBooleanBuilder(mem) - defer b2.Release() - a2 := b2.NewBooleanArray() - defer a2.Release() - - if array.Equal(a1, a2) { - t.Errorf("two arrays with different lengths must not be equal") - } - - b3 := array.NewBooleanBuilder(mem) - defer b3.Release() - b3.AppendNull() - a3 := b3.NewBooleanArray() - defer a3.Release() - - if array.Equal(a1, a3) { - t.Errorf("two arrays with different number of null values must not be equal") - } - - b4 := array.NewInt32Builder(mem) - defer b4.Release() - b4.Append(0) - a4 := b4.NewInt32Array() - defer a4.Release() - - if array.Equal(a1, a4) { - t.Errorf("two arrays with different types must not be equal") - } - - b5 := array.NewBooleanBuilder(mem) - defer b5.Release() - b5.AppendNull() - b5.Append(true) - a5 := b5.NewBooleanArray() - defer a5.Release() - b1.AppendNull() - - if array.Equal(a1, a5) { - t.Errorf("two arrays with different validity bitmaps must not be equal") - } -} - -func TestArrayEqualNull(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - null := array.NewNull(0) - defer null.Release() - - if !array.Equal(null, null) { - t.Fatalf("identical arrays should compare equal") - } - - n0 := array.NewNull(10) - defer n0.Release() - - n1 := array.NewNull(10) - defer n1.Release() - - if !array.Equal(n0, n0) { - t.Fatalf("identical arrays should compare equal") - } - if !array.Equal(n1, n1) { - t.Fatalf("identical arrays should compare equal") - } - if !array.Equal(n0, n1) || !array.Equal(n1, n0) { - t.Fatalf("n0 and n1 should compare equal") - } - - sub07 := array.NewSlice(n0, 0, 7) - defer sub07.Release() - sub08 := array.NewSlice(n0, 0, 8) - defer sub08.Release() - sub19 := array.NewSlice(n0, 1, 9) - defer sub19.Release() - - if !array.Equal(sub08, sub19) { - t.Fatalf("sub08 and sub19 should 
compare equal") - } - - if array.Equal(sub08, sub07) { - t.Fatalf("sub08 and sub07 should not compare equal") - } -} - -func TestArrayEqualMaskedArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt32Builder(mem) - defer ab.Release() - - valids := []bool{false, false, false, false} - ab.AppendValues([]int32{1, 2, 0, 4}, valids) - - a1 := ab.NewInt32Array() - defer a1.Release() - - ab.AppendValues([]int32{1, 2, 3, 4}, valids) - a2 := ab.NewInt32Array() - defer a2.Release() - - if !array.Equal(a1, a1) || !array.Equal(a2, a2) { - t.Errorf("an array must be equal to itself") - } - - if !array.Equal(a1, a2) { - t.Errorf("%v must be equal to %v", a1, a2) - } -} - -func TestArrayEqualDifferentMaskedValues(t *testing.T) { - // test 2 int32 arrays, with same nulls (but different masked values) compare equal. - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt32Builder(mem) - defer ab.Release() - - valids := []bool{true, true, false, true} - ab.AppendValues([]int32{1, 2, 0, 4}, valids) - - a1 := ab.NewInt32Array() - defer a1.Release() - - ab.AppendValues([]int32{1, 2, 3, 4}, valids) - a2 := ab.NewInt32Array() - defer a2.Release() - - if !array.Equal(a1, a1) || !array.Equal(a2, a2) { - t.Errorf("an array must be equal to itself") - } - - if !array.Equal(a1, a2) { - t.Errorf("%v must be equal to %v", a1, a2) - } -} - -func TestRecordEqual(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - rec0 := recs[0] - rec1 := recs[1] - if !array.RecordEqual(rec0, rec0) { - t.Fatalf("identical records should compare equal:\nrecord:\n%v", rec0) - } - - if array.RecordEqual(rec0, rec1) && name != "nulls" { - t.Fatalf("non-identical records should not compare equal:\nrec0:\n%v\nrec1:\n%v", rec0, rec1) - } - - sub00 := rec0.NewSlice(0, recs[0].NumRows()-1) - defer sub00.Release() - sub01 := rec0.NewSlice(1, 
recs[0].NumRows()) - defer sub01.Release() - - if array.RecordEqual(sub00, sub01) && name != "nulls" { - t.Fatalf("non-identical records should not compare equal:\nsub0:\n%v\nsub1:\n%v", sub00, sub01) - } - }) - } -} - -func TestRecordApproxEqual(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - rec0 := recs[0] - rec1 := recs[1] - if !array.RecordApproxEqual(rec0, rec0) { - t.Fatalf("identical records should compare equal:\nrecord:\n%v", rec0) - } - - if array.RecordApproxEqual(rec0, rec1) && name != "nulls" { - t.Fatalf("non-identical records should not compare equal:\nrec0:\n%v\nrec1:\n%v", rec0, rec1) - } - - sub00 := rec0.NewSlice(0, recs[0].NumRows()-1) - defer sub00.Release() - sub01 := rec0.NewSlice(1, recs[0].NumRows()) - defer sub01.Release() - - if array.RecordApproxEqual(sub00, sub01) && name != "nulls" { - t.Fatalf("non-identical records should not compare equal:\nsub0:\n%v\nsub1:\n%v", sub00, sub01) - } - }) - } -} - -func TestChunkedEqual(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - tbl := array.NewTableFromRecords(recs[0].Schema(), recs) - defer tbl.Release() - - for i := 0; i < int(tbl.NumCols()); i++ { - if !array.ChunkedEqual(tbl.Column(i).Data(), tbl.Column(i).Data()) && name != "nulls" { - t.Fatalf("identical chunked arrays should compare as equal:\narr:%v\n", tbl.Column(i).Data()) - } - } - }) - } -} - -func TestChunkedApproxEqual(t *testing.T) { - fb := array.NewFloat64Builder(memory.DefaultAllocator) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - fb.AppendValues([]float64{6, 7}, nil) - f2 := fb.NewFloat64Array() - defer f2.Release() - - fb.AppendValues([]float64{8, 9, 10}, nil) - f3 := fb.NewFloat64Array() - defer f3.Release() - - c1 := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f1, f2, f3}, - ) - defer c1.Release() - - 
fb.AppendValues([]float64{1, 2, 3}, nil) - f4 := fb.NewFloat64Array() - defer f4.Release() - - fb.AppendValues([]float64{4, 5}, nil) - f5 := fb.NewFloat64Array() - defer f5.Release() - - fb.AppendValues([]float64{6, 7, 8, 9}, nil) - f6 := fb.NewFloat64Array() - defer f6.Release() - - fb.AppendValues([]float64{10}, nil) - f7 := fb.NewFloat64Array() - defer f7.Release() - - c2 := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f4, f5, f6, f7}, - ) - defer c2.Release() - - assert.True(t, array.ChunkedEqual(c1, c2)) - assert.True(t, array.ChunkedApproxEqual(c1, c2)) -} - -func TestTableEqual(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - tbl := array.NewTableFromRecords(recs[0].Schema(), recs) - defer tbl.Release() - - if !array.TableEqual(tbl, tbl) { - t.Fatalf("identical tables should compare as equal:\tbl:%v\n", tbl) - } - if !array.TableApproxEqual(tbl, tbl) { - t.Fatalf("identical tables should compare as approx equal:\tbl:%v\n", tbl) - } - }) - } -} diff --git a/go/arrow/array/concat.go b/go/arrow/array/concat.go deleted file mode 100644 index 3d2b4b4b83167..0000000000000 --- a/go/arrow/array/concat.go +++ /dev/null @@ -1,933 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "errors" - "fmt" - "math" - "math/bits" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/encoded" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/bitutils" - "github.com/apache/arrow/go/v18/internal/utils" -) - -// Concatenate creates a new arrow.Array which is the concatenation of the -// passed in arrays. Returns nil if an error is encountered. -// -// The passed in arrays still need to be released manually, and will not be -// released by this function. -func Concatenate(arrs []arrow.Array, mem memory.Allocator) (result arrow.Array, err error) { - if len(arrs) == 0 { - return nil, errors.New("array/concat: must pass at least one array") - } - - // gather Data of inputs - data := make([]arrow.ArrayData, len(arrs)) - for i, ar := range arrs { - if !arrow.TypeEqual(ar.DataType(), arrs[0].DataType()) { - return nil, fmt.Errorf("arrays to be concatenated must be identically typed, but %s and %s were encountered", - arrs[0].DataType(), ar.DataType()) - } - data[i] = ar.Data() - } - - out, err := concat(data, mem) - if err != nil { - return nil, err - } - - defer out.Release() - return MakeFromData(out), nil -} - -// simple struct to hold ranges -type rng struct { - offset, len int -} - -// simple bitmap struct to reference a specific slice of a bitmap where the range -// offset and length are in bits -type bitmap struct { - data []byte - rng rng -} - -// gather up the bitmaps from the passed in data objects -func gatherBitmaps(data []arrow.ArrayData, idx int) []bitmap { - out := make([]bitmap, len(data)) - for i, d := range data { - if d.Buffers()[idx] != nil { - out[i].data = d.Buffers()[idx].Bytes() - } - out[i].rng.offset = d.Offset() - 
out[i].rng.len = d.Len() - } - return out -} - -// gatherFixedBuffers gathers up the buffer objects of the given index, specifically -// returning only the slices of the buffers which are relevant to the passed in arrays -// in case they are themselves slices of other arrays. nil buffers are ignored and not -// in the output slice. -func gatherFixedBuffers(data []arrow.ArrayData, idx, byteWidth int) []*memory.Buffer { - out := make([]*memory.Buffer, 0, len(data)) - for _, d := range data { - buf := d.Buffers()[idx] - if buf == nil { - continue - } - - out = append(out, memory.NewBufferBytes(buf.Bytes()[d.Offset()*byteWidth:(d.Offset()+d.Len())*byteWidth])) - } - return out -} - -// gatherBuffersFixedWidthType is like gatherFixedBuffers, but uses a datatype to determine the size -// to use for determining the byte slice rather than a passed in bytewidth. -func gatherBuffersFixedWidthType(data []arrow.ArrayData, idx int, fixed arrow.FixedWidthDataType) []*memory.Buffer { - return gatherFixedBuffers(data, idx, fixed.BitWidth()/8) -} - -// gatherBufferRanges requires that len(ranges) == len(data) and returns a list of buffers -// which represent the corresponding range of each buffer in the specified index of each -// data object. -func gatherBufferRanges(data []arrow.ArrayData, idx int, ranges []rng) []*memory.Buffer { - out := make([]*memory.Buffer, 0, len(data)) - for i, d := range data { - buf := d.Buffers()[idx] - if buf == nil { - debug.Assert(ranges[i].len == 0, "misaligned buffer value ranges") - continue - } - - out = append(out, memory.NewBufferBytes(buf.Bytes()[ranges[i].offset:ranges[i].offset+ranges[i].len])) - } - return out -} - -// gatherChildren gathers the children data objects for child of index idx for all of the data objects. 
-func gatherChildren(data []arrow.ArrayData, idx int) []arrow.ArrayData { - return gatherChildrenMultiplier(data, idx, 1) -} - -// gatherChildrenMultiplier gathers the full data slice of the underlying values from the children data objects -// such as the values data for a list array so that it can return a slice of the buffer for a given -// index into the children. -func gatherChildrenMultiplier(data []arrow.ArrayData, idx, multiplier int) []arrow.ArrayData { - out := make([]arrow.ArrayData, len(data)) - for i, d := range data { - out[i] = NewSliceData(d.Children()[idx], int64(d.Offset()*multiplier), int64(d.Offset()+d.Len())*int64(multiplier)) - } - return out -} - -// gatherChildrenRanges returns a slice of Data objects which each represent slices of the given ranges from the -// child in the specified index from each data object. -func gatherChildrenRanges(data []arrow.ArrayData, idx int, ranges []rng) []arrow.ArrayData { - debug.Assert(len(data) == len(ranges), "mismatched children ranges for concat") - out := make([]arrow.ArrayData, len(data)) - for i, d := range data { - out[i] = NewSliceData(d.Children()[idx], int64(ranges[i].offset), int64(ranges[i].offset+ranges[i].len)) - } - return out -} - -// creates a single contiguous buffer which contains the concatenation of all of the passed -// in buffer objects. 
-func concatBuffers(bufs []*memory.Buffer, mem memory.Allocator) *memory.Buffer { - outLen := 0 - for _, b := range bufs { - outLen += b.Len() - } - out := memory.NewResizableBuffer(mem) - out.Resize(outLen) - - data := out.Bytes() - for _, b := range bufs { - copy(data, b.Bytes()) - data = data[b.Len():] - } - return out -} - -func handle32BitOffsets(outLen int, buffers []*memory.Buffer, out *memory.Buffer) (*memory.Buffer, []rng, error) { - dst := arrow.Int32Traits.CastFromBytes(out.Bytes()) - valuesRanges := make([]rng, len(buffers)) - nextOffset := int32(0) - nextElem := int(0) - for i, b := range buffers { - if b.Len() == 0 { - valuesRanges[i].offset = 0 - valuesRanges[i].len = 0 - continue - } - - // when we gather our buffers, we sliced off the last offset from the buffer - // so that we could count the lengths accurately - src := arrow.Int32Traits.CastFromBytes(b.Bytes()) - valuesRanges[i].offset = int(src[0]) - // expand our slice to see that final offset - expand := src[:len(src)+1] - // compute the length of this range by taking the final offset and subtracting where we started. 
- valuesRanges[i].len = int(expand[len(src)]) - valuesRanges[i].offset - - if nextOffset > math.MaxInt32-int32(valuesRanges[i].len) { - return nil, nil, errors.New("offset overflow while concatenating arrays") - } - - // adjust each offset by the difference between our last ending point and our starting point - adj := nextOffset - src[0] - for j, o := range src { - dst[nextElem+j] = adj + o - } - - // the next index for an element in the output buffer - nextElem += b.Len() / arrow.Int32SizeBytes - // update our offset counter to be the total current length of our output - nextOffset += int32(valuesRanges[i].len) - } - - // final offset should point to the end of the data - dst[outLen] = nextOffset - return out, valuesRanges, nil -} - -func unifyDictionaries(mem memory.Allocator, data []arrow.ArrayData, dt *arrow.DictionaryType) ([]*memory.Buffer, arrow.Array, error) { - unifier, err := NewDictionaryUnifier(mem, dt.ValueType) - if err != nil { - return nil, nil, err - } - defer unifier.Release() - - newLookup := make([]*memory.Buffer, len(data)) - for i, d := range data { - dictArr := MakeFromData(d.Dictionary()) - defer dictArr.Release() - newLookup[i], err = unifier.UnifyAndTranspose(dictArr) - if err != nil { - return nil, nil, err - } - } - - unified, err := unifier.GetResultWithIndexType(dt.IndexType) - if err != nil { - for _, b := range newLookup { - b.Release() - } - return nil, nil, err - } - return newLookup, unified, nil -} - -func concatDictIndices(mem memory.Allocator, data []arrow.ArrayData, idxType arrow.FixedWidthDataType, transpositions []*memory.Buffer) (out *memory.Buffer, err error) { - defer func() { - if err != nil && out != nil { - out.Release() - out = nil - } - }() - - idxWidth := idxType.BitWidth() / 8 - outLen := 0 - for i, d := range data { - outLen += d.Len() - defer transpositions[i].Release() - } - - out = memory.NewResizableBuffer(mem) - out.Resize(outLen * idxWidth) - - outData := out.Bytes() - for i, d := range data { - transposeMap 
:= arrow.Int32Traits.CastFromBytes(transpositions[i].Bytes()) - src := d.Buffers()[1].Bytes() - if d.Buffers()[0] == nil { - if err = utils.TransposeIntsBuffers(idxType, idxType, src, outData, d.Offset(), 0, d.Len(), transposeMap); err != nil { - return - } - } else { - rdr := bitutils.NewBitRunReader(d.Buffers()[0].Bytes(), int64(d.Offset()), int64(d.Len())) - pos := 0 - for { - run := rdr.NextRun() - if run.Len == 0 { - break - } - - if run.Set { - err = utils.TransposeIntsBuffers(idxType, idxType, src, outData, d.Offset()+pos, pos, int(run.Len), transposeMap) - if err != nil { - return - } - } else { - memory.Set(outData[pos:pos+(int(run.Len)*idxWidth)], 0x00) - } - - pos += int(run.Len) - } - } - outData = outData[d.Len()*idxWidth:] - } - return -} - -func handle64BitOffsets(outLen int, buffers []*memory.Buffer, out *memory.Buffer) (*memory.Buffer, []rng, error) { - dst := arrow.Int64Traits.CastFromBytes(out.Bytes()) - valuesRanges := make([]rng, len(buffers)) - nextOffset := int64(0) - nextElem := int(0) - for i, b := range buffers { - if b.Len() == 0 { - valuesRanges[i].offset = 0 - valuesRanges[i].len = 0 - continue - } - - // when we gather our buffers, we sliced off the last offset from the buffer - // so that we could count the lengths accurately - src := arrow.Int64Traits.CastFromBytes(b.Bytes()) - valuesRanges[i].offset = int(src[0]) - // expand our slice to see that final offset - expand := src[:len(src)+1] - // compute the length of this range by taking the final offset and subtracting where we started. 
- valuesRanges[i].len = int(expand[len(src)]) - valuesRanges[i].offset - - if nextOffset > math.MaxInt64-int64(valuesRanges[i].len) { - return nil, nil, errors.New("offset overflow while concatenating arrays") - } - - // adjust each offset by the difference between our last ending point and our starting point - adj := nextOffset - src[0] - for j, o := range src { - dst[nextElem+j] = adj + o - } - - // the next index for an element in the output buffer - nextElem += b.Len() / arrow.Int64SizeBytes - // update our offset counter to be the total current length of our output - nextOffset += int64(valuesRanges[i].len) - } - - // final offset should point to the end of the data - dst[outLen] = nextOffset - return out, valuesRanges, nil -} - -// concatOffsets creates a single offset buffer which represents the concatenation of all of the -// offsets buffers, adjusting the offsets appropriately to their new relative locations. -// -// It also returns the list of ranges that need to be fetched for the corresponding value buffers -// to construct the final concatenated value buffer. 
-func concatOffsets(buffers []*memory.Buffer, byteWidth int, mem memory.Allocator) (*memory.Buffer, []rng, error) { - outLen := 0 - for _, b := range buffers { - outLen += b.Len() / byteWidth - } - - out := memory.NewResizableBuffer(mem) - out.Resize(byteWidth * (outLen + 1)) - - switch byteWidth { - case arrow.Int64SizeBytes: - return handle64BitOffsets(outLen, buffers, out) - default: - return handle32BitOffsets(outLen, buffers, out) - } -} - -func sumArraySizes(data []arrow.ArrayData) int { - outSize := 0 - for _, arr := range data { - outSize += arr.Len() - } - return outSize -} - -func getListViewBufferValues[T int32 | int64](data arrow.ArrayData, i int) []T { - bytes := data.Buffers()[i].Bytes() - base := (*T)(unsafe.Pointer(&bytes[0])) - ret := unsafe.Slice(base, data.Offset()+data.Len()) - return ret[data.Offset():] -} - -func putListViewOffsets32(in arrow.ArrayData, displacement int32, out *memory.Buffer, outOff int) { - debug.Assert(in.DataType().ID() == arrow.LIST_VIEW, "putListViewOffsets32: expected LIST_VIEW data") - inOff, inLen := in.Offset(), in.Len() - if inLen == 0 { - return - } - bitmap := in.Buffers()[0] - srcOffsets := getListViewBufferValues[int32](in, 1) - srcSizes := getListViewBufferValues[int32](in, 2) - isValidAndNonEmpty := func(i int) bool { - return (bitmap == nil || bitutil.BitIsSet(bitmap.Bytes(), inOff+i)) && srcSizes[i] > 0 - } - - dstOffsets := arrow.Int32Traits.CastFromBytes(out.Bytes()) - for i, offset := range srcOffsets { - if isValidAndNonEmpty(i) { - // This is guaranteed by RangeOfValuesUsed returning the smallest offset - // of valid and non-empty list-views. 
- debug.Assert(offset+displacement >= 0, "putListViewOffsets32: offset underflow while concatenating arrays") - dstOffsets[outOff+i] = offset + displacement - } else { - dstOffsets[outOff+i] = 0 - } - } -} - -func putListViewOffsets64(in arrow.ArrayData, displacement int64, out *memory.Buffer, outOff int) { - debug.Assert(in.DataType().ID() == arrow.LARGE_LIST_VIEW, "putListViewOffsets64: expected LARGE_LIST_VIEW data") - inOff, inLen := in.Offset(), in.Len() - if inLen == 0 { - return - } - bitmap := in.Buffers()[0] - srcOffsets := getListViewBufferValues[int64](in, 1) - srcSizes := getListViewBufferValues[int64](in, 2) - isValidAndNonEmpty := func(i int) bool { - return (bitmap == nil || bitutil.BitIsSet(bitmap.Bytes(), inOff+i)) && srcSizes[i] > 0 - } - - dstOffsets := arrow.Int64Traits.CastFromBytes(out.Bytes()) - for i, offset := range srcOffsets { - if isValidAndNonEmpty(i) { - // This is guaranteed by RangeOfValuesUsed returning the smallest offset - // of valid and non-empty list-views. - debug.Assert(offset+displacement >= 0, "putListViewOffsets64: offset underflow while concatenating arrays") - dstOffsets[outOff+i] = offset + displacement - } else { - dstOffsets[outOff+i] = 0 - } - } -} - -// Concatenate buffers holding list-view offsets into a single buffer of offsets -// -// valueRanges contains the relevant ranges of values in the child array actually -// referenced to by the views. Most commonly, these ranges will start from 0, -// but when that is not the case, we need to adjust the displacement of offsets. -// The concatenated child array does not contain values from the beginning -// if they are not referenced to by any view. 
-func concatListViewOffsets(data []arrow.ArrayData, byteWidth int, valueRanges []rng, mem memory.Allocator) (*memory.Buffer, error) { - outSize := sumArraySizes(data) - if byteWidth == 4 && outSize > math.MaxInt32 { - return nil, fmt.Errorf("%w: offset overflow while concatenating arrays", arrow.ErrInvalid) - } - out := memory.NewResizableBuffer(mem) - out.Resize(byteWidth * outSize) - - numChildValues, elementsLength := 0, 0 - for i, arr := range data { - displacement := numChildValues - valueRanges[i].offset - if byteWidth == 4 { - putListViewOffsets32(arr, int32(displacement), out, elementsLength) - } else { - putListViewOffsets64(arr, int64(displacement), out, elementsLength) - } - elementsLength += arr.Len() - numChildValues += valueRanges[i].len - } - debug.Assert(elementsLength == outSize, "implementation error") - - return out, nil -} - -func zeroNullListViewSizes[T int32 | int64](data arrow.ArrayData) { - if data.Len() == 0 || data.Buffers()[0] == nil { - return - } - validity := data.Buffers()[0].Bytes() - sizes := getListViewBufferValues[T](data, 2) - - for i := 0; i < data.Len(); i++ { - if !bitutil.BitIsSet(validity, data.Offset()+i) { - sizes[i] = 0 - } - } -} - -func concatListView(data []arrow.ArrayData, offsetType arrow.FixedWidthDataType, out *Data, mem memory.Allocator) (err error) { - // Calculate the ranges of values that each list-view array uses - valueRanges := make([]rng, len(data)) - for i, input := range data { - offset, len := rangeOfValuesUsed(input) - valueRanges[i].offset = offset - valueRanges[i].len = len - } - - // Gather the children ranges of each input array - childData := gatherChildrenRanges(data, 0, valueRanges) - for _, c := range childData { - defer c.Release() - } - - // Concatenate the values - values, err := concat(childData, mem) - if err != nil { - return err - } - - // Concatenate the offsets - offsetBuffer, err := concatListViewOffsets(data, offsetType.Bytes(), valueRanges, mem) - if err != nil { - return err - } - - 
// Concatenate the sizes - sizeBuffers := gatherBuffersFixedWidthType(data, 2, offsetType) - sizeBuffer := concatBuffers(sizeBuffers, mem) - - out.childData = []arrow.ArrayData{values} - out.buffers[1] = offsetBuffer - out.buffers[2] = sizeBuffer - - // To make sure the sizes don't reference values that are not in the new - // concatenated values array, we zero the sizes of null list-view values. - if offsetType.ID() == arrow.INT32 { - zeroNullListViewSizes[int32](out) - } else { - zeroNullListViewSizes[int64](out) - } - - return nil -} - -// concat is the implementation for actually performing the concatenation of the arrow.ArrayData -// objects that we can call internally for nested types. -func concat(data []arrow.ArrayData, mem memory.Allocator) (arr arrow.ArrayData, err error) { - out := &Data{refCount: 1, dtype: data[0].DataType(), nulls: 0} - defer func() { - if pErr := recover(); pErr != nil { - err = utils.FormatRecoveredError("arrow/concat", pErr) - } - if err != nil { - out.Release() - } - }() - for _, d := range data { - out.length += d.Len() - if out.nulls == UnknownNullCount || d.NullN() == UnknownNullCount { - out.nulls = UnknownNullCount - continue - } - out.nulls += d.NullN() - } - - out.buffers = make([]*memory.Buffer, len(data[0].Buffers())) - if out.nulls != 0 && out.dtype.ID() != arrow.NULL { - bm, err := concatBitmaps(gatherBitmaps(data, 0), mem) - if err != nil { - return nil, err - } - out.buffers[0] = bm - } - - dt := out.dtype - if dt.ID() == arrow.EXTENSION { - dt = dt.(arrow.ExtensionType).StorageType() - } - - switch dt := dt.(type) { - case *arrow.NullType: - case *arrow.BooleanType: - bm, err := concatBitmaps(gatherBitmaps(data, 1), mem) - if err != nil { - return nil, err - } - out.buffers[1] = bm - case *arrow.DictionaryType: - idxType := dt.IndexType.(arrow.FixedWidthDataType) - // two cases: all dictionaries are the same or we need to unify them - dictsSame := true - dict0 := MakeFromData(data[0].Dictionary()) - defer 
dict0.Release() - for _, d := range data { - dict := MakeFromData(d.Dictionary()) - if !Equal(dict0, dict) { - dict.Release() - dictsSame = false - break - } - dict.Release() - } - - indexBuffers := gatherBuffersFixedWidthType(data, 1, idxType) - if dictsSame { - out.dictionary = dict0.Data().(*Data) - out.dictionary.Retain() - out.buffers[1] = concatBuffers(indexBuffers, mem) - break - } - - indexLookup, unifiedDict, err := unifyDictionaries(mem, data, dt) - if err != nil { - return nil, err - } - defer unifiedDict.Release() - out.dictionary = unifiedDict.Data().(*Data) - out.dictionary.Retain() - - out.buffers[1], err = concatDictIndices(mem, data, idxType, indexLookup) - if err != nil { - return nil, err - } - case arrow.FixedWidthDataType: - out.buffers[1] = concatBuffers(gatherBuffersFixedWidthType(data, 1, dt), mem) - case arrow.BinaryViewDataType: - out.buffers = out.buffers[:2] - for _, d := range data { - for _, buf := range d.Buffers()[2:] { - buf.Retain() - out.buffers = append(out.buffers, buf) - } - } - - out.buffers[1] = concatBuffers(gatherFixedBuffers(data, 1, arrow.ViewHeaderSizeBytes), mem) - - var ( - s = arrow.ViewHeaderTraits.CastFromBytes(out.buffers[1].Bytes()) - i = data[0].Len() - precedingBufsCount int - ) - - for idx := 1; idx < len(data); idx++ { - precedingBufsCount += len(data[idx-1].Buffers()) - 2 - - for end := i + data[idx].Len(); i < end; i++ { - if s[i].IsInline() { - continue - } - - bufIndex := s[i].BufferIndex() + int32(precedingBufsCount) - s[i].SetIndexOffset(bufIndex, s[i].BufferOffset()) - } - } - case arrow.BinaryDataType: - offsetWidth := dt.Layout().Buffers[1].ByteWidth - offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) - if err != nil { - return nil, err - } - out.buffers[1] = offsetBuffer - out.buffers[2] = concatBuffers(gatherBufferRanges(data, 2, valueRanges), mem) - case *arrow.ListType: - offsetWidth := dt.Layout().Buffers[1].ByteWidth - offsetBuffer, 
valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) - if err != nil { - return nil, err - } - childData := gatherChildrenRanges(data, 0, valueRanges) - for _, c := range childData { - defer c.Release() - } - - out.buffers[1] = offsetBuffer - out.childData = make([]arrow.ArrayData, 1) - out.childData[0], err = concat(childData, mem) - if err != nil { - return nil, err - } - case *arrow.LargeListType: - offsetWidth := dt.Layout().Buffers[1].ByteWidth - offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) - if err != nil { - return nil, err - } - childData := gatherChildrenRanges(data, 0, valueRanges) - for _, c := range childData { - defer c.Release() - } - - out.buffers[1] = offsetBuffer - out.childData = make([]arrow.ArrayData, 1) - out.childData[0], err = concat(childData, mem) - if err != nil { - return nil, err - } - case *arrow.ListViewType: - offsetType := arrow.PrimitiveTypes.Int32.(arrow.FixedWidthDataType) - err := concatListView(data, offsetType, out, mem) - if err != nil { - return nil, err - } - case *arrow.LargeListViewType: - offsetType := arrow.PrimitiveTypes.Int64.(arrow.FixedWidthDataType) - err := concatListView(data, offsetType, out, mem) - if err != nil { - return nil, err - } - case *arrow.FixedSizeListType: - childData := gatherChildrenMultiplier(data, 0, int(dt.Len())) - for _, c := range childData { - defer c.Release() - } - - children, err := concat(childData, mem) - if err != nil { - return nil, err - } - out.childData = []arrow.ArrayData{children} - case *arrow.StructType: - out.childData = make([]arrow.ArrayData, dt.NumFields()) - for i := range dt.Fields() { - children := gatherChildren(data, i) - for _, c := range children { - defer c.Release() - } - - childData, err := concat(children, mem) - if err != nil { - return nil, err - } - out.childData[i] = childData - } - case *arrow.MapType: - offsetWidth := dt.Layout().Buffers[1].ByteWidth - 
offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) - if err != nil { - return nil, err - } - childData := gatherChildrenRanges(data, 0, valueRanges) - for _, c := range childData { - defer c.Release() - } - - out.buffers[1] = offsetBuffer - out.childData = make([]arrow.ArrayData, 1) - out.childData[0], err = concat(childData, mem) - if err != nil { - return nil, err - } - case *arrow.RunEndEncodedType: - physicalLength, overflow := int(0), false - // we can't use gatherChildren because the Offset and Len of - // data doesn't correspond to the physical length or offset - runs := make([]arrow.ArrayData, len(data)) - values := make([]arrow.ArrayData, len(data)) - for i, d := range data { - plen := encoded.GetPhysicalLength(d) - off := encoded.FindPhysicalOffset(d) - - runs[i] = NewSliceData(d.Children()[0], int64(off), int64(off+plen)) - defer runs[i].Release() - values[i] = NewSliceData(d.Children()[1], int64(off), int64(off+plen)) - defer values[i].Release() - - physicalLength, overflow = addOvf(physicalLength, plen) - if overflow { - return nil, fmt.Errorf("%w: run end encoded array length must fit into a 32-bit signed integer", - arrow.ErrInvalid) - } - } - - runEndsByteWidth := runs[0].DataType().(arrow.FixedWidthDataType).Bytes() - runEndsBuffers := gatherFixedBuffers(runs, 1, runEndsByteWidth) - outRunEndsLen := physicalLength * runEndsByteWidth - outRunEndsBuf := memory.NewResizableBuffer(mem) - outRunEndsBuf.Resize(outRunEndsLen) - defer outRunEndsBuf.Release() - - if err := updateRunEnds(runEndsByteWidth, data, runEndsBuffers, outRunEndsBuf); err != nil { - return nil, err - } - - out.childData = make([]arrow.ArrayData, 2) - out.childData[0] = NewData(data[0].Children()[0].DataType(), int(physicalLength), - []*memory.Buffer{nil, outRunEndsBuf}, nil, 0, 0) - - var err error - out.childData[1], err = concat(values, mem) - if err != nil { - out.childData[0].Release() - return nil, err - } - default: - 
return nil, fmt.Errorf("concatenate not implemented for type %s", dt) - } - - return out, nil -} - -// check overflow in the addition, taken from bits.Add but adapted for signed integers -// rather than unsigned integers. bits.UintSize will be either 32 or 64 based on -// whether our architecture is 32 bit or 64. The operation is the same for both cases, -// the only difference is how much we need to shift by 30 for 32 bit and 62 for 64 bit. -// Thus, bits.UintSize - 2 is how much we shift right by to check if we had an overflow -// in the signed addition. -// -// First return is the result of the sum, the second return is true if there was an overflow -func addOvf(x, y int) (int, bool) { - sum := x + y - return sum, ((x&y)|((x|y)&^sum))>>(bits.UintSize-2) == 1 -} - -// concatenate bitmaps together and return a buffer with the combined bitmaps -func concatBitmaps(bitmaps []bitmap, mem memory.Allocator) (*memory.Buffer, error) { - var ( - outlen int - overflow bool - ) - - for _, bm := range bitmaps { - if outlen, overflow = addOvf(outlen, bm.rng.len); overflow { - return nil, errors.New("length overflow when concatenating arrays") - } - } - - out := memory.NewResizableBuffer(mem) - out.Resize(int(bitutil.BytesForBits(int64(outlen)))) - dst := out.Bytes() - - offset := 0 - for _, bm := range bitmaps { - if bm.data == nil { // if the bitmap is nil, that implies that the value is true for all elements - bitutil.SetBitsTo(out.Bytes(), int64(offset), int64(bm.rng.len), true) - } else { - bitutil.CopyBitmap(bm.data, bm.rng.offset, bm.rng.len, dst, offset) - } - offset += bm.rng.len - } - return out, nil -} - -func updateRunEnds(byteWidth int, inputData []arrow.ArrayData, inputBuffers []*memory.Buffer, outputBuffer *memory.Buffer) error { - switch byteWidth { - case 2: - out := arrow.Int16Traits.CastFromBytes(outputBuffer.Bytes()) - return updateRunsInt16(inputData, inputBuffers, out) - case 4: - out := arrow.Int32Traits.CastFromBytes(outputBuffer.Bytes()) - return 
updateRunsInt32(inputData, inputBuffers, out) - case 8: - out := arrow.Int64Traits.CastFromBytes(outputBuffer.Bytes()) - return updateRunsInt64(inputData, inputBuffers, out) - } - return fmt.Errorf("%w: invalid dataType for RLE runEnds", arrow.ErrInvalid) -} - -func updateRunsInt16(inputData []arrow.ArrayData, inputBuffers []*memory.Buffer, output []int16) error { - // for now we will not attempt to optimize by checking if we - // can fold the end and beginning of each array we're concatenating - // into a single run - pos := 0 - for i, buf := range inputBuffers { - if buf.Len() == 0 { - continue - } - src := arrow.Int16Traits.CastFromBytes(buf.Bytes()) - if pos == 0 { - pos += copy(output, src) - continue - } - - lastEnd := output[pos-1] - // we can check the last runEnd in the src and add it to the - // last value that we're adjusting them all by to see if we - // are going to overflow - if int64(lastEnd)+int64(int(src[len(src)-1])-inputData[i].Offset()) > math.MaxInt16 { - return fmt.Errorf("%w: overflow in run-length-encoded run ends concat", arrow.ErrInvalid) - } - - // adjust all of the run ends by first normalizing them (e - data[i].offset) - // then adding the previous value we ended on. Since the offset - // is a logical length offset it should be accurate to just subtract - // it from each value. 
- for j, e := range src { - output[pos+j] = lastEnd + int16(int(e)-inputData[i].Offset()) - } - pos += len(src) - } - return nil -} - -func updateRunsInt32(inputData []arrow.ArrayData, inputBuffers []*memory.Buffer, output []int32) error { - // for now we will not attempt to optimize by checking if we - // can fold the end and beginning of each array we're concatenating - // into a single run - pos := 0 - for i, buf := range inputBuffers { - if buf.Len() == 0 { - continue - } - src := arrow.Int32Traits.CastFromBytes(buf.Bytes()) - if pos == 0 { - pos += copy(output, src) - continue - } - - lastEnd := output[pos-1] - // we can check the last runEnd in the src and add it to the - // last value that we're adjusting them all by to see if we - // are going to overflow - if int64(lastEnd)+int64(int(src[len(src)-1])-inputData[i].Offset()) > math.MaxInt32 { - return fmt.Errorf("%w: overflow in run-length-encoded run ends concat", arrow.ErrInvalid) - } - - // adjust all of the run ends by first normalizing them (e - data[i].offset) - // then adding the previous value we ended on. Since the offset - // is a logical length offset it should be accurate to just subtract - // it from each value. 
- for j, e := range src { - output[pos+j] = lastEnd + int32(int(e)-inputData[i].Offset()) - } - pos += len(src) - } - return nil -} - -func updateRunsInt64(inputData []arrow.ArrayData, inputBuffers []*memory.Buffer, output []int64) error { - // for now we will not attempt to optimize by checking if we - // can fold the end and beginning of each array we're concatenating - // into a single run - pos := 0 - for i, buf := range inputBuffers { - if buf.Len() == 0 { - continue - } - src := arrow.Int64Traits.CastFromBytes(buf.Bytes()) - if pos == 0 { - pos += copy(output, src) - continue - } - - lastEnd := output[pos-1] - // we can check the last runEnd in the src and add it to the - // last value that we're adjusting them all by to see if we - // are going to overflow - if uint64(lastEnd)+uint64(int(src[len(src)-1])-inputData[i].Offset()) > math.MaxInt64 { - return fmt.Errorf("%w: overflow in run-length-encoded run ends concat", arrow.ErrInvalid) - } - - // adjust all of the run ends by first normalizing them (e - data[i].offset) - // then adding the previous value we ended on. Since the offset - // is a logical length offset it should be accurate to just subtract - // it from each value. - for j, e := range src { - output[pos+j] = lastEnd + e - int64(inputData[i].Offset()) - } - pos += len(src) - } - return nil -} diff --git a/go/arrow/array/concat_test.go b/go/arrow/array/concat_test.go deleted file mode 100644 index 7e6a3c08efd5c..0000000000000 --- a/go/arrow/array/concat_test.go +++ /dev/null @@ -1,789 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "fmt" - "math" - "sort" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" - "golang.org/x/exp/rand" -) - -func TestConcatenateValueBuffersNull(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - inputs := make([]arrow.Array, 0) - - bldr := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer bldr.Release() - - arr := bldr.NewArray() - defer arr.Release() - inputs = append(inputs, arr) - - bldr.AppendNull() - arr = bldr.NewArray() - defer arr.Release() - inputs = append(inputs, arr) - - actual, err := array.Concatenate(inputs, mem) - assert.NoError(t, err) - defer actual.Release() - - assert.True(t, array.Equal(actual, inputs[1])) -} - -func TestConcatenate(t *testing.T) { - tests := []struct { - dt arrow.DataType - }{ - {arrow.FixedWidthTypes.Boolean}, - {arrow.PrimitiveTypes.Int8}, - {arrow.PrimitiveTypes.Uint8}, - {arrow.PrimitiveTypes.Int16}, - {arrow.PrimitiveTypes.Uint16}, - {arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Uint32}, - {arrow.PrimitiveTypes.Int64}, - {arrow.PrimitiveTypes.Uint64}, - {arrow.PrimitiveTypes.Float32}, - {arrow.PrimitiveTypes.Float64}, - {arrow.BinaryTypes.String}, - 
{arrow.BinaryTypes.LargeString}, - {arrow.ListOf(arrow.PrimitiveTypes.Int8)}, - {arrow.LargeListOf(arrow.PrimitiveTypes.Int8)}, - {arrow.ListViewOf(arrow.PrimitiveTypes.Int8)}, - {arrow.LargeListViewOf(arrow.PrimitiveTypes.Int8)}, - {arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Int8)}, - {arrow.StructOf()}, - {arrow.MapOf(arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8)}, - {&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: arrow.PrimitiveTypes.Float64}}, - {arrow.BinaryTypes.StringView}, - } - - for _, tt := range tests { - t.Run(tt.dt.Name(), func(t *testing.T) { - suite.Run(t, &ConcatTestSuite{ - seed: 0xdeadbeef, - dt: tt.dt, - nullProbs: []float64{0.0, 0.1, 0.5, 0.9, 1.0}, - sizes: []int32{0, 1, 2, 4, 16, 31, 1234}, - }) - }) - } -} - -type ConcatTestSuite struct { - suite.Suite - - seed uint64 - rng gen.RandomArrayGenerator - dt arrow.DataType - - nullProbs []float64 - sizes []int32 - - mem *memory.CheckedAllocator -} - -func (cts *ConcatTestSuite) SetupSuite() { - cts.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - cts.rng = gen.NewRandomArrayGenerator(cts.seed, cts.mem) -} - -func (cts *ConcatTestSuite) TearDownSuite() { - cts.mem.AssertSize(cts.T(), 0) -} - -func (cts *ConcatTestSuite) generateArr(size int64, nullprob float64) arrow.Array { - switch cts.dt.ID() { - case arrow.BOOL: - return cts.rng.Boolean(size, 0.5, nullprob) - case arrow.INT8: - return cts.rng.Int8(size, 0, 127, nullprob) - case arrow.UINT8: - return cts.rng.Uint8(size, 0, 127, nullprob) - case arrow.INT16: - return cts.rng.Int16(size, 0, 127, nullprob) - case arrow.UINT16: - return cts.rng.Uint16(size, 0, 127, nullprob) - case arrow.INT32: - return cts.rng.Int32(size, 0, 127, nullprob) - case arrow.UINT32: - return cts.rng.Uint32(size, 0, 127, nullprob) - case arrow.INT64: - return cts.rng.Int64(size, 0, 127, nullprob) - case arrow.UINT64: - return cts.rng.Uint64(size, 0, 127, nullprob) - case arrow.FLOAT32: - return cts.rng.Float32(size, 0, 
127, nullprob) - case arrow.FLOAT64: - return cts.rng.Float64(size, 0, 127, nullprob) - case arrow.NULL: - return array.NewNull(int(size)) - case arrow.STRING: - return cts.rng.String(size, 0, 15, nullprob) - case arrow.LARGE_STRING: - return cts.rng.LargeString(size, 0, 15, nullprob) - case arrow.STRING_VIEW: - return cts.rng.StringView(size, 0, 20, nullprob) - case arrow.LIST: - valuesSize := size * 4 - values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8) - defer values.Release() - offsetsVector := cts.offsets(int32(valuesSize), int32(size)) - // ensure the first and last offsets encompass the whole values - offsetsVector[0] = 0 - offsetsVector[len(offsetsVector)-1] = int32(valuesSize) - - bldr := array.NewListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) - defer bldr.Release() - - valid := make([]bool, len(offsetsVector)-1) - for i := range valid { - valid[i] = true - } - bldr.AppendValues(offsetsVector, valid) - vb := bldr.ValueBuilder().(*array.Int8Builder) - for i := 0; i < values.Len(); i++ { - if values.IsValid(i) { - vb.Append(values.Value(i)) - } else { - vb.AppendNull() - } - } - return bldr.NewArray() - case arrow.LARGE_LIST: - valuesSize := size * 8 - values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8) - defer values.Release() - offsetsVector := cts.largeoffsets(int64(valuesSize), int32(size)) - // ensure the first and last offsets encompass the whole values - offsetsVector[0] = 0 - offsetsVector[len(offsetsVector)-1] = int64(valuesSize) - - bldr := array.NewLargeListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) - defer bldr.Release() - - valid := make([]bool, len(offsetsVector)-1) - for i := range valid { - valid[i] = true - } - bldr.AppendValues(offsetsVector, valid) - vb := bldr.ValueBuilder().(*array.Int8Builder) - for i := 0; i < values.Len(); i++ { - if values.IsValid(i) { - vb.Append(values.Value(i)) - } else { - vb.AppendNull() - } - } - return bldr.NewArray() - case arrow.LIST_VIEW: - 
arr := cts.rng.ListView(cts.dt.(arrow.VarLenListLikeType), size, 0, 20, nullprob) - err := arr.ValidateFull() - cts.NoError(err) - return arr - case arrow.LARGE_LIST_VIEW: - arr := cts.rng.LargeListView(cts.dt.(arrow.VarLenListLikeType), size, 0, 20, nullprob) - err := arr.ValidateFull() - cts.NoError(err) - return arr - case arrow.FIXED_SIZE_LIST: - const listsize = 3 - valuesSize := size * listsize - values := cts.rng.Int8(valuesSize, 0, 127, nullprob) - defer values.Release() - - data := array.NewData(arrow.FixedSizeListOf(listsize, arrow.PrimitiveTypes.Int8), int(size), []*memory.Buffer{nil}, []arrow.ArrayData{values.Data()}, 0, 0) - defer data.Release() - return array.MakeFromData(data) - case arrow.STRUCT: - foo := cts.rng.Int8(size, 0, 127, nullprob) - defer foo.Release() - bar := cts.rng.Float64(size, 0, 127, nullprob) - defer bar.Release() - baz := cts.rng.Boolean(size, 0.5, nullprob) - defer baz.Release() - - data := array.NewData(arrow.StructOf( - arrow.Field{Name: "foo", Type: foo.DataType(), Nullable: true}, - arrow.Field{Name: "bar", Type: bar.DataType(), Nullable: true}, - arrow.Field{Name: "baz", Type: baz.DataType(), Nullable: true}), - int(size), []*memory.Buffer{nil}, []arrow.ArrayData{foo.Data(), bar.Data(), baz.Data()}, 0, 0) - defer data.Release() - return array.NewStructData(data) - case arrow.MAP: - valuesSize := size * 4 - keys := cts.rng.Uint16(valuesSize, 0, 127, 0).(*array.Uint16) - defer keys.Release() - values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8) - defer values.Release() - - offsetsVector := cts.offsets(int32(valuesSize), int32(size)) - offsetsVector[0] = 0 - offsetsVector[len(offsetsVector)-1] = int32(valuesSize) - - bldr := array.NewMapBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8, false) - defer bldr.Release() - - kb := bldr.KeyBuilder().(*array.Uint16Builder) - vb := bldr.ItemBuilder().(*array.Int8Builder) - - valid := make([]bool, len(offsetsVector)-1) - for i := 
range valid { - valid[i] = true - } - bldr.AppendValues(offsetsVector, valid) - for i := 0; i < int(valuesSize); i++ { - kb.Append(keys.Value(i)) - if values.IsValid(i) { - vb.Append(values.Value(i)) - } else { - vb.AppendNull() - } - } - return bldr.NewArray() - case arrow.DICTIONARY: - indices := cts.rng.Int32(size, 0, 127, nullprob) - defer indices.Release() - dict := cts.rng.Float64(128, 0.0, 127.0, nullprob) - defer dict.Release() - return array.NewDictionaryArray(cts.dt, indices, dict) - default: - return nil - } -} - -func (cts *ConcatTestSuite) slices(arr arrow.Array, offsets []int32) []arrow.Array { - slices := make([]arrow.Array, len(offsets)-1) - for i := 0; i != len(slices); i++ { - slices[i] = array.NewSlice(arr, int64(offsets[i]), int64(offsets[i+1])) - } - return slices -} - -func (cts *ConcatTestSuite) checkTrailingBitsZeroed(bitmap *memory.Buffer, length int64) { - if preceding := bitutil.PrecedingBitmask[length%8]; preceding != 0 { - lastByte := bitmap.Bytes()[length/8] - cts.Equal(lastByte&preceding, lastByte, length, preceding) - } -} - -func (cts *ConcatTestSuite) offsets(length, slicecount int32) []int32 { - offsets := make([]int32, slicecount+1) - dist := rand.New(rand.NewSource(cts.seed)) - for i := range offsets { - offsets[i] = dist.Int31n(length + 1) - } - sort.Slice(offsets, func(i, j int) bool { return offsets[i] < offsets[j] }) - return offsets -} - -func (cts *ConcatTestSuite) largeoffsets(length int64, slicecount int32) []int64 { - offsets := make([]int64, slicecount+1) - dist := rand.New(rand.NewSource(cts.seed)) - for i := range offsets { - offsets[i] = dist.Int63n(length + 1) - } - sort.Slice(offsets, func(i, j int) bool { return offsets[i] < offsets[j] }) - return offsets -} - -func (cts *ConcatTestSuite) TestCheckConcat() { - for _, sz := range cts.sizes { - cts.Run(fmt.Sprintf("size %d", sz), func() { - offsets := cts.offsets(sz, 3) - for _, np := range cts.nullProbs { - cts.Run(fmt.Sprintf("nullprob %0.2f", np), func() { - 
scopedMem := memory.NewCheckedAllocatorScope(cts.mem) - defer scopedMem.CheckSize(cts.T()) - - arr := cts.generateArr(int64(sz), np) - defer arr.Release() - expected := array.NewSlice(arr, int64(offsets[0]), int64(offsets[len(offsets)-1])) - defer expected.Release() - - slices := cts.slices(arr, offsets) - for _, s := range slices { - if s.DataType().ID() == arrow.LIST_VIEW { - err := s.(*array.ListView).ValidateFull() - cts.NoError(err) - } - defer s.Release() - } - - actual, err := array.Concatenate(slices, cts.mem) - cts.NoError(err) - if arr.DataType().ID() == arrow.LIST_VIEW { - lv := actual.(*array.ListView) - err := lv.ValidateFull() - cts.NoError(err) - } - defer actual.Release() - - cts.Truef(array.Equal(expected, actual), "expected: %s\ngot: %s\n", expected, actual) - if len(actual.Data().Buffers()) > 0 { - if actual.Data().Buffers()[0] != nil { - cts.checkTrailingBitsZeroed(actual.Data().Buffers()[0], int64(actual.Len())) - } - if actual.DataType().ID() == arrow.BOOL { - cts.checkTrailingBitsZeroed(actual.Data().Buffers()[1], int64(actual.Len())) - } - } - }) - } - }) - } -} - -func TestConcatDifferentDicts(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - t.Run("simple dicts", func(t *testing.T) { - scopedMem := memory.NewCheckedAllocatorScope(mem) - defer scopedMem.CheckSize(t) - - dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String} - dict1, err := array.DictArrayFromJSON(mem, dictType, `[1, 2, null, 3, 0]`, `["A0", "A1", "A2", "A3"]`) - require.NoError(t, err) - defer dict1.Release() - dict2, err := array.DictArrayFromJSON(mem, dictType, `[null, 4, 2, 1]`, `["B0", "B1", "B2", "B3", "B4"]`) - require.NoError(t, err) - defer dict2.Release() - - expected, err := array.DictArrayFromJSON(mem, dictType, `[1, 2, null, 3, 0, null, 8, 6, 5]`, `["A0", "A1", "A2", "A3", "B0", "B1", "B2", "B3", "B4"]`) - require.NoError(t, err) - defer 
expected.Release() - - concat, err := array.Concatenate([]arrow.Array{dict1, dict2}, mem) - assert.NoError(t, err) - defer concat.Release() - assert.Truef(t, array.Equal(concat, expected), "got: %s, expected: %s", concat, expected) - }) - - t.Run("larger", func(t *testing.T) { - scopedMem := memory.NewCheckedAllocatorScope(mem) - defer scopedMem.CheckSize(t) - - const size = 500 - dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.BinaryTypes.String} - - idxBuilder, exIdxBldr := array.NewUint16Builder(mem), array.NewUint16Builder(mem) - defer idxBuilder.Release() - defer exIdxBldr.Release() - idxBuilder.Reserve(size) - exIdxBldr.Reserve(size * 2) - - for i := uint16(0); i < size; i++ { - idxBuilder.UnsafeAppend(i) - exIdxBldr.UnsafeAppend(i) - } - for i := uint16(size); i < 2*size; i++ { - exIdxBldr.UnsafeAppend(i) - } - - indices, expIndices := idxBuilder.NewArray(), exIdxBldr.NewArray() - defer indices.Release() - defer expIndices.Release() - - // create three dictionaries. First maps i -> "{i}", second maps i->"{500+i}", - // each for 500 values and the third maps i -> "{i}" but for 1000 values. - // first and second concatenated should end up equaling the third. All strings - // padded to length 8 so we can know the size ahead of time. 
- valuesOneBldr, valuesTwoBldr := array.NewStringBuilder(mem), array.NewStringBuilder(mem) - defer valuesOneBldr.Release() - defer valuesTwoBldr.Release() - - valuesOneBldr.Reserve(size) - valuesTwoBldr.Reserve(size) - valuesOneBldr.ReserveData(size * 8) - valuesTwoBldr.ReserveData(size * 8) - - for i := 0; i < size; i++ { - valuesOneBldr.Append(fmt.Sprintf("%-8d", i)) - valuesTwoBldr.Append(fmt.Sprintf("%-8d", i+size)) - } - - dict1, dict2 := valuesOneBldr.NewArray(), valuesTwoBldr.NewArray() - defer dict1.Release() - defer dict2.Release() - expectedDict, err := array.Concatenate([]arrow.Array{dict1, dict2}, mem) - require.NoError(t, err) - defer expectedDict.Release() - - one, two := array.NewDictionaryArray(dictType, indices, dict1), array.NewDictionaryArray(dictType, indices, dict2) - defer one.Release() - defer two.Release() - expected := array.NewDictionaryArray(dictType, expIndices, expectedDict) - defer expected.Release() - - combined, err := array.Concatenate([]arrow.Array{one, two}, mem) - assert.NoError(t, err) - defer combined.Release() - assert.Truef(t, array.Equal(combined, expected), "got: %s, expected: %s", combined, expected) - }) -} - -func TestConcatDictionaryPartialOverlap(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String} - dictOne, err := array.DictArrayFromJSON(mem, dt, `[1, 2, null, 3, 0]`, `["A0", "A1", "C2", "C3"]`) - require.NoError(t, err) - defer dictOne.Release() - - dictTwo, err := array.DictArrayFromJSON(mem, dt, `[null, 4, 2, 1]`, `["B0", "B1", "C2", "C3", "B4"]`) - require.NoError(t, err) - defer dictTwo.Release() - - expected, err := array.DictArrayFromJSON(mem, dt, `[1, 2, null, 3, 0, null, 6, 2, 5]`, `["A0", "A1", "C2", "C3", "B0", "B1", "B4"]`) - require.NoError(t, err) - defer expected.Release() - - actual, err := array.Concatenate([]arrow.Array{dictOne, dictTwo}, 
mem) - assert.NoError(t, err) - defer actual.Release() - - assert.Truef(t, array.Equal(actual, expected), "got: %s, expected: %s", actual, expected) -} - -func TestConcatDictionaryDifferentSizeIndex(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String} - biggerDt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.BinaryTypes.String} - dictOne, err := array.DictArrayFromJSON(mem, dt, `[0]`, `["A0"]`) - require.NoError(t, err) - defer dictOne.Release() - - dictTwo, err := array.DictArrayFromJSON(mem, biggerDt, `[0]`, `["B0"]`) - require.NoError(t, err) - defer dictTwo.Release() - - arr, err := array.Concatenate([]arrow.Array{dictOne, dictTwo}, mem) - assert.Nil(t, arr) - assert.Error(t, err) -} - -func TestConcatDictionaryUnifyNullInDict(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String} - dictOne, err := array.DictArrayFromJSON(mem, dt, `[0, 1]`, `[null, "A"]`) - require.NoError(t, err) - defer dictOne.Release() - - dictTwo, err := array.DictArrayFromJSON(mem, dt, `[0, 1]`, `[null, "B"]`) - require.NoError(t, err) - defer dictTwo.Release() - - expected, err := array.DictArrayFromJSON(mem, dt, `[0, 1, 0, 2]`, `[null, "A", "B"]`) - require.NoError(t, err) - defer expected.Release() - - actual, err := array.Concatenate([]arrow.Array{dictOne, dictTwo}, mem) - assert.NoError(t, err) - defer actual.Release() - - assert.Truef(t, array.Equal(actual, expected), "got: %s, expected: %s", actual, expected) -} - -func TestConcatDictionaryEnlargedIndices(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - const size = math.MaxUint8 + 1 - dt := &arrow.DictionaryType{IndexType: 
arrow.PrimitiveTypes.Uint8, ValueType: arrow.PrimitiveTypes.Uint16} - - idxBuilder := array.NewUint8Builder(mem) - defer idxBuilder.Release() - idxBuilder.Reserve(size) - for i := 0; i < size; i++ { - idxBuilder.UnsafeAppend(uint8(i)) - } - indices := idxBuilder.NewUint8Array() - defer indices.Release() - - valuesBuilder := array.NewUint16Builder(mem) - defer valuesBuilder.Release() - valuesBuilder.Reserve(size) - valuesBuilderTwo := array.NewUint16Builder(mem) - defer valuesBuilderTwo.Release() - valuesBuilderTwo.Reserve(size) - - for i := uint16(0); i < size; i++ { - valuesBuilder.UnsafeAppend(i) - valuesBuilderTwo.UnsafeAppend(i + size) - } - - dict1, dict2 := valuesBuilder.NewUint16Array(), valuesBuilderTwo.NewUint16Array() - defer dict1.Release() - defer dict2.Release() - - d1, d2 := array.NewDictionaryArray(dt, indices, dict1), array.NewDictionaryArray(dt, indices, dict2) - defer d1.Release() - defer d2.Release() - - _, err := array.Concatenate([]arrow.Array{d1, d2}, mem) - assert.Error(t, err) - - biggerDt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.PrimitiveTypes.Uint16} - bigger1, bigger2 := array.NewDictionaryArray(biggerDt, dict1, dict1), array.NewDictionaryArray(biggerDt, dict1, dict2) - defer bigger1.Release() - defer bigger2.Release() - - combined, err := array.Concatenate([]arrow.Array{bigger1, bigger2}, mem) - assert.NoError(t, err) - defer combined.Release() - - assert.EqualValues(t, size*2, combined.Len()) -} - -func TestConcatDictionaryNullSlots(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint32, ValueType: arrow.BinaryTypes.String} - dict1, err := array.DictArrayFromJSON(mem, dt, `[null, null, null, null]`, `[]`) - require.NoError(t, err) - defer dict1.Release() - - dict2, err := array.DictArrayFromJSON(mem, dt, `[null, null, null, null, 0, 1]`, `["a", "b"]`) - require.NoError(t, err) 
- defer dict2.Release() - - expected, err := array.DictArrayFromJSON(mem, dt, `[null, null, null, null, null, null, null, null, 0, 1]`, `["a", "b"]`) - require.NoError(t, err) - defer expected.Release() - - actual, err := array.Concatenate([]arrow.Array{dict1, dict2}, mem) - assert.NoError(t, err) - defer actual.Release() - - assert.Truef(t, array.Equal(actual, expected), "got: %s, expected: %s", actual, expected) -} - -func TestConcatRunEndEncoded(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - tests := []struct { - offsetType arrow.DataType - expected interface{} - }{ - {arrow.PrimitiveTypes.Int16, []int16{1, 11, 111, 211, 311, 411, 500, 600}}, - {arrow.PrimitiveTypes.Int32, []int32{1, 11, 111, 211, 311, 411, 500, 600}}, - {arrow.PrimitiveTypes.Int64, []int64{1, 11, 111, 211, 311, 411, 500, 600}}, - } - - for _, tt := range tests { - t.Run(tt.offsetType.String(), func(t *testing.T) { - - arrs := make([]arrow.Array, 0) - bldr := array.NewRunEndEncodedBuilder(mem, tt.offsetType, arrow.BinaryTypes.String) - defer bldr.Release() - valBldr := bldr.ValueBuilder().(*array.StringBuilder) - - bldr.Append(1) - valBldr.Append("Hello") - bldr.AppendNull() - bldr.ContinueRun(9) - - bldr.Append(100) - valBldr.Append("World") - arrs = append(arrs, bldr.NewArray()) - - bldr.Append(100) - valBldr.Append("Goku") - bldr.Append(100) - valBldr.Append("Gohan") - bldr.Append(100) - valBldr.Append("Goten") - arrs = append(arrs, bldr.NewArray()) - - bldr.AppendNull() - bldr.ContinueRun(99) - bldr.Append(100) - valBldr.Append("Vegeta") - bldr.Append(100) - valBldr.Append("Trunks") - next := bldr.NewArray() - defer next.Release() - // remove the initial null with an offset and dig into the next run - arrs = append(arrs, array.NewSlice(next, 111, int64(next.Len()))) - - for _, a := range arrs { - defer a.Release() - } - - result, err := array.Concatenate(arrs, mem) - assert.NoError(t, err) - defer result.Release() - - rle := 
result.(*array.RunEndEncoded) - assert.EqualValues(t, 8, rle.GetPhysicalLength()) - assert.EqualValues(t, 0, rle.GetPhysicalOffset()) - - var values interface{} - switch endsArr := rle.RunEndsArr().(type) { - case *array.Int16: - values = endsArr.Int16Values() - case *array.Int32: - values = endsArr.Int32Values() - case *array.Int64: - values = endsArr.Int64Values() - } - assert.Equal(t, tt.expected, values) - - expectedValues, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, - strings.NewReader(`["Hello", null, "World", "Goku", "Gohan", "Goten", "Vegeta", "Trunks"]`)) - defer expectedValues.Release() - assert.Truef(t, array.Equal(expectedValues, rle.Values()), "expected: %s\ngot: %s", expectedValues, rle.Values()) - }) - } -} - -func TestConcatAlmostOverflowRunEndEncoding(t *testing.T) { - tests := []struct { - offsetType arrow.DataType - max uint64 - }{ - {arrow.PrimitiveTypes.Int16, math.MaxInt16}, - {arrow.PrimitiveTypes.Int32, math.MaxInt32}, - {arrow.PrimitiveTypes.Int64, math.MaxInt64}, - } - - for _, tt := range tests { - t.Run(tt.offsetType.String(), func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - arrs := make([]arrow.Array, 0) - bldr := array.NewRunEndEncodedBuilder(mem, tt.offsetType, arrow.BinaryTypes.String) - defer bldr.Release() - valBldr := bldr.ValueBuilder().(*array.StringBuilder) - - // max is not evenly divisible by 4, so we add one to each - // to account for that so our final concatenate will overflow - bldr.Append((tt.max / 4) + 1) - valBldr.Append("foo") - bldr.Append((tt.max / 4) + 1) - valBldr.Append("bar") - arrs = append(arrs, bldr.NewArray()) - - bldr.Append((tt.max / 4) + 1) - valBldr.Append("baz") - bldr.Append((tt.max / 4)) - valBldr.Append("bop") - arrs = append(arrs, bldr.NewArray()) - - defer func() { - for _, a := range arrs { - a.Release() - } - }() - - arr, err := array.Concatenate(arrs, mem) - assert.NoError(t, err) - defer arr.Release() - }) - } -} - 
-func TestConcatOverflowRunEndEncoding(t *testing.T) { - tests := []struct { - offsetType arrow.DataType - max uint64 - }{ - {arrow.PrimitiveTypes.Int16, math.MaxInt16}, - {arrow.PrimitiveTypes.Int32, math.MaxInt32}, - {arrow.PrimitiveTypes.Int64, math.MaxInt64}, - } - - for _, tt := range tests { - t.Run(tt.offsetType.String(), func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - arrs := make([]arrow.Array, 0) - bldr := array.NewRunEndEncodedBuilder(mem, tt.offsetType, arrow.BinaryTypes.String) - defer bldr.Release() - valBldr := bldr.ValueBuilder().(*array.StringBuilder) - - // max is not evenly divisible by 4, so we add one to each - // to account for that so our final concatenate will overflow - bldr.Append((tt.max / 4) + 1) - valBldr.Append("foo") - bldr.Append((tt.max / 4) + 1) - valBldr.Append("bar") - arrs = append(arrs, bldr.NewArray()) - - bldr.Append((tt.max / 4) + 1) - valBldr.Append("baz") - bldr.Append((tt.max / 4) + 1) - valBldr.Append("bop") - arrs = append(arrs, bldr.NewArray()) - - defer func() { - for _, a := range arrs { - a.Release() - } - }() - - arr, err := array.Concatenate(arrs, mem) - assert.Nil(t, arr) - assert.ErrorIs(t, err, arrow.ErrInvalid) - }) - } -} - -func TestConcatPanic(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - allocator := &panicAllocator{ - n: 400, - Allocator: mem, - } - - g := gen.NewRandomArrayGenerator(0, memory.DefaultAllocator) - ar1 := g.ArrayOf(arrow.STRING, 32, 0) - defer ar1.Release() - ar2 := g.ArrayOf(arrow.STRING, 32, 0) - defer ar2.Release() - - concat, err := array.Concatenate([]arrow.Array{ar1, ar2}, allocator) - assert.Error(t, err) - assert.Nil(t, concat) -} diff --git a/go/arrow/array/data.go b/go/arrow/array/data.go deleted file mode 100644 index 19513ebaacf50..0000000000000 --- a/go/arrow/array/data.go +++ /dev/null @@ -1,277 +0,0 @@ -// Licensed to the Apache Software 
Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "hash/maphash" - "math/bits" - "sync/atomic" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -// Data represents the memory and metadata of an Arrow array. -type Data struct { - refCount int64 - dtype arrow.DataType - nulls int - offset int - length int - - // for dictionary arrays: buffers will be the null validity bitmap and the indexes that reference - // values in the dictionary member. childData would be empty in a dictionary array - buffers []*memory.Buffer // TODO(sgc): should this be an interface? - childData []arrow.ArrayData // TODO(sgc): managed by ListArray, StructArray and UnionArray types - dictionary *Data // only populated for dictionary arrays -} - -// NewData creates a new Data. 
-func NewData(dtype arrow.DataType, length int, buffers []*memory.Buffer, childData []arrow.ArrayData, nulls, offset int) *Data { - for _, b := range buffers { - if b != nil { - b.Retain() - } - } - - for _, child := range childData { - if child != nil { - child.Retain() - } - } - - return &Data{ - refCount: 1, - dtype: dtype, - nulls: nulls, - length: length, - offset: offset, - buffers: buffers, - childData: childData, - } -} - -// NewDataWithDictionary creates a new data object, but also sets the provided dictionary into the data if it's not nil -func NewDataWithDictionary(dtype arrow.DataType, length int, buffers []*memory.Buffer, nulls, offset int, dict *Data) *Data { - data := NewData(dtype, length, buffers, nil, nulls, offset) - if dict != nil { - dict.Retain() - } - data.dictionary = dict - return data -} - -func (d *Data) Copy() *Data { - // don't pass the slices directly, otherwise it retains the connection - // we need to make new slices and populate them with the same pointers - bufs := make([]*memory.Buffer, len(d.buffers)) - copy(bufs, d.buffers) - children := make([]arrow.ArrayData, len(d.childData)) - copy(children, d.childData) - - data := NewData(d.dtype, d.length, bufs, children, d.nulls, d.offset) - data.SetDictionary(d.dictionary) - return data -} - -// Reset sets the Data for re-use. -func (d *Data) Reset(dtype arrow.DataType, length int, buffers []*memory.Buffer, childData []arrow.ArrayData, nulls, offset int) { - // Retain new buffers before releasing existing buffers in-case they're the same ones to prevent accidental premature - // release. - for _, b := range buffers { - if b != nil { - b.Retain() - } - } - for _, b := range d.buffers { - if b != nil { - b.Release() - } - } - d.buffers = buffers - - // Retain new children data before releasing existing children data in-case they're the same ones to prevent accidental - // premature release. 
- for _, d := range childData { - if d != nil { - d.Retain() - } - } - for _, d := range d.childData { - if d != nil { - d.Release() - } - } - d.childData = childData - - d.dtype = dtype - d.length = length - d.nulls = nulls - d.offset = offset -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (d *Data) Retain() { - atomic.AddInt64(&d.refCount, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (d *Data) Release() { - debug.Assert(atomic.LoadInt64(&d.refCount) > 0, "too many releases") - - if atomic.AddInt64(&d.refCount, -1) == 0 { - for _, b := range d.buffers { - if b != nil { - b.Release() - } - } - - for _, b := range d.childData { - b.Release() - } - - if d.dictionary != nil { - d.dictionary.Release() - } - d.dictionary, d.buffers, d.childData = nil, nil, nil - } -} - -// DataType returns the DataType of the data. -func (d *Data) DataType() arrow.DataType { return d.dtype } - -func (d *Data) SetNullN(n int) { d.nulls = n } - -// NullN returns the number of nulls. -func (d *Data) NullN() int { return d.nulls } - -// Len returns the length. -func (d *Data) Len() int { return d.length } - -// Offset returns the offset. -func (d *Data) Offset() int { return d.offset } - -// Buffers returns the buffers. 
-func (d *Data) Buffers() []*memory.Buffer { return d.buffers } - -func (d *Data) Children() []arrow.ArrayData { return d.childData } - -// Dictionary returns the ArrayData object for the dictionary member, or nil -func (d *Data) Dictionary() arrow.ArrayData { return d.dictionary } - -// SetDictionary allows replacing the dictionary for this particular Data object -func (d *Data) SetDictionary(dict arrow.ArrayData) { - if d.dictionary != nil { - d.dictionary.Release() - d.dictionary = nil - } - if dict.(*Data) != nil { - dict.Retain() - d.dictionary = dict.(*Data) - } -} - -// SizeInBytes returns the size of the Data and any children and/or dictionary in bytes by -// recursively examining the nested structures of children and/or dictionary. -// The value returned is an upper-bound since offset is not taken into account. -func (d *Data) SizeInBytes() uint64 { - var size uint64 - - if d == nil { - return 0 - } - - for _, b := range d.Buffers() { - if b != nil { - size += uint64(b.Len()) - } - } - for _, c := range d.Children() { - size += c.SizeInBytes() - } - if d.dictionary != nil { - size += d.dictionary.SizeInBytes() - } - - return size -} - -// NewSliceData returns a new slice that shares backing data with the input. -// The returned Data slice starts at i and extends j-i elements, such as: -// -// slice := data[i:j] -// -// The returned value must be Release'd after use. -// -// NewSliceData panics if the slice is outside the valid range of the input Data. -// NewSliceData panics if j < i. 
-func NewSliceData(data arrow.ArrayData, i, j int64) arrow.ArrayData { - if j > int64(data.Len()) || i > j || data.Offset()+int(i) > data.Offset()+data.Len() { - panic("arrow/array: index out of range") - } - - for _, b := range data.Buffers() { - if b != nil { - b.Retain() - } - } - - for _, child := range data.Children() { - if child != nil { - child.Retain() - } - } - - if data.(*Data).dictionary != nil { - data.(*Data).dictionary.Retain() - } - - o := &Data{ - refCount: 1, - dtype: data.DataType(), - nulls: UnknownNullCount, - length: int(j - i), - offset: data.Offset() + int(i), - buffers: data.Buffers(), - childData: data.Children(), - dictionary: data.(*Data).dictionary, - } - - if data.NullN() == 0 { - o.nulls = 0 - } - - return o -} - -func Hash(h *maphash.Hash, data arrow.ArrayData) { - a := data.(*Data) - - h.Write((*[bits.UintSize / 8]byte)(unsafe.Pointer(&a.length))[:]) - h.Write((*[bits.UintSize / 8]byte)(unsafe.Pointer(&a.length))[:]) - if len(a.buffers) > 0 && a.buffers[0] != nil { - h.Write(a.buffers[0].Bytes()) - } - for _, c := range a.childData { - Hash(h, c) - } -} diff --git a/go/arrow/array/data_test.go b/go/arrow/array/data_test.go deleted file mode 100644 index 2cfc64fbe2d7e..0000000000000 --- a/go/arrow/array/data_test.go +++ /dev/null @@ -1,138 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "slices" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestDataReset(t *testing.T) { - var ( - buffers1 = make([]*memory.Buffer, 0, 3) - buffers2 = make([]*memory.Buffer, 0, 3) - ) - for i := 0; i < cap(buffers1); i++ { - buffers1 = append(buffers1, memory.NewBufferBytes([]byte("some-bytes1"))) - buffers2 = append(buffers2, memory.NewBufferBytes([]byte("some-bytes2"))) - } - - data := NewData(&arrow.StringType{}, 10, buffers1, nil, 0, 0) - data.Reset(&arrow.Int64Type{}, 5, buffers2, nil, 1, 2) - - for i := 0; i < 2; i++ { - assert.Equal(t, buffers2, data.Buffers()) - assert.Equal(t, &arrow.Int64Type{}, data.DataType()) - assert.Equal(t, 1, data.NullN()) - assert.Equal(t, 2, data.Offset()) - assert.Equal(t, 5, data.Len()) - - // Make sure it works when resetting the data with its own buffers (new buffers are retained - // before old ones are released.) 
- data.Reset(&arrow.Int64Type{}, 5, data.Buffers(), nil, 1, 2) - } -} - -func TestSizeInBytes(t *testing.T) { - var buffers1 = make([]*memory.Buffer, 0, 3) - - for i := 0; i < cap(buffers1); i++ { - buffers1 = append(buffers1, memory.NewBufferBytes([]byte("15-bytes-buffer"))) - } - data := NewData(&arrow.StringType{}, 10, buffers1, nil, 0, 0) - var arrayData arrow.ArrayData = data - dataWithChild := NewData(&arrow.StringType{}, 10, buffers1, []arrow.ArrayData{arrayData}, 0, 0) - - buffers2 := slices.Clone(buffers1) - buffers2[0] = nil - dataWithNilBuffer := NewData(&arrow.StringType{}, 10, buffers2, nil, 0, 0) - - t.Run("nil buffers", func(t *testing.T) { - expectedSize := uint64(30) - if actualSize := dataWithNilBuffer.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) - - t.Run("buffers only", func(t *testing.T) { - expectedSize := uint64(45) - if actualSize := data.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) - - t.Run("buffers and child data", func(t *testing.T) { - // 45 bytes in buffers, 45 bytes in child data - expectedSize := uint64(90) - if actualSize := dataWithChild.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) - - t.Run("buffers and nested child data", func(t *testing.T) { - var dataWithChildArrayData arrow.ArrayData = dataWithChild - var dataWithNestedChild arrow.ArrayData = NewData(&arrow.StringType{}, 10, buffers1, []arrow.ArrayData{dataWithChildArrayData}, 0, 0) - // 45 bytes in buffers, 90 bytes in nested child data - expectedSize := uint64(135) - if actualSize := dataWithNestedChild.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) - - t.Run("buffers and dictionary", func(t *testing.T) { - dictData := data - dataWithDict := NewDataWithDictionary(&arrow.StringType{}, 10, 
buffers1, 0, 0, dictData) - // 45 bytes in buffers, 45 bytes in dictionary - expectedSize := uint64(90) - if actualSize := dataWithDict.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) - - t.Run("sliced data", func(t *testing.T) { - sliceData := NewSliceData(arrayData, 3, 5) - // offset is not taken into account in SizeInBytes() - expectedSize := uint64(45) - if actualSize := sliceData.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) - - t.Run("sliced data with children", func(t *testing.T) { - var dataWithChildArrayData arrow.ArrayData = dataWithChild - sliceData := NewSliceData(dataWithChildArrayData, 3, 5) - // offset is not taken into account in SizeInBytes() - expectedSize := uint64(90) - if actualSize := sliceData.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) - - t.Run("buffers with children which are sliced data", func(t *testing.T) { - sliceData := NewSliceData(arrayData, 3, 5) - dataWithSlicedChildren := NewData(&arrow.StringType{}, 10, buffers1, []arrow.ArrayData{sliceData}, 0, 0) - // offset is not taken into account in SizeInBytes() - expectedSize := uint64(90) - if actualSize := dataWithSlicedChildren.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) -} diff --git a/go/arrow/array/decimal128.go b/go/arrow/array/decimal128.go deleted file mode 100644 index fd9e53f7f4c06..0000000000000 --- a/go/arrow/array/decimal128.go +++ /dev/null @@ -1,368 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "fmt" - "math/big" - "reflect" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// A type which represents an immutable sequence of 128-bit decimal values. 
-type Decimal128 struct { - array - - values []decimal128.Num -} - -func NewDecimal128Data(data arrow.ArrayData) *Decimal128 { - a := &Decimal128{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *Decimal128) Value(i int) decimal128.Num { return a.values[i] } - -func (a *Decimal128) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.GetOneForMarshal(i).(string) -} - -func (a *Decimal128) Values() []decimal128.Num { return a.values } - -func (a *Decimal128) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", a.Value(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Decimal128) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Decimal128Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} -func (a *Decimal128) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - typ := a.DataType().(*arrow.Decimal128Type) - n := a.Value(i) - scale := typ.Scale - f := (&big.Float{}).SetInt(n.BigInt()) - if scale < 0 { - f.SetPrec(128).Mul(f, (&big.Float{}).SetInt(decimal128.GetScaleMultiplier(int(-scale)).BigInt())) - } else { - f.SetPrec(128).Quo(f, (&big.Float{}).SetInt(decimal128.GetScaleMultiplier(int(scale)).BigInt())) - } - return f.Text('g', int(typ.Precision)) -} - -// ["1.23", ] -func (a *Decimal128) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - vals[i] = a.GetOneForMarshal(i) - } - return json.Marshal(vals) -} - -func arrayEqualDecimal128(left, right *Decimal128) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - 
return true -} - -type Decimal128Builder struct { - builder - - dtype *arrow.Decimal128Type - data *memory.Buffer - rawData []decimal128.Num -} - -func NewDecimal128Builder(mem memory.Allocator, dtype *arrow.Decimal128Type) *Decimal128Builder { - return &Decimal128Builder{ - builder: builder{refCount: 1, mem: mem}, - dtype: dtype, - } -} - -func (b *Decimal128Builder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Decimal128Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Decimal128Builder) Append(v decimal128.Num) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Decimal128Builder) UnsafeAppend(v decimal128.Num) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Decimal128Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Decimal128Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Decimal128Builder) AppendEmptyValue() { - b.Append(decimal128.Num{}) -} - -func (b *Decimal128Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Decimal128Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Decimal128Builder) AppendValues(v []decimal128.Num, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - if len(v) > 0 { - arrow.Decimal128Traits.Copy(b.rawData[b.length:], v) - } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Decimal128Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Decimal128Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Decimal128Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Decimal128Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Decimal128Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Decimal128Traits.BytesRequired(n)) - b.rawData = arrow.Decimal128Traits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a Decimal128 array from the memory buffers used by the builder and resets the Decimal128Builder -// so it can be used to build a new array. -func (b *Decimal128Builder) NewArray() arrow.Array { - return b.NewDecimal128Array() -} - -// NewDecimal128Array creates a Decimal128 array from the memory buffers used by the builder and resets the Decimal128Builder -// so it can be used to build a new array. 
-func (b *Decimal128Builder) NewDecimal128Array() (a *Decimal128) { - data := b.newData() - a = NewDecimal128Data(data) - data.Release() - return -} - -func (b *Decimal128Builder) newData() (data *Data) { - bytesRequired := arrow.Decimal128Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Decimal128Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - val, err := decimal128.FromString(s, b.dtype.Precision, b.dtype.Scale) - if err != nil { - b.AppendNull() - return err - } - b.Append(val) - return nil -} - -func (b *Decimal128Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case float64: - val, err := decimal128.FromFloat64(v, b.dtype.Precision, b.dtype.Scale) - if err != nil { - return err - } - b.Append(val) - case string: - val, err := decimal128.FromString(v, b.dtype.Precision, b.dtype.Scale) - if err != nil { - return err - } - b.Append(val) - case json.Number: - val, err := decimal128.FromString(v.String(), b.dtype.Precision, b.dtype.Scale) - if err != nil { - return err - } - b.Append(val) - case nil: - b.AppendNull() - return nil - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(decimal128.Num{}), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Decimal128Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -// UnmarshalJSON will add the unmarshalled values to this builder. 
-// -// If the values are strings, they will get parsed with big.ParseFloat using -// a rounding mode of big.ToNearestAway currently. -func (b *Decimal128Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("decimal128 builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*Decimal128)(nil) - _ Builder = (*Decimal128Builder)(nil) -) diff --git a/go/arrow/array/decimal128_test.go b/go/arrow/array/decimal128_test.go deleted file mode 100644 index 707a4f1a6c8d5..0000000000000 --- a/go/arrow/array/decimal128_test.go +++ /dev/null @@ -1,283 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestNewDecimal128Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDecimal128Builder(mem, &arrow.Decimal128Type{Precision: 10, Scale: 1}) - defer ab.Release() - - ab.Retain() - ab.Release() - - want := []decimal128.Num{ - decimal128.New(1, 1), - decimal128.New(2, 2), - decimal128.New(3, 3), - {}, - decimal128.FromI64(-5), - decimal128.FromI64(-6), - {}, - decimal128.FromI64(8), - decimal128.FromI64(9), - decimal128.FromI64(10), - } - valids := []bool{true, true, true, false, true, true, false, true, true, true} - - for i, valid := range valids { - switch { - case valid: - ab.Append(want[i]) - default: - ab.AppendNull() - } - } - - // check state of builder before NewDecimal128Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewArray().(*array.Decimal128) - a.Retain() - a.Release() - - // check state of builder after NewDecimal128Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDecimal128Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDecimal128Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDecimal128Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - - assert.Equal(t, want, a.Values(), "unexpected Decimal128Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 bytes due to minBuilderCapacity") - assert.Len(t, a.Values(), 10, "unexpected length of 
Decimal128Values") - assert.Equal(t, 10*arrow.Decimal128SizeBytes, a.Data().Buffers()[1].Len()) - - a.Release() - ab.Append(decimal128.FromI64(7)) - ab.Append(decimal128.FromI64(8)) - - a = ab.NewDecimal128Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []decimal128.Num{decimal128.FromI64(7), decimal128.FromI64(8)}, a.Values()) - assert.Len(t, a.Values(), 2) - assert.Equal(t, 2*arrow.Decimal128SizeBytes, a.Data().Buffers()[1].Len()) - - a.Release() -} - -func TestDecimal128Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDecimal128Builder(mem, &arrow.Decimal128Type{Precision: 10, Scale: 1}) - defer ab.Release() - - want := []decimal128.Num{decimal128.FromI64(3), decimal128.FromI64(4)} - - ab.AppendValues([]decimal128.Num{}, nil) - a := ab.NewDecimal128Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewDecimal128Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(want, nil) - a = ab.NewDecimal128Array() - assert.Equal(t, want, a.Values()) - a.Release() - - ab.AppendValues([]decimal128.Num{}, nil) - ab.AppendValues(want, nil) - a = ab.NewDecimal128Array() - assert.Equal(t, want, a.Values()) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([]decimal128.Num{}, nil) - a = ab.NewDecimal128Array() - assert.Equal(t, want, a.Values()) - a.Release() -} - -func TestDecimal128Slice(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Decimal128Type{Precision: 10, Scale: 1} - b := array.NewDecimal128Builder(mem, dtype) - defer b.Release() - - var data = []decimal128.Num{ - decimal128.FromI64(-1), - decimal128.FromI64(+0), - decimal128.FromI64(+1), - decimal128.New(-4, 4), - } - b.AppendValues(data[:2], nil) - b.AppendNull() - b.Append(data[3]) - - arr := b.NewDecimal128Array() - defer arr.Release() - - if got, want := arr.Len(), 
len(data); got != want { - t.Fatalf("invalid array length: got=%d, want=%d", got, want) - } - - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Decimal128) - if !ok { - t.Fatalf("could not type-assert to array.String") - } - - if got, want := v.String(), `[(null) {4 -4}]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - assert.Equal(t, array.NullValueStr, v.ValueStr(0)) - assert.Equal(t, "-7.378697629e+18", v.ValueStr(1)) - - if got, want := v.NullN(), 1; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if got, want := v.Data().Offset(), 2; got != want { - t.Fatalf("invalid offset: got=%d, want=%d", got, want) - } -} - -func TestDecimal128StringRoundTrip(t *testing.T) { - dt := &arrow.Decimal128Type{Precision: 20, Scale: 5} - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewDecimal128Builder(mem, dt) - defer b.Release() - - values := []decimal128.Num{ - decimal128.New(1, 1), - decimal128.New(1, 2), - decimal128.New(1, 3), - {}, - decimal128.FromI64(-5), - decimal128.FromI64(-6), - {}, - decimal128.FromI64(8), - decimal128.FromI64(9), - decimal128.FromI64(10), - } - val1, err := decimal128.FromString("0.99", dt.Precision, dt.Scale) - if err != nil { - t.Fatal(err) - } - val2, err := decimal128.FromString("1234567890.12345", dt.Precision, dt.Scale) - if err != nil { - t.Fatal(err) - } - values = append(values, val1, val2) - - valid := []bool{true, true, true, false, true, true, false, true, true, true, true, true} - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.Decimal128) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewDecimal128Builder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Decimal128) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestDecimal128GetOneForMarshal(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Decimal128Type{Precision: 38, Scale: 20} - - b := array.NewDecimal128Builder(mem, dtype) - defer b.Release() - - cases := []struct { - give any - want any - }{ - {"1", "1"}, - {"1.25", "1.25"}, - {"0.99", "0.99"}, - {"1234567890.123456789", "1234567890.123456789"}, - {nil, nil}, - {"-0.99", "-0.99"}, - {"-1234567890.123456789", "-1234567890.123456789"}, - {"0.0000000000000000001", "1e-19"}, - } - for _, v := range cases { - if v.give == nil { - b.AppendNull() - continue - } - - dt, err := decimal128.FromString(v.give.(string), dtype.Precision, dtype.Scale) - if err != nil { - t.Fatal(err) - } - b.Append(dt) - } - - arr := b.NewDecimal128Array() - defer arr.Release() - - if got, want := arr.Len(), len(cases); got != want { - t.Fatalf("invalid array length: got=%d, want=%d", got, want) - } - - for i := range cases { - assert.Equalf(t, cases[i].want, arr.GetOneForMarshal(i), "unexpected value at index %d", i) - } -} diff --git a/go/arrow/array/decimal256.go b/go/arrow/array/decimal256.go deleted file mode 100644 index 6431306f969c3..0000000000000 --- a/go/arrow/array/decimal256.go +++ /dev/null @@ -1,368 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "fmt" - "math/big" - "reflect" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// Decimal256 is a type that represents an immutable sequence of 256-bit decimal values. 
-type Decimal256 struct { - array - - values []decimal256.Num -} - -func NewDecimal256Data(data arrow.ArrayData) *Decimal256 { - a := &Decimal256{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *Decimal256) Value(i int) decimal256.Num { return a.values[i] } - -func (a *Decimal256) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.GetOneForMarshal(i).(string) -} - -func (a *Decimal256) Values() []decimal256.Num { return a.values } - -func (a *Decimal256) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", a.Value(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Decimal256) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Decimal256Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Decimal256) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - typ := a.DataType().(*arrow.Decimal256Type) - n := a.Value(i) - scale := typ.Scale - f := (&big.Float{}).SetInt(n.BigInt()) - if scale < 0 { - f.SetPrec(256).Mul(f, (&big.Float{}).SetInt(decimal256.GetScaleMultiplier(int(-scale)).BigInt())) - } else { - f.SetPrec(256).Quo(f, (&big.Float{}).SetInt(decimal256.GetScaleMultiplier(int(scale)).BigInt())) - } - return f.Text('g', int(typ.Precision)) -} - -func (a *Decimal256) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - vals[i] = a.GetOneForMarshal(i) - } - return json.Marshal(vals) -} - -func arrayEqualDecimal256(left, right *Decimal256) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} 
- -type Decimal256Builder struct { - builder - - dtype *arrow.Decimal256Type - data *memory.Buffer - rawData []decimal256.Num -} - -func NewDecimal256Builder(mem memory.Allocator, dtype *arrow.Decimal256Type) *Decimal256Builder { - return &Decimal256Builder{ - builder: builder{refCount: 1, mem: mem}, - dtype: dtype, - } -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Decimal256Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Decimal256Builder) Append(v decimal256.Num) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Decimal256Builder) UnsafeAppend(v decimal256.Num) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Decimal256Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Decimal256Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Decimal256Builder) AppendEmptyValue() { - b.Append(decimal256.Num{}) -} - -func (b *Decimal256Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Decimal256Builder) Type() arrow.DataType { return b.dtype } - -func (b *Decimal256Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Decimal256Builder) AppendValues(v []decimal256.Num, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("arrow/array: len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - if len(v) > 0 { - arrow.Decimal256Traits.Copy(b.rawData[b.length:], v) - } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Decimal256Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Decimal256Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Decimal256Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Decimal256Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Decimal256Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Decimal256Traits.BytesRequired(n)) - b.rawData = arrow.Decimal256Traits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a Decimal256 array from the memory buffers used by the builder and resets the Decimal256Builder -// so it can be used to build a new array. -func (b *Decimal256Builder) NewArray() arrow.Array { - return b.NewDecimal256Array() -} - -// NewDecimal256Array creates a Decimal256 array from the memory buffers used by the builder and resets the Decimal256Builder -// so it can be used to build a new array. 
-func (b *Decimal256Builder) NewDecimal256Array() (a *Decimal256) { - data := b.newData() - a = NewDecimal256Data(data) - data.Release() - return -} - -func (b *Decimal256Builder) newData() (data *Data) { - bytesRequired := arrow.Decimal256Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Decimal256Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - val, err := decimal256.FromString(s, b.dtype.Precision, b.dtype.Scale) - if err != nil { - b.AppendNull() - return err - } - b.Append(val) - return nil -} - -func (b *Decimal256Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case float64: - val, err := decimal256.FromFloat64(v, b.dtype.Precision, b.dtype.Scale) - if err != nil { - return err - } - b.Append(val) - case string: - out, err := decimal256.FromString(v, b.dtype.Precision, b.dtype.Scale) - if err != nil { - return err - } - b.Append(out) - case json.Number: - out, err := decimal256.FromString(v.String(), b.dtype.Precision, b.dtype.Scale) - if err != nil { - return err - } - b.Append(out) - case nil: - b.AppendNull() - return nil - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(decimal256.Num{}), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Decimal256Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -// UnmarshalJSON will add the unmarshalled values to this builder. 
-// -// If the values are strings, they will get parsed with big.ParseFloat using -// a rounding mode of big.ToNearestAway currently. -func (b *Decimal256Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("arrow/array: decimal256 builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*Decimal256)(nil) - _ Builder = (*Decimal256Builder)(nil) -) diff --git a/go/arrow/array/decimal256_test.go b/go/arrow/array/decimal256_test.go deleted file mode 100644 index 8adb810165430..0000000000000 --- a/go/arrow/array/decimal256_test.go +++ /dev/null @@ -1,293 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestNewDecimal256Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: 10, Scale: 1}) - defer ab.Release() - - ab.Retain() - ab.Release() - - want := []decimal256.Num{ - decimal256.New(1, 1, 1, 1), - decimal256.New(2, 2, 2, 2), - decimal256.New(3, 3, 3, 3), - {}, - decimal256.FromI64(-5), - decimal256.FromI64(-6), - {}, - decimal256.FromI64(8), - decimal256.FromI64(9), - decimal256.FromI64(10), - } - valids := []bool{true, true, true, false, true, true, false, true, true, true} - - for i, valid := range valids { - switch { - case valid: - ab.Append(want[i]) - default: - ab.AppendNull() - } - } - - // check state of builder before NewDecimal256Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewArray().(*array.Decimal256) - a.Retain() - a.Release() - - // check state of builder after NewDecimal256Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDecimal256Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDecimal256Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDecimal256Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - - assert.Equal(t, want, a.Values(), "unexpected Decimal256Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 bytes due to minBuilderCapacity") - assert.Len(t, a.Values(), 10, 
"unexpected length of Decimal256Values") - assert.Equal(t, 10*arrow.Decimal256SizeBytes, a.Data().Buffers()[1].Len()) - - a.Release() - ab.Append(decimal256.FromI64(7)) - ab.Append(decimal256.FromI64(8)) - - a = ab.NewDecimal256Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 bytes due to minBuilderCapacity") - assert.Equal(t, []decimal256.Num{decimal256.FromI64(7), decimal256.FromI64(8)}, a.Values()) - assert.Len(t, a.Values(), 2) - assert.Equal(t, 2*arrow.Decimal256SizeBytes, a.Data().Buffers()[1].Len()) - - a.Release() -} - -func TestDecimal256Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: 10, Scale: 1}) - defer ab.Release() - - want := []decimal256.Num{decimal256.FromI64(3), decimal256.FromI64(4)} - - ab.AppendValues([]decimal256.Num{}, nil) - a := ab.NewDecimal256Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewDecimal256Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(want, nil) - a = ab.NewDecimal256Array() - assert.Equal(t, want, a.Values()) - a.Release() - - ab.AppendValues([]decimal256.Num{}, nil) - ab.AppendValues(want, nil) - a = ab.NewDecimal256Array() - assert.Equal(t, want, a.Values()) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([]decimal256.Num{}, nil) - a = ab.NewDecimal256Array() - assert.Equal(t, want, a.Values()) - a.Release() -} - -func TestDecimal256Slice(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Decimal256Type{Precision: 10, Scale: 1} - b := array.NewDecimal256Builder(mem, dtype) - defer b.Release() - - var data = []decimal256.Num{ - decimal256.FromI64(-1), - decimal256.FromI64(+0), - decimal256.FromI64(+1), - decimal256.New(4, 4, 4, 4), - } - b.AppendValues(data[:2], nil) - 
b.AppendNull() - b.Append(data[3]) - - arr := b.NewDecimal256Array() - defer arr.Release() - - if got, want := arr.Len(), len(data); got != want { - t.Fatalf("invalid array length: got=%d, want=%d", got, want) - } - - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Decimal256) - if !ok { - t.Fatalf("could not type-assert to array.String") - } - - if got, want := v.String(), `[(null) {[4 4 4 4]}]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - assert.Equal(t, array.NullValueStr, v.ValueStr(0)) - assert.Equal(t, "2.510840694e+57", v.ValueStr(1)) - - if got, want := v.NullN(), 1; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if got, want := v.Data().Offset(), 2; got != want { - t.Fatalf("invalid offset: got=%d, want=%d", got, want) - } -} - -func TestDecimal256StringRoundTrip(t *testing.T) { - dt := &arrow.Decimal256Type{Precision: 70, Scale: 10} - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewDecimal256Builder(mem, dt) - defer b.Release() - - values := []decimal256.Num{ - decimal256.New(1, 1, 1, 1), - decimal256.New(2, 2, 2, 2), - decimal256.New(3, 3, 3, 3), - {}, - decimal256.FromI64(-5), - decimal256.FromI64(-6), - {}, - decimal256.FromI64(8), - decimal256.FromI64(9), - decimal256.FromI64(10), - } - val1, err := decimal256.FromString("0.99", dt.Precision, dt.Scale) - if err != nil { - t.Fatal(err) - } - val2, err := decimal256.FromString("1234567890.123456789", dt.Precision, dt.Scale) - if err != nil { - t.Fatal(err) - } - values = append(values, val1, val2) - - valid := []bool{true, true, true, false, true, true, false, true, true, true, true, true} - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.Decimal256) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewDecimal256Builder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - v := arr.ValueStr(i) - assert.NoError(t, b1.AppendValueFromString(v)) - } - - arr1 := b1.NewArray().(*array.Decimal256) - defer arr1.Release() - - for i := 0; i < arr.Len(); i++ { - if arr.IsNull(i) && arr1.IsNull(i) { - continue - } - if arr.Value(i) != arr1.Value(i) { - t.Fatalf("unexpected value at index %d: got=%v, want=%v", i, arr1.Value(i), arr.Value(i)) - } - } - assert.True(t, array.Equal(arr, arr1)) -} - -func TestDecimal256GetOneForMarshal(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Decimal256Type{Precision: 38, Scale: 20} - - b := array.NewDecimal256Builder(mem, dtype) - defer b.Release() - - cases := []struct { - give any - want any - }{ - {"1", "1"}, - {"1.25", "1.25"}, - {"0.99", "0.99"}, - {"1234567890.123456789", "1234567890.123456789"}, - {nil, nil}, - {"-0.99", "-0.99"}, - {"-1234567890.123456789", "-1234567890.123456789"}, - {"0.0000000000000000001", "1e-19"}, - } - for _, v := range cases { - if v.give == nil { - b.AppendNull() - continue - } - - dt, err := decimal256.FromString(v.give.(string), dtype.Precision, dtype.Scale) - if err != nil { - t.Fatal(err) - } - b.Append(dt) - } - - arr := b.NewDecimal256Array() - defer arr.Release() - - if got, want := arr.Len(), len(cases); got != want { - t.Fatalf("invalid array length: got=%d, want=%d", got, want) - } - - for i := range cases { - assert.Equalf(t, cases[i].want, arr.GetOneForMarshal(i), "unexpected value at index %d", i) - } -} diff --git a/go/arrow/array/decimal_test.go b/go/arrow/array/decimal_test.go deleted file mode 100644 index b321bd7fbbe7b..0000000000000 --- a/go/arrow/array/decimal_test.go +++ /dev/null @@ -1,222 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "fmt" - "math/big" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/suite" -) - -type decimalValue interface{} - -func bitmapFromSlice(vals []bool) []byte { - out := make([]byte, int(bitutil.BytesForBits(int64(len(vals))))) - writer := bitutil.NewBitmapWriter(out, 0, len(vals)) - for _, val := range vals { - if val { - writer.Set() - } else { - writer.Clear() - } - writer.Next() - } - writer.Finish() - return out -} - -type DecimalTestSuite struct { - suite.Suite - - dt arrow.DataType - mem *memory.CheckedAllocator -} - -func (d *DecimalTestSuite) SetupTest() { - d.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) -} - -func (d *DecimalTestSuite) TearDownTest() { - d.mem.AssertSize(d.T(), 0) -} - -func (d *DecimalTestSuite) makeData(input []decimalValue, out []byte) { - switch d.dt.ID() { - case arrow.DECIMAL128: - for _, v := range input { - arrow.Decimal128Traits.PutValue(out, v.(decimal128.Num)) - out = out[arrow.Decimal128SizeBytes:] - } - case 
arrow.DECIMAL256: - for _, v := range input { - arrow.Decimal256Traits.PutValue(out, v.(decimal256.Num)) - out = out[arrow.Decimal256SizeBytes:] - } - } -} - -func (d *DecimalTestSuite) testCreate(bitWidth int, prec int32, draw []decimalValue, valids []bool, offset int64) arrow.Array { - switch bitWidth { - case 128: - d.dt = &arrow.Decimal128Type{Precision: prec, Scale: 4} - case 256: - d.dt = &arrow.Decimal256Type{Precision: prec, Scale: 4} - } - - bldr := array.NewBuilder(d.mem, d.dt) - defer bldr.Release() - bldr.Reserve(len(draw)) - - nullCount := 0 - for i, b := range valids { - if b { - switch v := draw[i].(type) { - case decimal128.Num: - bldr.(*array.Decimal128Builder).Append(v) - case decimal256.Num: - bldr.(*array.Decimal256Builder).Append(v) - } - } else { - bldr.AppendNull() - nullCount++ - } - } - - arr := bldr.NewArray() - d.EqualValues(0, bldr.Len()) - - rawBytes := make([]byte, len(draw)*(d.dt.(arrow.FixedWidthDataType).BitWidth()/8)) - d.makeData(draw, rawBytes) - - expectedData := memory.NewBufferBytes(rawBytes) - expectedNullBitmap := bitmapFromSlice(valids) - expectedNullCount := len(draw) - bitutil.CountSetBits(expectedNullBitmap, 0, len(valids)) - - expected := array.NewData(d.dt, len(valids), []*memory.Buffer{memory.NewBufferBytes(expectedNullBitmap), expectedData}, nil, expectedNullCount, 0) - defer expected.Release() - - expectedArr := array.MakeFromData(expected) - defer expectedArr.Release() - - lhs := array.NewSlice(arr, offset, int64(arr.Len())-offset) - rhs := array.NewSlice(expectedArr, offset, int64(expectedArr.Len())-offset) - defer func() { - lhs.Release() - rhs.Release() - }() - - d.Truef(array.Equal(lhs, rhs), "expected: %s, got: %s\n", rhs, lhs) - return arr -} - -type Decimal128TestSuite struct { - DecimalTestSuite -} - -func (d *Decimal128TestSuite) runTest(f func(prec int32)) { - for prec := int32(1); prec <= 38; prec++ { - d.Run(fmt.Sprintf("prec=%d", prec), func() { f(prec) }) - } -} - -func (d *Decimal128TestSuite) 
TestNoNulls() { - d.runTest(func(prec int32) { - draw := []decimalValue{decimal128.FromU64(1), decimal128.FromI64(-2), - decimal128.FromU64(2389), decimal128.FromU64(4), - decimal128.FromI64(-12348)} - valids := []bool{true, true, true, true, true} - arr := d.testCreate(128, prec, draw, valids, 0) - arr.Release() - arr = d.testCreate(128, prec, draw, valids, 2) - arr.Release() - }) -} - -func (d *Decimal128TestSuite) TestWithNulls() { - d.runTest(func(prec int32) { - draw := []decimalValue{decimal128.FromU64(1), decimal128.FromU64(2), - decimal128.FromI64(-1), decimal128.FromI64(4), decimal128.FromI64(-1), - decimal128.FromI64(1), decimal128.FromI64(2)} - bigVal, _ := (&big.Int{}).SetString("230342903942234234", 10) - draw = append(draw, decimal128.FromBigInt(bigVal)) - - bigNeg, _ := (&big.Int{}).SetString("-23049302932235234", 10) - draw = append(draw, decimal128.FromBigInt(bigNeg)) - - valids := []bool{true, true, false, true, false, true, true, true, true} - arr := d.testCreate(128, prec, draw, valids, 0) - arr.Release() - arr = d.testCreate(128, prec, draw, valids, 2) - arr.Release() - }) -} - -type Decimal256TestSuite struct { - DecimalTestSuite -} - -func (d *Decimal256TestSuite) runTest(f func(prec int32)) { - for _, prec := range []int32{1, 2, 5, 10, 38, 39, 40, 75, 76} { - d.Run(fmt.Sprintf("prec=%d", prec), func() { f(prec) }) - } -} - -func (d *Decimal256TestSuite) TestNoNulls() { - d.runTest(func(prec int32) { - draw := []decimalValue{decimal256.FromU64(1), decimal256.FromI64(-2), - decimal256.FromU64(2389), decimal256.FromU64(4), - decimal256.FromI64(-12348)} - valids := []bool{true, true, true, true, true} - arr := d.testCreate(256, prec, draw, valids, 0) - arr.Release() - arr = d.testCreate(256, prec, draw, valids, 2) - arr.Release() - }) -} - -func (d *Decimal256TestSuite) TestWithNulls() { - d.runTest(func(prec int32) { - draw := []decimalValue{decimal256.FromU64(1), decimal256.FromU64(2), - decimal256.FromI64(-1), decimal256.FromI64(4), 
decimal256.FromI64(-1), - decimal256.FromI64(1), decimal256.FromI64(2)} - - // (pow(2, 255) - 1) - bigVal, _ := (&big.Int{}).SetString("57896044618658097711785492504343953926634992332820282019728792003956564819967", 10) - draw = append(draw, decimal256.FromBigInt(bigVal)) - - draw = append(draw, decimal256.FromBigInt(bigVal.Neg(bigVal))) - - valids := []bool{true, true, false, true, false, true, true, true, true} - arr := d.testCreate(256, prec, draw, valids, 0) - arr.Release() - arr = d.testCreate(256, prec, draw, valids, 2) - arr.Release() - }) -} - -func TestDecimal(t *testing.T) { - suite.Run(t, new(Decimal128TestSuite)) - suite.Run(t, new(Decimal256TestSuite)) -} diff --git a/go/arrow/array/dictionary.go b/go/arrow/array/dictionary.go deleted file mode 100644 index ca7fed5257085..0000000000000 --- a/go/arrow/array/dictionary.go +++ /dev/null @@ -1,1958 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "errors" - "fmt" - "math" - "math/bits" - "sync/atomic" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/float16" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/hashing" - "github.com/apache/arrow/go/v18/internal/json" - "github.com/apache/arrow/go/v18/internal/utils" -) - -// Dictionary represents the type for dictionary-encoded data with a data -// dependent dictionary. -// -// A dictionary array contains an array of non-negative integers (the "dictionary" -// indices") along with a data type containing a "dictionary" corresponding to -// the distinct values represented in the data. -// -// For example, the array: -// -// ["foo", "bar", "foo", "bar", "foo", "bar"] -// -// with dictionary ["bar", "foo"], would have the representation of: -// -// indices: [1, 0, 1, 0, 1, 0] -// dictionary: ["bar", "foo"] -// -// The indices in principle may be any integer type. -type Dictionary struct { - array - - indices arrow.Array - dict arrow.Array -} - -// NewDictionaryArray constructs a dictionary array with the provided indices -// and dictionary using the given type. -func NewDictionaryArray(typ arrow.DataType, indices, dict arrow.Array) *Dictionary { - a := &Dictionary{} - a.array.refCount = 1 - dictdata := NewData(typ, indices.Len(), indices.Data().Buffers(), indices.Data().Children(), indices.NullN(), indices.Data().Offset()) - dictdata.dictionary = dict.Data().(*Data) - dict.Data().Retain() - - defer dictdata.Release() - a.setData(dictdata) - return a -} - -// checkIndexBounds returns an error if any value in the provided integer -// arraydata is >= the passed upperlimit or < 0. 
otherwise nil -func checkIndexBounds(indices *Data, upperlimit uint64) error { - if indices.length == 0 { - return nil - } - - var maxval uint64 - switch indices.dtype.ID() { - case arrow.UINT8: - maxval = math.MaxUint8 - case arrow.UINT16: - maxval = math.MaxUint16 - case arrow.UINT32: - maxval = math.MaxUint32 - case arrow.UINT64: - maxval = math.MaxUint64 - } - // for unsigned integers, if the values array is larger than the maximum - // index value (especially for UINT8/UINT16), then there's no need to - // boundscheck. for signed integers we still need to bounds check - // because a value could be < 0. - isSigned := maxval == 0 - if !isSigned && upperlimit > maxval { - return nil - } - - start := indices.offset - end := indices.offset + indices.length - - // TODO(ARROW-15950): lift BitSetRunReader from parquet to utils - // and use it here for performance improvement. - - switch indices.dtype.ID() { - case arrow.INT8: - data := arrow.Int8Traits.CastFromBytes(indices.buffers[1].Bytes()) - min, max := utils.GetMinMaxInt8(data[start:end]) - if min < 0 || max >= int8(upperlimit) { - return fmt.Errorf("contains out of bounds index: min: %d, max: %d", min, max) - } - case arrow.UINT8: - data := arrow.Uint8Traits.CastFromBytes(indices.buffers[1].Bytes()) - _, max := utils.GetMinMaxUint8(data[start:end]) - if max >= uint8(upperlimit) { - return fmt.Errorf("contains out of bounds index: max: %d", max) - } - case arrow.INT16: - data := arrow.Int16Traits.CastFromBytes(indices.buffers[1].Bytes()) - min, max := utils.GetMinMaxInt16(data[start:end]) - if min < 0 || max >= int16(upperlimit) { - return fmt.Errorf("contains out of bounds index: min: %d, max: %d", min, max) - } - case arrow.UINT16: - data := arrow.Uint16Traits.CastFromBytes(indices.buffers[1].Bytes()) - _, max := utils.GetMinMaxUint16(data[start:end]) - if max >= uint16(upperlimit) { - return fmt.Errorf("contains out of bounds index: max: %d", max) - } - case arrow.INT32: - data := 
arrow.Int32Traits.CastFromBytes(indices.buffers[1].Bytes()) - min, max := utils.GetMinMaxInt32(data[start:end]) - if min < 0 || max >= int32(upperlimit) { - return fmt.Errorf("contains out of bounds index: min: %d, max: %d", min, max) - } - case arrow.UINT32: - data := arrow.Uint32Traits.CastFromBytes(indices.buffers[1].Bytes()) - _, max := utils.GetMinMaxUint32(data[start:end]) - if max >= uint32(upperlimit) { - return fmt.Errorf("contains out of bounds index: max: %d", max) - } - case arrow.INT64: - data := arrow.Int64Traits.CastFromBytes(indices.buffers[1].Bytes()) - min, max := utils.GetMinMaxInt64(data[start:end]) - if min < 0 || max >= int64(upperlimit) { - return fmt.Errorf("contains out of bounds index: min: %d, max: %d", min, max) - } - case arrow.UINT64: - data := arrow.Uint64Traits.CastFromBytes(indices.buffers[1].Bytes()) - _, max := utils.GetMinMaxUint64(data[indices.offset : indices.offset+indices.length]) - if max >= upperlimit { - return fmt.Errorf("contains out of bounds value: max: %d", max) - } - default: - return fmt.Errorf("invalid type for bounds checking: %T", indices.dtype) - } - - return nil -} - -// NewValidatedDictionaryArray constructs a dictionary array from the provided indices -// and dictionary arrays, while also performing validation checks to ensure correctness -// such as bounds checking at are usually skipped for performance. 
-func NewValidatedDictionaryArray(typ *arrow.DictionaryType, indices, dict arrow.Array) (*Dictionary, error) { - if indices.DataType().ID() != typ.IndexType.ID() { - return nil, fmt.Errorf("dictionary type index (%T) does not match indices array type (%T)", typ.IndexType, indices.DataType()) - } - - if !arrow.TypeEqual(typ.ValueType, dict.DataType()) { - return nil, fmt.Errorf("dictionary value type (%T) does not match dict array type (%T)", typ.ValueType, dict.DataType()) - } - - if err := checkIndexBounds(indices.Data().(*Data), uint64(dict.Len())); err != nil { - return nil, err - } - - return NewDictionaryArray(typ, indices, dict), nil -} - -// NewDictionaryData creates a strongly typed Dictionary array from -// an ArrayData object with a datatype of arrow.Dictionary and a dictionary -func NewDictionaryData(data arrow.ArrayData) *Dictionary { - a := &Dictionary{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (d *Dictionary) Retain() { - atomic.AddInt64(&d.refCount, 1) -} - -func (d *Dictionary) Release() { - debug.Assert(atomic.LoadInt64(&d.refCount) > 0, "too many releases") - - if atomic.AddInt64(&d.refCount, -1) == 0 { - d.data.Release() - d.data, d.nullBitmapBytes = nil, nil - d.indices.Release() - d.indices = nil - if d.dict != nil { - d.dict.Release() - d.dict = nil - } - } -} - -func (d *Dictionary) setData(data *Data) { - d.array.setData(data) - - dictType := data.dtype.(*arrow.DictionaryType) - if data.dictionary == nil { - if data.length > 0 { - panic("arrow/array: no dictionary set in Data for Dictionary array") - } - } else { - debug.Assert(arrow.TypeEqual(dictType.ValueType, data.dictionary.DataType()), "mismatched dictionary value types") - } - - indexData := NewData(dictType.IndexType, data.length, data.buffers, data.childData, data.nulls, data.offset) - defer indexData.Release() - d.indices = MakeFromData(indexData) -} - -// Dictionary returns the values array that makes up the dictionary for this -// array. 
-func (d *Dictionary) Dictionary() arrow.Array { - if d.dict == nil { - d.dict = MakeFromData(d.data.dictionary) - } - return d.dict -} - -// Indices returns the underlying array of indices as it's own array -func (d *Dictionary) Indices() arrow.Array { - return d.indices -} - -// CanCompareIndices returns true if the dictionary arrays can be compared -// without having to unify the dictionaries themselves first. -// This means that the index types are equal too. -func (d *Dictionary) CanCompareIndices(other *Dictionary) bool { - if !arrow.TypeEqual(d.indices.DataType(), other.indices.DataType()) { - return false - } - - minlen := int64(min(d.data.dictionary.length, other.data.dictionary.length)) - return SliceEqual(d.Dictionary(), 0, minlen, other.Dictionary(), 0, minlen) -} - -func (d *Dictionary) ValueStr(i int) string { - if d.IsNull(i) { - return NullValueStr - } - return d.Dictionary().ValueStr(d.GetValueIndex(i)) -} - -func (d *Dictionary) String() string { - return fmt.Sprintf("{ dictionary: %v\n indices: %v }", d.Dictionary(), d.Indices()) -} - -// GetValueIndex returns the dictionary index for the value at index i of the array. -// The actual value can be retrieved by using d.Dictionary().(valuetype).Value(d.GetValueIndex(i)) -func (d *Dictionary) GetValueIndex(i int) int { - indiceData := d.data.buffers[1].Bytes() - // we know the value is non-negative per the spec, so - // we can use the unsigned value regardless. 
- switch d.indices.DataType().ID() { - case arrow.UINT8, arrow.INT8: - return int(uint8(indiceData[d.data.offset+i])) - case arrow.UINT16, arrow.INT16: - return int(arrow.Uint16Traits.CastFromBytes(indiceData)[d.data.offset+i]) - case arrow.UINT32, arrow.INT32: - idx := arrow.Uint32Traits.CastFromBytes(indiceData)[d.data.offset+i] - debug.Assert(bits.UintSize == 64 || idx <= math.MaxInt32, "arrow/dictionary: truncation of index value") - return int(idx) - case arrow.UINT64, arrow.INT64: - idx := arrow.Uint64Traits.CastFromBytes(indiceData)[d.data.offset+i] - debug.Assert((bits.UintSize == 32 && idx <= math.MaxInt32) || (bits.UintSize == 64 && idx <= math.MaxInt64), "arrow/dictionary: truncation of index value") - return int(idx) - } - debug.Assert(false, "unreachable dictionary index") - return -1 -} - -func (d *Dictionary) GetOneForMarshal(i int) interface{} { - if d.IsNull(i) { - return nil - } - vidx := d.GetValueIndex(i) - return d.Dictionary().GetOneForMarshal(vidx) -} - -func (d *Dictionary) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, d.Len()) - for i := 0; i < d.Len(); i++ { - vals[i] = d.GetOneForMarshal(i) - } - return json.Marshal(vals) -} - -func arrayEqualDict(l, r *Dictionary) bool { - return Equal(l.Dictionary(), r.Dictionary()) && Equal(l.indices, r.indices) -} - -func arrayApproxEqualDict(l, r *Dictionary, opt equalOption) bool { - return arrayApproxEqual(l.Dictionary(), r.Dictionary(), opt) && arrayApproxEqual(l.indices, r.indices, opt) -} - -// helper for building the properly typed indices of the dictionary builder -type IndexBuilder struct { - Builder - Append func(int) -} - -func createIndexBuilder(mem memory.Allocator, dt arrow.FixedWidthDataType) (ret IndexBuilder, err error) { - ret = IndexBuilder{Builder: NewBuilder(mem, dt)} - switch dt.ID() { - case arrow.INT8: - ret.Append = func(idx int) { - ret.Builder.(*Int8Builder).Append(int8(idx)) - } - case arrow.UINT8: - ret.Append = func(idx int) { - 
ret.Builder.(*Uint8Builder).Append(uint8(idx)) - } - case arrow.INT16: - ret.Append = func(idx int) { - ret.Builder.(*Int16Builder).Append(int16(idx)) - } - case arrow.UINT16: - ret.Append = func(idx int) { - ret.Builder.(*Uint16Builder).Append(uint16(idx)) - } - case arrow.INT32: - ret.Append = func(idx int) { - ret.Builder.(*Int32Builder).Append(int32(idx)) - } - case arrow.UINT32: - ret.Append = func(idx int) { - ret.Builder.(*Uint32Builder).Append(uint32(idx)) - } - case arrow.INT64: - ret.Append = func(idx int) { - ret.Builder.(*Int64Builder).Append(int64(idx)) - } - case arrow.UINT64: - ret.Append = func(idx int) { - ret.Builder.(*Uint64Builder).Append(uint64(idx)) - } - default: - debug.Assert(false, "dictionary index type must be integral") - err = fmt.Errorf("dictionary index type must be integral, not %s", dt) - } - - return -} - -// helper function to construct an appropriately typed memo table based on -// the value type for the dictionary -func createMemoTable(mem memory.Allocator, dt arrow.DataType) (ret hashing.MemoTable, err error) { - switch dt.ID() { - case arrow.INT8: - ret = hashing.NewInt8MemoTable(0) - case arrow.UINT8: - ret = hashing.NewUint8MemoTable(0) - case arrow.INT16: - ret = hashing.NewInt16MemoTable(0) - case arrow.UINT16: - ret = hashing.NewUint16MemoTable(0) - case arrow.INT32: - ret = hashing.NewInt32MemoTable(0) - case arrow.UINT32: - ret = hashing.NewUint32MemoTable(0) - case arrow.INT64: - ret = hashing.NewInt64MemoTable(0) - case arrow.UINT64: - ret = hashing.NewUint64MemoTable(0) - case arrow.DURATION, arrow.TIMESTAMP, arrow.DATE64, arrow.TIME64: - ret = hashing.NewInt64MemoTable(0) - case arrow.TIME32, arrow.DATE32, arrow.INTERVAL_MONTHS: - ret = hashing.NewInt32MemoTable(0) - case arrow.FLOAT16: - ret = hashing.NewUint16MemoTable(0) - case arrow.FLOAT32: - ret = hashing.NewFloat32MemoTable(0) - case arrow.FLOAT64: - ret = hashing.NewFloat64MemoTable(0) - case arrow.BINARY, arrow.FIXED_SIZE_BINARY, arrow.DECIMAL128, 
arrow.DECIMAL256, arrow.INTERVAL_DAY_TIME, arrow.INTERVAL_MONTH_DAY_NANO: - ret = hashing.NewBinaryMemoTable(0, 0, NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)) - case arrow.STRING: - ret = hashing.NewBinaryMemoTable(0, 0, NewBinaryBuilder(mem, arrow.BinaryTypes.String)) - case arrow.NULL: - default: - err = fmt.Errorf("unimplemented dictionary value type, %s", dt) - } - - return -} - -type DictionaryBuilder interface { - Builder - - NewDictionaryArray() *Dictionary - NewDelta() (indices, delta arrow.Array, err error) - AppendArray(arrow.Array) error - AppendIndices([]int, []bool) - ResetFull() - DictionarySize() int -} - -type dictionaryBuilder struct { - builder - - dt *arrow.DictionaryType - deltaOffset int - memoTable hashing.MemoTable - idxBuilder IndexBuilder -} - -// NewDictionaryBuilderWithDict initializes a dictionary builder and inserts the values from `init` as the first -// values in the dictionary, but does not insert them as values into the array. -func NewDictionaryBuilderWithDict(mem memory.Allocator, dt *arrow.DictionaryType, init arrow.Array) DictionaryBuilder { - if init != nil && !arrow.TypeEqual(dt.ValueType, init.DataType()) { - panic(fmt.Errorf("arrow/array: cannot initialize dictionary type %T with array of type %T", dt.ValueType, init.DataType())) - } - - idxbldr, err := createIndexBuilder(mem, dt.IndexType.(arrow.FixedWidthDataType)) - if err != nil { - panic(fmt.Errorf("arrow/array: unsupported builder for index type of %T", dt)) - } - - memo, err := createMemoTable(mem, dt.ValueType) - if err != nil { - panic(fmt.Errorf("arrow/array: unsupported builder for value type of %T", dt)) - } - - bldr := dictionaryBuilder{ - builder: builder{refCount: 1, mem: mem}, - idxBuilder: idxbldr, - memoTable: memo, - dt: dt, - } - - switch dt.ValueType.ID() { - case arrow.NULL: - ret := &NullDictionaryBuilder{bldr} - debug.Assert(init == nil, "arrow/array: doesn't make sense to init a null dictionary") - return ret - case arrow.UINT8: - ret := 
&Uint8DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Uint8)); err != nil { - panic(err) - } - } - return ret - case arrow.INT8: - ret := &Int8DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Int8)); err != nil { - panic(err) - } - } - return ret - case arrow.UINT16: - ret := &Uint16DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Uint16)); err != nil { - panic(err) - } - } - return ret - case arrow.INT16: - ret := &Int16DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Int16)); err != nil { - panic(err) - } - } - return ret - case arrow.UINT32: - ret := &Uint32DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Uint32)); err != nil { - panic(err) - } - } - return ret - case arrow.INT32: - ret := &Int32DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Int32)); err != nil { - panic(err) - } - } - return ret - case arrow.UINT64: - ret := &Uint64DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Uint64)); err != nil { - panic(err) - } - } - return ret - case arrow.INT64: - ret := &Int64DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Int64)); err != nil { - panic(err) - } - } - return ret - case arrow.FLOAT16: - ret := &Float16DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Float16)); err != nil { - panic(err) - } - } - return ret - case arrow.FLOAT32: - ret := &Float32DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Float32)); err != nil { - panic(err) - } - } - return ret - case arrow.FLOAT64: - ret := &Float64DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Float64)); err != nil { - panic(err) - } - } - return ret - case arrow.STRING: - ret := &BinaryDictionaryBuilder{bldr} - if init != nil { - if err = 
ret.InsertStringDictValues(init.(*String)); err != nil { - panic(err) - } - } - return ret - case arrow.BINARY: - ret := &BinaryDictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Binary)); err != nil { - panic(err) - } - } - return ret - case arrow.FIXED_SIZE_BINARY: - ret := &FixedSizeBinaryDictionaryBuilder{ - bldr, dt.ValueType.(*arrow.FixedSizeBinaryType).ByteWidth, - } - if init != nil { - if err = ret.InsertDictValues(init.(*FixedSizeBinary)); err != nil { - panic(err) - } - } - return ret - case arrow.DATE32: - ret := &Date32DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Date32)); err != nil { - panic(err) - } - } - return ret - case arrow.DATE64: - ret := &Date64DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Date64)); err != nil { - panic(err) - } - } - return ret - case arrow.TIMESTAMP: - ret := &TimestampDictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Timestamp)); err != nil { - panic(err) - } - } - return ret - case arrow.TIME32: - ret := &Time32DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Time32)); err != nil { - panic(err) - } - } - return ret - case arrow.TIME64: - ret := &Time64DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Time64)); err != nil { - panic(err) - } - } - return ret - case arrow.INTERVAL_MONTHS: - ret := &MonthIntervalDictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*MonthInterval)); err != nil { - panic(err) - } - } - return ret - case arrow.INTERVAL_DAY_TIME: - ret := &DayTimeDictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*DayTimeInterval)); err != nil { - panic(err) - } - } - return ret - case arrow.DECIMAL128: - ret := &Decimal128DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Decimal128)); err != nil { - panic(err) - } - } - 
return ret - case arrow.DECIMAL256: - ret := &Decimal256DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Decimal256)); err != nil { - panic(err) - } - } - return ret - case arrow.LIST: - case arrow.STRUCT: - case arrow.SPARSE_UNION: - case arrow.DENSE_UNION: - case arrow.DICTIONARY: - case arrow.MAP: - case arrow.EXTENSION: - case arrow.FIXED_SIZE_LIST: - case arrow.DURATION: - ret := &DurationDictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Duration)); err != nil { - panic(err) - } - } - return ret - case arrow.LARGE_STRING: - case arrow.LARGE_BINARY: - case arrow.LARGE_LIST: - case arrow.INTERVAL_MONTH_DAY_NANO: - ret := &MonthDayNanoDictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*MonthDayNanoInterval)); err != nil { - panic(err) - } - } - return ret - } - - panic("arrow/array: unimplemented dictionary key type") -} - -func NewDictionaryBuilder(mem memory.Allocator, dt *arrow.DictionaryType) DictionaryBuilder { - return NewDictionaryBuilderWithDict(mem, dt, nil) -} - -func (b *dictionaryBuilder) Type() arrow.DataType { return b.dt } - -func (b *dictionaryBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - b.idxBuilder.Release() - b.idxBuilder.Builder = nil - if binmemo, ok := b.memoTable.(*hashing.BinaryMemoTable); ok { - binmemo.Release() - } - b.memoTable = nil - } -} - -func (b *dictionaryBuilder) AppendNull() { - b.length += 1 - b.nulls += 1 - b.idxBuilder.AppendNull() -} - -func (b *dictionaryBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *dictionaryBuilder) AppendEmptyValue() { - b.length += 1 - b.idxBuilder.AppendEmptyValue() -} - -func (b *dictionaryBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *dictionaryBuilder) Reserve(n int) { - b.idxBuilder.Reserve(n) -} - -func 
(b *dictionaryBuilder) Resize(n int) { - b.idxBuilder.Resize(n) - b.length = b.idxBuilder.Len() -} - -func (b *dictionaryBuilder) ResetFull() { - b.builder.reset() - b.idxBuilder.NewArray().Release() - b.memoTable.Reset() -} - -func (b *dictionaryBuilder) Cap() int { return b.idxBuilder.Cap() } - -func (b *dictionaryBuilder) IsNull(i int) bool { return b.idxBuilder.IsNull(i) } - -func (b *dictionaryBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("dictionary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -func (b *dictionaryBuilder) Unmarshal(dec *json.Decoder) error { - bldr := NewBuilder(b.mem, b.dt.ValueType) - defer bldr.Release() - - if err := bldr.Unmarshal(dec); err != nil { - return err - } - - arr := bldr.NewArray() - defer arr.Release() - return b.AppendArray(arr) -} - -func (b *dictionaryBuilder) AppendValueFromString(s string) error { - bldr := NewBuilder(b.mem, b.dt.ValueType) - defer bldr.Release() - - if err := bldr.AppendValueFromString(s); err != nil { - return err - } - - arr := bldr.NewArray() - defer arr.Release() - return b.AppendArray(arr) -} - -func (b *dictionaryBuilder) UnmarshalOne(dec *json.Decoder) error { - bldr := NewBuilder(b.mem, b.dt.ValueType) - defer bldr.Release() - - if err := bldr.UnmarshalOne(dec); err != nil { - return err - } - - arr := bldr.NewArray() - defer arr.Release() - return b.AppendArray(arr) -} - -func (b *dictionaryBuilder) NewArray() arrow.Array { - return b.NewDictionaryArray() -} - -func (b *dictionaryBuilder) newData() *Data { - indices, dict, err := b.newWithDictOffset(0) - if err != nil { - panic(err) - } - - indices.dtype = b.dt - indices.dictionary = dict - return indices -} - -func (b *dictionaryBuilder) NewDictionaryArray() *Dictionary { - a := &Dictionary{} - a.refCount = 1 - - 
indices := b.newData() - a.setData(indices) - indices.Release() - return a -} - -func (b *dictionaryBuilder) newWithDictOffset(offset int) (indices, dict *Data, err error) { - idxarr := b.idxBuilder.NewArray() - defer idxarr.Release() - - indices = idxarr.Data().(*Data) - - b.deltaOffset = b.memoTable.Size() - dict, err = GetDictArrayData(b.mem, b.dt.ValueType, b.memoTable, offset) - b.reset() - indices.Retain() - return -} - -// NewDelta returns the dictionary indices and a delta dictionary since the -// last time NewArray or NewDictionaryArray were called, and resets the state -// of the builder (except for the dictionary / memotable) -func (b *dictionaryBuilder) NewDelta() (indices, delta arrow.Array, err error) { - indicesData, deltaData, err := b.newWithDictOffset(b.deltaOffset) - if err != nil { - return nil, nil, err - } - - defer indicesData.Release() - defer deltaData.Release() - indices, delta = MakeFromData(indicesData), MakeFromData(deltaData) - return -} - -func (b *dictionaryBuilder) insertDictValue(val interface{}) error { - _, _, err := b.memoTable.GetOrInsert(val) - return err -} - -func (b *dictionaryBuilder) insertDictBytes(val []byte) error { - _, _, err := b.memoTable.GetOrInsertBytes(val) - return err -} - -func (b *dictionaryBuilder) appendValue(val interface{}) error { - idx, _, err := b.memoTable.GetOrInsert(val) - b.idxBuilder.Append(idx) - b.length += 1 - return err -} - -func (b *dictionaryBuilder) appendBytes(val []byte) error { - idx, _, err := b.memoTable.GetOrInsertBytes(val) - b.idxBuilder.Append(idx) - b.length += 1 - return err -} - -func getvalFn(arr arrow.Array) func(i int) interface{} { - switch typedarr := arr.(type) { - case *Int8: - return func(i int) interface{} { return typedarr.Value(i) } - case *Uint8: - return func(i int) interface{} { return typedarr.Value(i) } - case *Int16: - return func(i int) interface{} { return typedarr.Value(i) } - case *Uint16: - return func(i int) interface{} { return typedarr.Value(i) } - 
case *Int32: - return func(i int) interface{} { return typedarr.Value(i) } - case *Uint32: - return func(i int) interface{} { return typedarr.Value(i) } - case *Int64: - return func(i int) interface{} { return typedarr.Value(i) } - case *Uint64: - return func(i int) interface{} { return typedarr.Value(i) } - case *Float16: - return func(i int) interface{} { return typedarr.Value(i).Uint16() } - case *Float32: - return func(i int) interface{} { return typedarr.Value(i) } - case *Float64: - return func(i int) interface{} { return typedarr.Value(i) } - case *Duration: - return func(i int) interface{} { return int64(typedarr.Value(i)) } - case *Timestamp: - return func(i int) interface{} { return int64(typedarr.Value(i)) } - case *Date64: - return func(i int) interface{} { return int64(typedarr.Value(i)) } - case *Time64: - return func(i int) interface{} { return int64(typedarr.Value(i)) } - case *Time32: - return func(i int) interface{} { return int32(typedarr.Value(i)) } - case *Date32: - return func(i int) interface{} { return int32(typedarr.Value(i)) } - case *MonthInterval: - return func(i int) interface{} { return int32(typedarr.Value(i)) } - case *Binary: - return func(i int) interface{} { return typedarr.Value(i) } - case *FixedSizeBinary: - return func(i int) interface{} { return typedarr.Value(i) } - case *String: - return func(i int) interface{} { return typedarr.Value(i) } - case *Decimal128: - return func(i int) interface{} { - val := typedarr.Value(i) - return (*(*[arrow.Decimal128SizeBytes]byte)(unsafe.Pointer(&val)))[:] - } - case *Decimal256: - return func(i int) interface{} { - val := typedarr.Value(i) - return (*(*[arrow.Decimal256SizeBytes]byte)(unsafe.Pointer(&val)))[:] - } - case *DayTimeInterval: - return func(i int) interface{} { - val := typedarr.Value(i) - return (*(*[arrow.DayTimeIntervalSizeBytes]byte)(unsafe.Pointer(&val)))[:] - } - case *MonthDayNanoInterval: - return func(i int) interface{} { - val := typedarr.Value(i) - return 
(*(*[arrow.MonthDayNanoIntervalSizeBytes]byte)(unsafe.Pointer(&val)))[:] - } - } - - panic("arrow/array: invalid dictionary value type") -} - -func (b *dictionaryBuilder) AppendArray(arr arrow.Array) error { - debug.Assert(arrow.TypeEqual(b.dt.ValueType, arr.DataType()), "wrong value type of array to append to dict") - - valfn := getvalFn(arr) - for i := 0; i < arr.Len(); i++ { - if arr.IsNull(i) { - b.AppendNull() - } else { - if err := b.appendValue(valfn(i)); err != nil { - return err - } - } - } - return nil -} - -func (b *dictionaryBuilder) IndexBuilder() IndexBuilder { - return b.idxBuilder -} - -func (b *dictionaryBuilder) AppendIndices(indices []int, valid []bool) { - b.length += len(indices) - switch idxbldr := b.idxBuilder.Builder.(type) { - case *Int8Builder: - vals := make([]int8, len(indices)) - for i, v := range indices { - vals[i] = int8(v) - } - idxbldr.AppendValues(vals, valid) - case *Int16Builder: - vals := make([]int16, len(indices)) - for i, v := range indices { - vals[i] = int16(v) - } - idxbldr.AppendValues(vals, valid) - case *Int32Builder: - vals := make([]int32, len(indices)) - for i, v := range indices { - vals[i] = int32(v) - } - idxbldr.AppendValues(vals, valid) - case *Int64Builder: - vals := make([]int64, len(indices)) - for i, v := range indices { - vals[i] = int64(v) - } - idxbldr.AppendValues(vals, valid) - case *Uint8Builder: - vals := make([]uint8, len(indices)) - for i, v := range indices { - vals[i] = uint8(v) - } - idxbldr.AppendValues(vals, valid) - case *Uint16Builder: - vals := make([]uint16, len(indices)) - for i, v := range indices { - vals[i] = uint16(v) - } - idxbldr.AppendValues(vals, valid) - case *Uint32Builder: - vals := make([]uint32, len(indices)) - for i, v := range indices { - vals[i] = uint32(v) - } - idxbldr.AppendValues(vals, valid) - case *Uint64Builder: - vals := make([]uint64, len(indices)) - for i, v := range indices { - vals[i] = uint64(v) - } - idxbldr.AppendValues(vals, valid) - } -} - -func (b 
*dictionaryBuilder) DictionarySize() int { - return b.memoTable.Size() -} - -type NullDictionaryBuilder struct { - dictionaryBuilder -} - -func (b *NullDictionaryBuilder) NewArray() arrow.Array { - return b.NewDictionaryArray() -} - -func (b *NullDictionaryBuilder) NewDictionaryArray() *Dictionary { - idxarr := b.idxBuilder.NewArray() - defer idxarr.Release() - - out := idxarr.Data().(*Data) - dictarr := NewNull(0) - defer dictarr.Release() - - dictarr.data.Retain() - out.dtype = b.dt - out.dictionary = dictarr.data - - return NewDictionaryData(out) -} - -func (b *NullDictionaryBuilder) AppendArray(arr arrow.Array) error { - if arr.DataType().ID() != arrow.NULL { - return fmt.Errorf("cannot append non-null array to null dictionary") - } - - for i := 0; i < arr.(*Null).Len(); i++ { - b.AppendNull() - } - return nil -} - -type Int8DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Int8DictionaryBuilder) Append(v int8) error { return b.appendValue(v) } -func (b *Int8DictionaryBuilder) InsertDictValues(arr *Int8) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Uint8DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Uint8DictionaryBuilder) Append(v uint8) error { return b.appendValue(v) } -func (b *Uint8DictionaryBuilder) InsertDictValues(arr *Uint8) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Int16DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Int16DictionaryBuilder) Append(v int16) error { return b.appendValue(v) } -func (b *Int16DictionaryBuilder) InsertDictValues(arr *Int16) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Uint16DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Uint16DictionaryBuilder) Append(v uint16) error { return b.appendValue(v) } -func (b 
*Uint16DictionaryBuilder) InsertDictValues(arr *Uint16) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Int32DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Int32DictionaryBuilder) Append(v int32) error { return b.appendValue(v) } -func (b *Int32DictionaryBuilder) InsertDictValues(arr *Int32) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Uint32DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Uint32DictionaryBuilder) Append(v uint32) error { return b.appendValue(v) } -func (b *Uint32DictionaryBuilder) InsertDictValues(arr *Uint32) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Int64DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Int64DictionaryBuilder) Append(v int64) error { return b.appendValue(v) } -func (b *Int64DictionaryBuilder) InsertDictValues(arr *Int64) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Uint64DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Uint64DictionaryBuilder) Append(v uint64) error { return b.appendValue(v) } -func (b *Uint64DictionaryBuilder) InsertDictValues(arr *Uint64) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type DurationDictionaryBuilder struct { - dictionaryBuilder -} - -func (b *DurationDictionaryBuilder) Append(v arrow.Duration) error { return b.appendValue(int64(v)) } -func (b *DurationDictionaryBuilder) InsertDictValues(arr *Duration) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(int64(v)); err != nil { - break - } - } - return -} - -type TimestampDictionaryBuilder struct { - dictionaryBuilder -} - -func (b *TimestampDictionaryBuilder) 
Append(v arrow.Timestamp) error { return b.appendValue(int64(v)) } -func (b *TimestampDictionaryBuilder) InsertDictValues(arr *Timestamp) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(int64(v)); err != nil { - break - } - } - return -} - -type Time32DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Time32DictionaryBuilder) Append(v arrow.Time32) error { return b.appendValue(int32(v)) } -func (b *Time32DictionaryBuilder) InsertDictValues(arr *Time32) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(int32(v)); err != nil { - break - } - } - return -} - -type Time64DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Time64DictionaryBuilder) Append(v arrow.Time64) error { return b.appendValue(int64(v)) } -func (b *Time64DictionaryBuilder) InsertDictValues(arr *Time64) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(int64(v)); err != nil { - break - } - } - return -} - -type Date32DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Date32DictionaryBuilder) Append(v arrow.Date32) error { return b.appendValue(int32(v)) } -func (b *Date32DictionaryBuilder) InsertDictValues(arr *Date32) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(int32(v)); err != nil { - break - } - } - return -} - -type Date64DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Date64DictionaryBuilder) Append(v arrow.Date64) error { return b.appendValue(int64(v)) } -func (b *Date64DictionaryBuilder) InsertDictValues(arr *Date64) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(int64(v)); err != nil { - break - } - } - return -} - -type MonthIntervalDictionaryBuilder struct { - dictionaryBuilder -} - -func (b *MonthIntervalDictionaryBuilder) Append(v arrow.MonthInterval) error { - return b.appendValue(int32(v)) -} -func (b *MonthIntervalDictionaryBuilder) InsertDictValues(arr *MonthInterval) (err error) { - for _, v := range 
arr.values { - if err = b.insertDictValue(int32(v)); err != nil { - break - } - } - return -} - -type Float16DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Float16DictionaryBuilder) Append(v float16.Num) error { return b.appendValue(v.Uint16()) } -func (b *Float16DictionaryBuilder) InsertDictValues(arr *Float16) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v.Uint16()); err != nil { - break - } - } - return -} - -type Float32DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Float32DictionaryBuilder) Append(v float32) error { return b.appendValue(v) } -func (b *Float32DictionaryBuilder) InsertDictValues(arr *Float32) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Float64DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Float64DictionaryBuilder) Append(v float64) error { return b.appendValue(v) } -func (b *Float64DictionaryBuilder) InsertDictValues(arr *Float64) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type BinaryDictionaryBuilder struct { - dictionaryBuilder -} - -func (b *BinaryDictionaryBuilder) Append(v []byte) error { - if v == nil { - b.AppendNull() - return nil - } - - return b.appendBytes(v) -} - -func (b *BinaryDictionaryBuilder) AppendString(v string) error { return b.appendBytes([]byte(v)) } -func (b *BinaryDictionaryBuilder) InsertDictValues(arr *Binary) (err error) { - if !arrow.TypeEqual(arr.DataType(), b.dt.ValueType) { - return fmt.Errorf("dictionary insert type mismatch: cannot insert values of type %T to dictionary type %T", arr.DataType(), b.dt.ValueType) - } - - for i := 0; i < arr.Len(); i++ { - if err = b.insertDictBytes(arr.Value(i)); err != nil { - break - } - } - return -} -func (b *BinaryDictionaryBuilder) InsertStringDictValues(arr *String) (err error) { - if !arrow.TypeEqual(arr.DataType(), b.dt.ValueType) { - 
return fmt.Errorf("dictionary insert type mismatch: cannot insert values of type %T to dictionary type %T", arr.DataType(), b.dt.ValueType) - } - - for i := 0; i < arr.Len(); i++ { - if err = b.insertDictValue(arr.Value(i)); err != nil { - break - } - } - return -} - -func (b *BinaryDictionaryBuilder) GetValueIndex(i int) int { - switch b := b.idxBuilder.Builder.(type) { - case *Uint8Builder: - return int(b.Value(i)) - case *Int8Builder: - return int(b.Value(i)) - case *Uint16Builder: - return int(b.Value(i)) - case *Int16Builder: - return int(b.Value(i)) - case *Uint32Builder: - return int(b.Value(i)) - case *Int32Builder: - return int(b.Value(i)) - case *Uint64Builder: - return int(b.Value(i)) - case *Int64Builder: - return int(b.Value(i)) - default: - return -1 - } -} - -func (b *BinaryDictionaryBuilder) Value(i int) []byte { - switch mt := b.memoTable.(type) { - case *hashing.BinaryMemoTable: - return mt.Value(i) - } - return nil -} - -func (b *BinaryDictionaryBuilder) ValueStr(i int) string { - return string(b.Value(i)) -} - -type FixedSizeBinaryDictionaryBuilder struct { - dictionaryBuilder - byteWidth int -} - -func (b *FixedSizeBinaryDictionaryBuilder) Append(v []byte) error { - return b.appendValue(v[:b.byteWidth]) -} -func (b *FixedSizeBinaryDictionaryBuilder) InsertDictValues(arr *FixedSizeBinary) (err error) { - var ( - beg = arr.array.data.offset * b.byteWidth - end = (arr.array.data.offset + arr.data.length) * b.byteWidth - ) - data := arr.valueBytes[beg:end] - for len(data) > 0 { - if err = b.insertDictValue(data[:b.byteWidth]); err != nil { - break - } - data = data[b.byteWidth:] - } - return -} - -type Decimal128DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Decimal128DictionaryBuilder) Append(v decimal128.Num) error { - return b.appendValue((*(*[arrow.Decimal128SizeBytes]byte)(unsafe.Pointer(&v)))[:]) -} -func (b *Decimal128DictionaryBuilder) InsertDictValues(arr *Decimal128) (err error) { - data := 
arrow.Decimal128Traits.CastToBytes(arr.values) - for len(data) > 0 { - if err = b.insertDictValue(data[:arrow.Decimal128SizeBytes]); err != nil { - break - } - data = data[arrow.Decimal128SizeBytes:] - } - return -} - -type Decimal256DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Decimal256DictionaryBuilder) Append(v decimal256.Num) error { - return b.appendValue((*(*[arrow.Decimal256SizeBytes]byte)(unsafe.Pointer(&v)))[:]) -} -func (b *Decimal256DictionaryBuilder) InsertDictValues(arr *Decimal256) (err error) { - data := arrow.Decimal256Traits.CastToBytes(arr.values) - for len(data) > 0 { - if err = b.insertDictValue(data[:arrow.Decimal256SizeBytes]); err != nil { - break - } - data = data[arrow.Decimal256SizeBytes:] - } - return -} - -type MonthDayNanoDictionaryBuilder struct { - dictionaryBuilder -} - -func (b *MonthDayNanoDictionaryBuilder) Append(v arrow.MonthDayNanoInterval) error { - return b.appendValue((*(*[arrow.MonthDayNanoIntervalSizeBytes]byte)(unsafe.Pointer(&v)))[:]) -} -func (b *MonthDayNanoDictionaryBuilder) InsertDictValues(arr *MonthDayNanoInterval) (err error) { - data := arrow.MonthDayNanoIntervalTraits.CastToBytes(arr.values) - for len(data) > 0 { - if err = b.insertDictValue(data[:arrow.MonthDayNanoIntervalSizeBytes]); err != nil { - break - } - data = data[arrow.MonthDayNanoIntervalSizeBytes:] - } - return -} - -type DayTimeDictionaryBuilder struct { - dictionaryBuilder -} - -func (b *DayTimeDictionaryBuilder) Append(v arrow.DayTimeInterval) error { - return b.appendValue((*(*[arrow.DayTimeIntervalSizeBytes]byte)(unsafe.Pointer(&v)))[:]) -} -func (b *DayTimeDictionaryBuilder) InsertDictValues(arr *DayTimeInterval) (err error) { - data := arrow.DayTimeIntervalTraits.CastToBytes(arr.values) - for len(data) > 0 { - if err = b.insertDictValue(data[:arrow.DayTimeIntervalSizeBytes]); err != nil { - break - } - data = data[arrow.DayTimeIntervalSizeBytes:] - } - return -} - -func IsTrivialTransposition(transposeMap []int32) bool { - 
for i, t := range transposeMap { - if t != int32(i) { - return false - } - } - return true -} - -func TransposeDictIndices(mem memory.Allocator, data arrow.ArrayData, inType, outType arrow.DataType, dict arrow.ArrayData, transposeMap []int32) (arrow.ArrayData, error) { - // inType may be different from data->dtype if data is ExtensionType - if inType.ID() != arrow.DICTIONARY || outType.ID() != arrow.DICTIONARY { - return nil, errors.New("arrow/array: expected dictionary type") - } - - var ( - inDictType = inType.(*arrow.DictionaryType) - outDictType = outType.(*arrow.DictionaryType) - inIndexType = inDictType.IndexType - outIndexType = outDictType.IndexType.(arrow.FixedWidthDataType) - ) - - if inIndexType.ID() == outIndexType.ID() && IsTrivialTransposition(transposeMap) { - // index type and values will be identical, we can reuse the existing buffers - return NewDataWithDictionary(outType, data.Len(), []*memory.Buffer{data.Buffers()[0], data.Buffers()[1]}, - data.NullN(), data.Offset(), dict.(*Data)), nil - } - - // default path: compute the transposed indices as a new buffer - outBuf := memory.NewResizableBuffer(mem) - outBuf.Resize(data.Len() * int(bitutil.BytesForBits(int64(outIndexType.BitWidth())))) - defer outBuf.Release() - - // shift null buffer if original offset is non-zero - var nullBitmap *memory.Buffer - if data.Offset() != 0 && data.NullN() != 0 { - nullBitmap = memory.NewResizableBuffer(mem) - nullBitmap.Resize(int(bitutil.BytesForBits(int64(data.Len())))) - bitutil.CopyBitmap(data.Buffers()[0].Bytes(), data.Offset(), data.Len(), nullBitmap.Bytes(), 0) - defer nullBitmap.Release() - } else { - nullBitmap = data.Buffers()[0] - } - - outData := NewDataWithDictionary(outType, data.Len(), - []*memory.Buffer{nullBitmap, outBuf}, data.NullN(), 0, dict.(*Data)) - err := utils.TransposeIntsBuffers(inIndexType, outIndexType, - data.Buffers()[1].Bytes(), outBuf.Bytes(), data.Offset(), outData.offset, data.Len(), transposeMap) - return outData, err -} - -// 
DictionaryUnifier defines the interface used for unifying, and optionally producing -// transposition maps for, multiple dictionary arrays incrementally. -type DictionaryUnifier interface { - // Unify adds the provided array of dictionary values to be unified. - Unify(arrow.Array) error - // UnifyAndTranspose adds the provided array of dictionary values, - // just like Unify but returns an allocated buffer containing a mapping - // to transpose dictionary indices. - UnifyAndTranspose(dict arrow.Array) (transposed *memory.Buffer, err error) - // GetResult returns the dictionary type (choosing the smallest index type - // that can represent all the values) and the new unified dictionary. - // - // Calling GetResult clears the existing dictionary from the unifier so it - // can be reused by calling Unify/UnifyAndTranspose again with new arrays. - GetResult() (outType arrow.DataType, outDict arrow.Array, err error) - // GetResultWithIndexType is like GetResult, but allows specifying the type - // of the dictionary indexes rather than letting the unifier pick. If the - // passed in index type isn't large enough to represent all of the dictionary - // values, an error will be returned instead. The new unified dictionary - // is returned. - GetResultWithIndexType(indexType arrow.DataType) (arrow.Array, error) - // Release should be called to clean up any allocated scratch memo-table used - // for building the unified dictionary. - Release() -} - -type unifier struct { - mem memory.Allocator - valueType arrow.DataType - memoTable hashing.MemoTable -} - -// NewDictionaryUnifier constructs and returns a new dictionary unifier for dictionaries -// of valueType, using the provided allocator for allocating the unified dictionary -// and the memotable used for building it. -// -// This will only work for non-nested types currently. a nested valueType or dictionary type -// will result in an error. 
-func NewDictionaryUnifier(alloc memory.Allocator, valueType arrow.DataType) (DictionaryUnifier, error) { - memoTable, err := createMemoTable(alloc, valueType) - if err != nil { - return nil, err - } - return &unifier{ - mem: alloc, - valueType: valueType, - memoTable: memoTable, - }, nil -} - -func (u *unifier) Release() { - if bin, ok := u.memoTable.(*hashing.BinaryMemoTable); ok { - bin.Release() - } -} - -func (u *unifier) Unify(dict arrow.Array) (err error) { - if !arrow.TypeEqual(u.valueType, dict.DataType()) { - return fmt.Errorf("dictionary type different from unifier: %s, expected: %s", dict.DataType(), u.valueType) - } - - valFn := getvalFn(dict) - for i := 0; i < dict.Len(); i++ { - if dict.IsNull(i) { - u.memoTable.GetOrInsertNull() - continue - } - - if _, _, err = u.memoTable.GetOrInsert(valFn(i)); err != nil { - return err - } - } - return -} - -func (u *unifier) UnifyAndTranspose(dict arrow.Array) (transposed *memory.Buffer, err error) { - if !arrow.TypeEqual(u.valueType, dict.DataType()) { - return nil, fmt.Errorf("dictionary type different from unifier: %s, expected: %s", dict.DataType(), u.valueType) - } - - transposed = memory.NewResizableBuffer(u.mem) - transposed.Resize(arrow.Int32Traits.BytesRequired(dict.Len())) - - newIdxes := arrow.Int32Traits.CastFromBytes(transposed.Bytes()) - valFn := getvalFn(dict) - for i := 0; i < dict.Len(); i++ { - if dict.IsNull(i) { - idx, _ := u.memoTable.GetOrInsertNull() - newIdxes[i] = int32(idx) - continue - } - - idx, _, err := u.memoTable.GetOrInsert(valFn(i)) - if err != nil { - transposed.Release() - return nil, err - } - newIdxes[i] = int32(idx) - } - return -} - -func (u *unifier) GetResult() (outType arrow.DataType, outDict arrow.Array, err error) { - dictLen := u.memoTable.Size() - var indexType arrow.DataType - switch { - case dictLen <= math.MaxInt8: - indexType = arrow.PrimitiveTypes.Int8 - case dictLen <= math.MaxInt16: - indexType = arrow.PrimitiveTypes.Int16 - case dictLen <= math.MaxInt32: - 
indexType = arrow.PrimitiveTypes.Int32 - default: - indexType = arrow.PrimitiveTypes.Int64 - } - outType = &arrow.DictionaryType{IndexType: indexType, ValueType: u.valueType} - - dictData, err := GetDictArrayData(u.mem, u.valueType, u.memoTable, 0) - if err != nil { - return nil, nil, err - } - - u.memoTable.Reset() - - defer dictData.Release() - outDict = MakeFromData(dictData) - return -} - -func (u *unifier) GetResultWithIndexType(indexType arrow.DataType) (arrow.Array, error) { - dictLen := u.memoTable.Size() - var toobig bool - switch indexType.ID() { - case arrow.UINT8: - toobig = dictLen > math.MaxUint8 - case arrow.INT8: - toobig = dictLen > math.MaxInt8 - case arrow.UINT16: - toobig = dictLen > math.MaxUint16 - case arrow.INT16: - toobig = dictLen > math.MaxInt16 - case arrow.UINT32: - toobig = uint(dictLen) > math.MaxUint32 - case arrow.INT32: - toobig = dictLen > math.MaxInt32 - case arrow.UINT64: - toobig = uint64(dictLen) > uint64(math.MaxUint64) - case arrow.INT64: - default: - return nil, fmt.Errorf("arrow/array: invalid dictionary index type: %s, must be integral", indexType) - } - if toobig { - return nil, errors.New("arrow/array: cannot combine dictionaries. unified dictionary requires a larger index type") - } - - dictData, err := GetDictArrayData(u.mem, u.valueType, u.memoTable, 0) - if err != nil { - return nil, err - } - - u.memoTable.Reset() - - defer dictData.Release() - return MakeFromData(dictData), nil -} - -type binaryUnifier struct { - mem memory.Allocator - memoTable *hashing.BinaryMemoTable -} - -// NewBinaryDictionaryUnifier constructs and returns a new dictionary unifier for dictionaries -// of binary values, using the provided allocator for allocating the unified dictionary -// and the memotable used for building it. 
-func NewBinaryDictionaryUnifier(alloc memory.Allocator) DictionaryUnifier { - return &binaryUnifier{ - mem: alloc, - memoTable: hashing.NewBinaryMemoTable(0, 0, NewBinaryBuilder(alloc, arrow.BinaryTypes.Binary)), - } -} - -func (u *binaryUnifier) Release() { - u.memoTable.Release() -} - -func (u *binaryUnifier) Unify(dict arrow.Array) (err error) { - if !arrow.TypeEqual(arrow.BinaryTypes.Binary, dict.DataType()) { - return fmt.Errorf("dictionary type different from unifier: %s, expected: %s", dict.DataType(), arrow.BinaryTypes.Binary) - } - - typedDict := dict.(*Binary) - for i := 0; i < dict.Len(); i++ { - if dict.IsNull(i) { - u.memoTable.GetOrInsertNull() - continue - } - - if _, _, err = u.memoTable.GetOrInsertBytes(typedDict.Value(i)); err != nil { - return err - } - } - return -} - -func (u *binaryUnifier) UnifyAndTranspose(dict arrow.Array) (transposed *memory.Buffer, err error) { - if !arrow.TypeEqual(arrow.BinaryTypes.Binary, dict.DataType()) { - return nil, fmt.Errorf("dictionary type different from unifier: %s, expected: %s", dict.DataType(), arrow.BinaryTypes.Binary) - } - - transposed = memory.NewResizableBuffer(u.mem) - transposed.Resize(arrow.Int32Traits.BytesRequired(dict.Len())) - - newIdxes := arrow.Int32Traits.CastFromBytes(transposed.Bytes()) - typedDict := dict.(*Binary) - for i := 0; i < dict.Len(); i++ { - if dict.IsNull(i) { - idx, _ := u.memoTable.GetOrInsertNull() - newIdxes[i] = int32(idx) - continue - } - - idx, _, err := u.memoTable.GetOrInsertBytes(typedDict.Value(i)) - if err != nil { - transposed.Release() - return nil, err - } - newIdxes[i] = int32(idx) - } - return -} - -func (u *binaryUnifier) GetResult() (outType arrow.DataType, outDict arrow.Array, err error) { - dictLen := u.memoTable.Size() - var indexType arrow.DataType - switch { - case dictLen <= math.MaxInt8: - indexType = arrow.PrimitiveTypes.Int8 - case dictLen <= math.MaxInt16: - indexType = arrow.PrimitiveTypes.Int16 - case dictLen <= math.MaxInt32: - indexType = 
arrow.PrimitiveTypes.Int32 - default: - indexType = arrow.PrimitiveTypes.Int64 - } - outType = &arrow.DictionaryType{IndexType: indexType, ValueType: arrow.BinaryTypes.Binary} - - dictData, err := GetDictArrayData(u.mem, arrow.BinaryTypes.Binary, u.memoTable, 0) - if err != nil { - return nil, nil, err - } - - u.memoTable.Reset() - - defer dictData.Release() - outDict = MakeFromData(dictData) - return -} - -func (u *binaryUnifier) GetResultWithIndexType(indexType arrow.DataType) (arrow.Array, error) { - dictLen := u.memoTable.Size() - var toobig bool - switch indexType.ID() { - case arrow.UINT8: - toobig = dictLen > math.MaxUint8 - case arrow.INT8: - toobig = dictLen > math.MaxInt8 - case arrow.UINT16: - toobig = dictLen > math.MaxUint16 - case arrow.INT16: - toobig = dictLen > math.MaxInt16 - case arrow.UINT32: - toobig = uint(dictLen) > math.MaxUint32 - case arrow.INT32: - toobig = dictLen > math.MaxInt32 - case arrow.UINT64: - toobig = uint64(dictLen) > uint64(math.MaxUint64) - case arrow.INT64: - default: - return nil, fmt.Errorf("arrow/array: invalid dictionary index type: %s, must be integral", indexType) - } - if toobig { - return nil, errors.New("arrow/array: cannot combine dictionaries. 
unified dictionary requires a larger index type") - } - - dictData, err := GetDictArrayData(u.mem, arrow.BinaryTypes.Binary, u.memoTable, 0) - if err != nil { - return nil, err - } - - u.memoTable.Reset() - - defer dictData.Release() - return MakeFromData(dictData), nil -} - -func unifyRecursive(mem memory.Allocator, typ arrow.DataType, chunks []*Data) (changed bool, err error) { - debug.Assert(len(chunks) != 0, "must provide non-zero length chunk slice") - var extType arrow.DataType - - if typ.ID() == arrow.EXTENSION { - extType = typ - typ = typ.(arrow.ExtensionType).StorageType() - } - - if nestedTyp, ok := typ.(arrow.NestedType); ok { - children := make([]*Data, len(chunks)) - for i, f := range nestedTyp.Fields() { - for j, c := range chunks { - children[j] = c.childData[i].(*Data) - } - - childChanged, err := unifyRecursive(mem, f.Type, children) - if err != nil { - return false, err - } - if childChanged { - // only when unification actually occurs - for j := range chunks { - chunks[j].childData[i] = children[j] - } - changed = true - } - } - } - - if typ.ID() == arrow.DICTIONARY { - dictType := typ.(*arrow.DictionaryType) - var ( - uni DictionaryUnifier - newDict arrow.Array - ) - // unify any nested dictionaries first, but the unifier doesn't support - // nested dictionaries yet so this would fail. 
- uni, err = NewDictionaryUnifier(mem, dictType.ValueType) - if err != nil { - return changed, err - } - defer uni.Release() - transposeMaps := make([]*memory.Buffer, len(chunks)) - for i, c := range chunks { - debug.Assert(c.dictionary != nil, "missing dictionary data for dictionary array") - arr := MakeFromData(c.dictionary) - defer arr.Release() - if transposeMaps[i], err = uni.UnifyAndTranspose(arr); err != nil { - return - } - defer transposeMaps[i].Release() - } - - if newDict, err = uni.GetResultWithIndexType(dictType.IndexType); err != nil { - return - } - defer newDict.Release() - - for j := range chunks { - chnk, err := TransposeDictIndices(mem, chunks[j], typ, typ, newDict.Data(), arrow.Int32Traits.CastFromBytes(transposeMaps[j].Bytes())) - if err != nil { - return changed, err - } - chunks[j].Release() - chunks[j] = chnk.(*Data) - if extType != nil { - chunks[j].dtype = extType - } - } - changed = true - } - - return -} - -// UnifyChunkedDicts takes a chunked array of dictionary type and will unify -// the dictionary across all of the chunks with the returned chunked array -// having all chunks share the same dictionary. -// -// The return from this *must* have Release called on it unless an error is returned -// in which case the *arrow.Chunked will be nil. -// -// If there is 1 or fewer chunks, then nothing is modified and this function will just -// call Retain on the passed in Chunked array (so Release can safely be called on it). -// The same is true if the type of the array is not a dictionary or if no changes are -// needed for all of the chunks to be using the same dictionary. 
-func UnifyChunkedDicts(alloc memory.Allocator, chnkd *arrow.Chunked) (*arrow.Chunked, error) { - if len(chnkd.Chunks()) <= 1 { - chnkd.Retain() - return chnkd, nil - } - - chunksData := make([]*Data, len(chnkd.Chunks())) - for i, c := range chnkd.Chunks() { - c.Data().Retain() - chunksData[i] = c.Data().(*Data) - } - changed, err := unifyRecursive(alloc, chnkd.DataType(), chunksData) - if err != nil || !changed { - for _, c := range chunksData { - c.Release() - } - if err == nil { - chnkd.Retain() - } else { - chnkd = nil - } - return chnkd, err - } - - chunks := make([]arrow.Array, len(chunksData)) - for i, c := range chunksData { - chunks[i] = MakeFromData(c) - defer chunks[i].Release() - c.Release() - } - - return arrow.NewChunked(chnkd.DataType(), chunks), nil -} - -// UnifyTableDicts performs UnifyChunkedDicts on each column of the table so that -// any dictionary column will have the dictionaries of its chunks unified. -// -// The returned Table should always be Release'd unless a non-nil error was returned, -// in which case the table returned will be nil. -func UnifyTableDicts(alloc memory.Allocator, table arrow.Table) (arrow.Table, error) { - cols := make([]arrow.Column, table.NumCols()) - for i := 0; i < int(table.NumCols()); i++ { - chnkd, err := UnifyChunkedDicts(alloc, table.Column(i).Data()) - if err != nil { - return nil, err - } - defer chnkd.Release() - cols[i] = *arrow.NewColumn(table.Schema().Field(i), chnkd) - defer cols[i].Release() - } - return NewTable(table.Schema(), cols, table.NumRows()), nil -} - -var ( - _ arrow.Array = (*Dictionary)(nil) - _ Builder = (*dictionaryBuilder)(nil) -) diff --git a/go/arrow/array/dictionary_test.go b/go/arrow/array/dictionary_test.go deleted file mode 100644 index ea9587d8dcdf9..0000000000000 --- a/go/arrow/array/dictionary_test.go +++ /dev/null @@ -1,1918 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "fmt" - "math" - "math/rand" - "reflect" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/types" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" -) - -type PrimitiveDictionaryTestSuite struct { - suite.Suite - - mem *memory.CheckedAllocator - typ arrow.DataType - reftyp reflect.Type -} - -func (p *PrimitiveDictionaryTestSuite) SetupTest() { - p.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) -} - -func (p *PrimitiveDictionaryTestSuite) TearDownTest() { - p.mem.AssertSize(p.T(), 0) -} - -func TestPrimitiveDictionaryBuilders(t *testing.T) { - tests := []struct { - name string - typ arrow.DataType - reftyp reflect.Type - }{ - {"int8", arrow.PrimitiveTypes.Int8, reflect.TypeOf(int8(0))}, - {"uint8", arrow.PrimitiveTypes.Uint8, reflect.TypeOf(uint8(0))}, - {"int16", arrow.PrimitiveTypes.Int16, reflect.TypeOf(int16(0))}, - {"uint16", arrow.PrimitiveTypes.Uint16, 
reflect.TypeOf(uint16(0))}, - {"int32", arrow.PrimitiveTypes.Int32, reflect.TypeOf(int32(0))}, - {"uint32", arrow.PrimitiveTypes.Uint32, reflect.TypeOf(uint32(0))}, - {"int64", arrow.PrimitiveTypes.Int64, reflect.TypeOf(int64(0))}, - {"uint64", arrow.PrimitiveTypes.Uint64, reflect.TypeOf(uint64(0))}, - {"float32", arrow.PrimitiveTypes.Float32, reflect.TypeOf(float32(0))}, - {"float64", arrow.PrimitiveTypes.Float64, reflect.TypeOf(float64(0))}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - suite.Run(t, &PrimitiveDictionaryTestSuite{typ: tt.typ, reftyp: tt.reftyp}) - }) - } -} - -func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderBasic() { - expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} - bldr := array.NewDictionaryBuilder(p.mem, expectedType) - defer bldr.Release() - - builder := reflect.ValueOf(bldr) - appfn := builder.MethodByName("Append") - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - bldr.AppendNull() - - p.EqualValues(4, bldr.Len()) - p.EqualValues(1, bldr.NullN()) - - p.EqualValues(2, bldr.DictionarySize()) - - arr := bldr.NewArray().(*array.Dictionary) - defer arr.Release() - - p.True(arrow.TypeEqual(expectedType, arr.DataType())) - expectedDict, _, err := array.FromJSON(p.mem, expectedType.ValueType, strings.NewReader("[1, 2]")) - p.NoError(err) - defer expectedDict.Release() - - expectedIndices, _, err := array.FromJSON(p.mem, expectedType.IndexType, strings.NewReader("[0, 1, 0, null]")) - p.NoError(err) - defer expectedIndices.Release() - - expected := array.NewDictionaryArray(expectedType, expectedIndices, expectedDict) - defer expected.Release() - - p.True(array.Equal(expected, arr)) -} - -func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderInit() 
{ - valueType := p.typ - dictArr, _, err := array.FromJSON(p.mem, valueType, strings.NewReader("[1, 2]")) - p.NoError(err) - defer dictArr.Release() - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: valueType} - bldr := array.NewDictionaryBuilderWithDict(p.mem, dictType, dictArr) - defer bldr.Release() - - builder := reflect.ValueOf(bldr) - appfn := builder.MethodByName("Append") - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - bldr.AppendNull() - - p.EqualValues(4, bldr.Len()) - p.EqualValues(1, bldr.NullN()) - - arr := bldr.NewDictionaryArray() - defer arr.Release() - - expectedIndices, _, err := array.FromJSON(p.mem, dictType.IndexType, strings.NewReader("[0, 1, 0, null]")) - p.NoError(err) - defer expectedIndices.Release() - - expected := array.NewDictionaryArray(dictType, expectedIndices, dictArr) - defer expected.Release() - - p.True(array.Equal(expected, arr)) -} - -func (p *PrimitiveDictionaryTestSuite) TestDictionaryNewBuilder() { - valueType := p.typ - dictArr, _, err := array.FromJSON(p.mem, valueType, strings.NewReader("[1, 2]")) - p.NoError(err) - defer dictArr.Release() - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: valueType} - bldr := array.NewBuilder(p.mem, dictType) - defer bldr.Release() - - builder := reflect.ValueOf(bldr) - appfn := builder.MethodByName("Append") - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - bldr.AppendNull() - - p.EqualValues(4, bldr.Len()) - p.EqualValues(1, bldr.NullN()) - - arr := bldr.NewArray().(*array.Dictionary) - defer 
arr.Release() - - expectedIndices, _, err := array.FromJSON(p.mem, dictType.IndexType, strings.NewReader("[0, 1, 0, null]")) - p.NoError(err) - defer expectedIndices.Release() - - expected := array.NewDictionaryArray(dictType, expectedIndices, dictArr) - defer expected.Release() - - p.True(array.Equal(expected, arr)) -} - -func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderAppendArr() { - valueType := p.typ - intermediate, _, err := array.FromJSON(p.mem, valueType, strings.NewReader("[1, 2, 1]")) - p.NoError(err) - defer intermediate.Release() - - expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} - bldr := array.NewDictionaryBuilder(p.mem, expectedType) - defer bldr.Release() - - bldr.AppendArray(intermediate) - result := bldr.NewArray() - defer result.Release() - - expectedDict, _, err := array.FromJSON(p.mem, expectedType.ValueType, strings.NewReader("[1, 2]")) - p.NoError(err) - defer expectedDict.Release() - - expectedIndices, _, err := array.FromJSON(p.mem, expectedType.IndexType, strings.NewReader("[0, 1, 0]")) - p.NoError(err) - defer expectedIndices.Release() - - expected := array.NewDictionaryArray(expectedType, expectedIndices, expectedDict) - defer expected.Release() - - p.True(array.Equal(expected, result)) -} - -func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderDeltaDictionary() { - expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} - bldr := array.NewDictionaryBuilder(p.mem, expectedType) - defer bldr.Release() - - builder := reflect.ValueOf(bldr) - appfn := builder.MethodByName("Append") - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - - result := 
bldr.NewArray() - defer result.Release() - - exdict, _, err := array.FromJSON(p.mem, p.typ, strings.NewReader("[1, 2]")) - p.NoError(err) - defer exdict.Release() - exindices, _, err := array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0, 1]")) - p.NoError(err) - defer exindices.Release() - expected := array.NewDictionaryArray(result.DataType().(*arrow.DictionaryType), exindices, exdict) - defer expected.Release() - p.True(array.Equal(expected, result)) - - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - - indices, delta, err := bldr.NewDelta() - p.NoError(err) - defer indices.Release() - defer delta.Release() - - exindices, _, _ = array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[1, 2, 2, 0, 2]")) - defer exindices.Release() - exdelta, _, _ := array.FromJSON(p.mem, p.typ, strings.NewReader("[3]")) - defer exdelta.Release() - - p.True(array.Equal(exindices, indices)) - p.True(array.Equal(exdelta, delta)) -} - -func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderDoubleDeltaDictionary() { - expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} - bldr := array.NewDictionaryBuilder(p.mem, expectedType) - defer bldr.Release() - - builder := reflect.ValueOf(bldr) - appfn := builder.MethodByName("Append") - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - 
p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - - result := bldr.NewArray() - defer result.Release() - - exdict, _, err := array.FromJSON(p.mem, p.typ, strings.NewReader("[1, 2]")) - p.NoError(err) - defer exdict.Release() - exindices, _, err := array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0, 1]")) - p.NoError(err) - defer exindices.Release() - expected := array.NewDictionaryArray(result.DataType().(*arrow.DictionaryType), exindices, exdict) - defer expected.Release() - p.True(array.Equal(expected, result)) - - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - - indices, delta, err := bldr.NewDelta() - p.NoError(err) - defer indices.Release() - defer delta.Release() - - exindices, _, _ = array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[1, 2, 2, 0, 2]")) - defer exindices.Release() - exdelta, _, _ := array.FromJSON(p.mem, p.typ, strings.NewReader("[3]")) - defer exdelta.Release() - - p.True(array.Equal(exindices, indices)) - p.True(array.Equal(exdelta, delta)) - - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(4).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(5).Convert(p.reftyp)})[0].Interface()) - - indices, delta, err = bldr.NewDelta() - p.NoError(err) - defer 
indices.Release() - defer delta.Release() - - exindices, _, _ = array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 2, 3, 4]")) - defer exindices.Release() - exdelta, _, _ = array.FromJSON(p.mem, p.typ, strings.NewReader("[4, 5]")) - defer exdelta.Release() - - p.True(array.Equal(exindices, indices)) - p.True(array.Equal(exdelta, delta)) -} - -func (p *PrimitiveDictionaryTestSuite) TestNewResetBehavior() { - expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} - bldr := array.NewDictionaryBuilder(p.mem, expectedType) - defer bldr.Release() - - builder := reflect.ValueOf(bldr) - appfn := builder.MethodByName("Append") - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - bldr.AppendNull() - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - - p.Less(0, bldr.Cap()) - p.Less(0, bldr.NullN()) - p.Equal(4, bldr.Len()) - - result := bldr.NewDictionaryArray() - defer result.Release() - - p.Zero(bldr.Cap()) - p.Zero(bldr.Len()) - p.Zero(bldr.NullN()) - - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - bldr.AppendNull() - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(4).Convert(p.reftyp)})[0].Interface()) - - result = bldr.NewDictionaryArray() - defer result.Release() - - p.Equal(4, result.Dictionary().Len()) -} - -func (p *PrimitiveDictionaryTestSuite) TestResetFull() { - expectedType := &arrow.DictionaryType{IndexType: &arrow.Int32Type{}, ValueType: p.typ} - bldr := array.NewDictionaryBuilder(p.mem, expectedType) - defer bldr.Release() - - builder := reflect.ValueOf(bldr) - appfn := builder.MethodByName("Append") - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - bldr.AppendNull() - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) 
- p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - - result := bldr.NewDictionaryArray() - defer result.Release() - - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - result = bldr.NewDictionaryArray() - defer result.Release() - - exindices, _, _ := array.FromJSON(p.mem, arrow.PrimitiveTypes.Int32, strings.NewReader("[2]")) - exdict, _, _ := array.FromJSON(p.mem, p.typ, strings.NewReader("[1, 2, 3]")) - defer exindices.Release() - defer exdict.Release() - - p.True(array.Equal(exindices, result.Indices())) - p.True(array.Equal(exdict, result.Dictionary())) - - bldr.ResetFull() - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(4).Convert(p.reftyp)})[0].Interface()) - result = bldr.NewDictionaryArray() - defer result.Release() - - exindices, _, _ = array.FromJSON(p.mem, arrow.PrimitiveTypes.Int32, strings.NewReader("[0]")) - exdict, _, _ = array.FromJSON(p.mem, p.typ, strings.NewReader("[4]")) - defer exindices.Release() - defer exdict.Release() - - p.True(array.Equal(exindices, result.Indices())) - p.True(array.Equal(exdict, result.Dictionary())) -} - -func (p *PrimitiveDictionaryTestSuite) TestStringRoundTrip() { - dt := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} - b := array.NewDictionaryBuilder(p.mem, dt) - defer b.Release() - - builder := reflect.ValueOf(b) - fn := builder.MethodByName("Append") - p.Nil(fn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(fn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(fn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - b.AppendNull() - - p.EqualValues(4, b.Len()) - p.EqualValues(1, b.NullN()) - - arr := b.NewArray().(*array.Dictionary) - defer arr.Release() - p.True(arrow.TypeEqual(dt, arr.DataType())) - - b1 := array.NewDictionaryBuilder(p.mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - 
p.NoError(b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Dictionary) - defer arr1.Release() - - p.Equal(arr.Len(), arr1.Len()) - p.True(array.Equal(arr, arr1)) -} - -func TestBasicStringDictionaryBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.BinaryDictionaryBuilder) - assert.NoError(t, builder.Append([]byte("test"))) - assert.NoError(t, builder.AppendString("test2")) - assert.NoError(t, builder.AppendString("test")) - - assert.Equal(t, "test", builder.ValueStr(builder.GetValueIndex(0))) - assert.Equal(t, "test2", builder.ValueStr(builder.GetValueIndex(1))) - assert.Equal(t, "test", builder.ValueStr(builder.GetValueIndex(2))) - - result := bldr.NewDictionaryArray() - defer result.Release() - - exdict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["test", "test2"]`)) - defer exdict.Release() - exint, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]")) - defer exint.Release() - - assert.True(t, arrow.TypeEqual(dictType, result.DataType())) - expected := array.NewDictionaryArray(dictType, exint, exdict) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) -} - -func TestStringDictionaryInsertValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - exdict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["c", "a", "b", "d"]`)) - defer exdict.Release() - - invalidDict, _, err := array.FromJSON(mem, arrow.BinaryTypes.Binary, strings.NewReader(`["ZQ==", "Zg=="]`)) - assert.NoError(t, err) - defer invalidDict.Release() - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int16Type{}, ValueType: 
arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.BinaryDictionaryBuilder) - assert.NoError(t, builder.InsertStringDictValues(exdict.(*array.String))) - // inserting again should have no effect - assert.NoError(t, builder.InsertStringDictValues(exdict.(*array.String))) - - assert.Error(t, builder.InsertDictValues(invalidDict.(*array.Binary))) - - for i := 0; i < 2; i++ { - builder.AppendString("c") - builder.AppendString("a") - builder.AppendString("b") - builder.AppendNull() - builder.AppendString("d") - } - - assert.Equal(t, 10, bldr.Len()) - - result := bldr.NewDictionaryArray() - defer result.Release() - - exindices, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int16, strings.NewReader("[0, 1, 2, null, 3, 0, 1, 2, null, 3]")) - defer exindices.Release() - expected := array.NewDictionaryArray(dictType, exindices, exdict) - defer expected.Release() - assert.True(t, array.Equal(expected, result)) -} - -func TestStringDictionaryBuilderInit(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictArr, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["test", "test2"]`)) - defer dictArr.Release() - intarr, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]")) - defer intarr.Release() - - dictType := &arrow.DictionaryType{IndexType: intarr.DataType().(arrow.FixedWidthDataType), ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilderWithDict(mem, dictType, dictArr) - defer bldr.Release() - - builder := bldr.(*array.BinaryDictionaryBuilder) - assert.NoError(t, builder.AppendString("test")) - assert.NoError(t, builder.AppendString("test2")) - assert.NoError(t, builder.AppendString("test")) - - result := bldr.NewDictionaryArray() - defer result.Release() - - expected := array.NewDictionaryArray(dictType, intarr, dictArr) - defer expected.Release() - - 
assert.True(t, array.Equal(expected, result)) -} - -func TestStringDictionaryBuilderOnlyNull(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - bldr.AppendNull() - result := bldr.NewDictionaryArray() - defer result.Release() - - dict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader("[]")) - defer dict.Release() - intarr, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[null]")) - defer intarr.Release() - - expected := array.NewDictionaryArray(dictType, intarr, dict) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) -} - -func TestStringDictionaryBuilderDelta(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.BinaryDictionaryBuilder) - assert.NoError(t, builder.AppendString("test")) - assert.NoError(t, builder.AppendString("test2")) - assert.NoError(t, builder.AppendString("test")) - - result := bldr.NewDictionaryArray() - defer result.Release() - - exdict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["test", "test2"]`)) - defer exdict.Release() - exint, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]")) - defer exint.Release() - - assert.True(t, arrow.TypeEqual(dictType, result.DataType())) - expected := array.NewDictionaryArray(dictType, exint, exdict) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) - - assert.NoError(t, builder.AppendString("test2")) - assert.NoError(t, builder.AppendString("test3")) - 
assert.NoError(t, builder.AppendString("test2")) - - indices, delta, err := builder.NewDelta() - assert.NoError(t, err) - defer indices.Release() - defer delta.Release() - - exdelta, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["test3"]`)) - defer exdelta.Release() - exint, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[1, 2, 1]")) - defer exint.Release() - - assert.True(t, array.Equal(exdelta, delta)) - assert.True(t, array.Equal(exint, indices)) -} - -func TestStringDictionaryBuilderBigDelta(t *testing.T) { - const testlen = 2048 - - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int16Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - builder := bldr.(*array.BinaryDictionaryBuilder) - - strbldr := array.NewStringBuilder(mem) - defer strbldr.Release() - - intbldr := array.NewInt16Builder(mem) - defer intbldr.Release() - - for idx := int16(0); idx < testlen; idx++ { - var b strings.Builder - b.WriteString("test") - fmt.Fprint(&b, idx) - - val := b.String() - assert.NoError(t, builder.AppendString(val)) - strbldr.Append(val) - intbldr.Append(idx) - } - - result := bldr.NewDictionaryArray() - defer result.Release() - strarr := strbldr.NewStringArray() - defer strarr.Release() - intarr := intbldr.NewInt16Array() - defer intarr.Release() - - expected := array.NewDictionaryArray(dictType, intarr, strarr) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) - - strbldr2 := array.NewStringBuilder(mem) - defer strbldr2.Release() - intbldr2 := array.NewInt16Builder(mem) - defer intbldr2.Release() - - for idx := int16(0); idx < testlen; idx++ { - builder.AppendString("test1") - intbldr2.Append(1) - } - for idx := int16(0); idx < testlen; idx++ { - builder.AppendString("test_new_value1") - intbldr2.Append(testlen) - } - 
strbldr2.Append("test_new_value1") - - indices2, delta2, err := bldr.NewDelta() - assert.NoError(t, err) - defer indices2.Release() - defer delta2.Release() - strarr2 := strbldr2.NewStringArray() - defer strarr2.Release() - intarr2 := intbldr2.NewInt16Array() - defer intarr2.Release() - - assert.True(t, array.Equal(intarr2, indices2)) - assert.True(t, array.Equal(strarr2, delta2)) - - strbldr3 := array.NewStringBuilder(mem) - defer strbldr3.Release() - intbldr3 := array.NewInt16Builder(mem) - defer intbldr3.Release() - - for idx := int16(0); idx < testlen; idx++ { - assert.NoError(t, builder.AppendString("test2")) - intbldr3.Append(2) - } - for idx := int16(0); idx < testlen; idx++ { - assert.NoError(t, builder.AppendString("test_new_value2")) - intbldr3.Append(testlen + 1) - } - strbldr3.Append("test_new_value2") - - indices3, delta3, err := bldr.NewDelta() - assert.NoError(t, err) - defer indices3.Release() - defer delta3.Release() - strarr3 := strbldr3.NewStringArray() - defer strarr3.Release() - intarr3 := intbldr3.NewInt16Array() - defer intarr3.Release() - - assert.True(t, array.Equal(intarr3, indices3)) - assert.True(t, array.Equal(strarr3, delta3)) -} - -func TestStringDictionaryBuilderIsNull(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.BinaryDictionaryBuilder) - assert.NoError(t, builder.AppendString("test")) - builder.AppendNull() - assert.NoError(t, builder.AppendString("test2")) - assert.NoError(t, builder.AppendString("test")) - - assert.False(t, bldr.IsNull(0)) - assert.True(t, bldr.IsNull(1)) - assert.False(t, bldr.IsNull(2)) - assert.False(t, bldr.IsNull(3)) -} - -func TestFixedSizeBinaryDictionaryBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - 
defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.FixedSizeBinaryType{ByteWidth: 4}} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.FixedSizeBinaryDictionaryBuilder) - test := []byte{12, 12, 11, 12} - test2 := []byte{12, 12, 11, 11} - assert.NoError(t, builder.Append(test)) - assert.NoError(t, builder.Append(test2)) - assert.NoError(t, builder.Append(test)) - - result := builder.NewDictionaryArray() - defer result.Release() - - fsbBldr := array.NewFixedSizeBinaryBuilder(mem, dictType.ValueType.(*arrow.FixedSizeBinaryType)) - defer fsbBldr.Release() - - fsbBldr.Append(test) - fsbBldr.Append(test2) - fsbArr := fsbBldr.NewFixedSizeBinaryArray() - defer fsbArr.Release() - - intbldr := array.NewInt8Builder(mem) - defer intbldr.Release() - - intbldr.AppendValues([]int8{0, 1, 0}, nil) - intArr := intbldr.NewInt8Array() - defer intArr.Release() - - expected := array.NewDictionaryArray(dictType, intArr, fsbArr) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) -} - -func TestFixedSizeBinaryDictionaryBuilderInit(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - fsbBldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 4}) - defer fsbBldr.Release() - - test, test2 := []byte("abcd"), []byte("wxyz") - fsbBldr.AppendValues([][]byte{test, test2}, nil) - dictArr := fsbBldr.NewFixedSizeBinaryArray() - defer dictArr.Release() - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: dictArr.DataType()} - bldr := array.NewDictionaryBuilderWithDict(mem, dictType, dictArr) - defer bldr.Release() - - builder := bldr.(*array.FixedSizeBinaryDictionaryBuilder) - assert.NoError(t, builder.Append(test)) - assert.NoError(t, builder.Append(test2)) - assert.NoError(t, builder.Append(test)) - - result := builder.NewDictionaryArray() - defer 
result.Release() - - indices, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]")) - defer indices.Release() - - expected := array.NewDictionaryArray(dictType, indices, dictArr) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) -} - -func TestFixedSizeBinaryDictionaryBuilderMakeBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - fsbBldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 4}) - defer fsbBldr.Release() - - test, test2 := []byte("abcd"), []byte("wxyz") - fsbBldr.AppendValues([][]byte{test, test2}, nil) - dictArr := fsbBldr.NewFixedSizeBinaryArray() - defer dictArr.Release() - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: dictArr.DataType()} - bldr := array.NewBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.FixedSizeBinaryDictionaryBuilder) - assert.NoError(t, builder.Append(test)) - assert.NoError(t, builder.Append(test2)) - assert.NoError(t, builder.Append(test)) - - result := builder.NewDictionaryArray() - defer result.Release() - - indices, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]")) - defer indices.Release() - - expected := array.NewDictionaryArray(dictType, indices, dictArr) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) -} - -func TestFixedSizeBinaryDictionaryBuilderDeltaDictionary(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.FixedSizeBinaryType{ByteWidth: 4}} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.FixedSizeBinaryDictionaryBuilder) - test := []byte{12, 12, 11, 12} - test2 := []byte{12, 12, 11, 11} - test3 := []byte{12, 12, 11, 10} - - assert.NoError(t, 
builder.Append(test)) - assert.NoError(t, builder.Append(test2)) - assert.NoError(t, builder.Append(test)) - - result1 := bldr.NewDictionaryArray() - defer result1.Release() - - fsbBuilder := array.NewFixedSizeBinaryBuilder(mem, dictType.ValueType.(*arrow.FixedSizeBinaryType)) - defer fsbBuilder.Release() - - fsbBuilder.AppendValues([][]byte{test, test2}, nil) - fsbArr1 := fsbBuilder.NewFixedSizeBinaryArray() - defer fsbArr1.Release() - - intBuilder := array.NewInt8Builder(mem) - defer intBuilder.Release() - intBuilder.AppendValues([]int8{0, 1, 0}, nil) - intArr1 := intBuilder.NewInt8Array() - defer intArr1.Release() - - expected := array.NewDictionaryArray(dictType, intArr1, fsbArr1) - defer expected.Release() - assert.True(t, array.Equal(expected, result1)) - - assert.NoError(t, builder.Append(test)) - assert.NoError(t, builder.Append(test2)) - assert.NoError(t, builder.Append(test3)) - - indices2, delta2, err := builder.NewDelta() - assert.NoError(t, err) - defer indices2.Release() - defer delta2.Release() - - fsbBuilder.Append(test3) - fsbArr2 := fsbBuilder.NewFixedSizeBinaryArray() - defer fsbArr2.Release() - - intBuilder.AppendValues([]int8{0, 1, 2}, nil) - intArr2 := intBuilder.NewInt8Array() - defer intArr2.Release() - - assert.True(t, array.Equal(intArr2, indices2)) - assert.True(t, array.Equal(fsbArr2, delta2)) -} - -func TestFixedSizeBinaryDictionaryStringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.FixedSizeBinaryType{ByteWidth: 4}} - b := array.NewDictionaryBuilder(mem, dictType) - defer b.Release() - - builder := b.(*array.FixedSizeBinaryDictionaryBuilder) - test := []byte{12, 12, 11, 12} - test2 := []byte{12, 12, 11, 11} - assert.NoError(t, builder.Append(test)) - assert.NoError(t, builder.Append(test2)) - assert.NoError(t, builder.Append(test)) - - arr := builder.NewDictionaryArray() - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewDictionaryBuilder(mem, dictType) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Dictionary) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestDecimal128DictionaryBuilderBasic(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - test := []decimal128.Num{decimal128.FromI64(12), decimal128.FromI64(12), decimal128.FromI64(11), decimal128.FromI64(12)} - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.Decimal128Type{Precision: 2, Scale: 0}} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.Decimal128DictionaryBuilder) - for _, v := range test { - assert.NoError(t, builder.Append(v)) - } - - result := bldr.NewDictionaryArray() - defer result.Release() - - indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader("[0, 0, 1, 0]")) - defer indices.Release() - dict, _, _ := array.FromJSON(mem, dictType.ValueType, strings.NewReader("[12, 11]")) - defer dict.Release() - - expected := array.NewDictionaryArray(dictType, indices, dict) - defer expected.Release() - - assert.True(t, array.ApproxEqual(expected, result)) -} - 
-func TestDecimal256DictionaryBuilderBasic(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - test := []decimal256.Num{decimal256.FromI64(12), decimal256.FromI64(12), decimal256.FromI64(11), decimal256.FromI64(12)} - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.Decimal256Type{Precision: 2, Scale: 0}} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.Decimal256DictionaryBuilder) - for _, v := range test { - assert.NoError(t, builder.Append(v)) - } - - result := bldr.NewDictionaryArray() - defer result.Release() - - indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader("[0, 0, 1, 0]")) - defer indices.Release() - dict, _, _ := array.FromJSON(mem, dictType.ValueType, strings.NewReader("[12, 11]")) - defer dict.Release() - - expected := array.NewDictionaryArray(dictType, indices, dict) - defer expected.Release() - - assert.True(t, array.ApproxEqual(expected, result)) -} - -func TestNullDictionaryBuilderBasic(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.Null} - bldr := array.NewBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.NullDictionaryBuilder) - builder.AppendNulls(3) - assert.Equal(t, 3, builder.Len()) - assert.Equal(t, 3, builder.NullN()) - - nullarr, _, _ := array.FromJSON(mem, arrow.Null, strings.NewReader("[null, null, null]")) - defer nullarr.Release() - - assert.NoError(t, builder.AppendArray(nullarr)) - assert.Equal(t, 6, bldr.Len()) - assert.Equal(t, 6, bldr.NullN()) - - result := builder.NewDictionaryArray() - defer result.Release() - assert.Equal(t, 6, result.Len()) - assert.Equal(t, 6, result.NullN()) -} - -func TestDictionaryEquals(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer 
mem.AssertSize(t, 0) - - var ( - isValid = []bool{true, true, false, true, true, true} - dict, dict2 arrow.Array - indices, indices2, indices3 arrow.Array - ) - - dict, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["foo", "bar", "baz"]`)) - defer dict.Release() - dictType := &arrow.DictionaryType{IndexType: &arrow.Uint16Type{}, ValueType: arrow.BinaryTypes.String} - - dict2, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["foo", "bar", "baz", "qux"]`)) - defer dict2.Release() - dictType2 := &arrow.DictionaryType{IndexType: &arrow.Uint16Type{}, ValueType: arrow.BinaryTypes.String} - - idxbuilder := array.NewUint16Builder(mem) - defer idxbuilder.Release() - - idxbuilder.AppendValues([]uint16{1, 2, math.MaxUint16, 0, 2, 0}, isValid) - indices = idxbuilder.NewArray() - defer indices.Release() - - idxbuilder.AppendValues([]uint16{1, 2, 0, 0, 2, 0}, isValid) - indices2 = idxbuilder.NewArray() - defer indices2.Release() - - idxbuilder.AppendValues([]uint16{1, 1, 0, 0, 2, 0}, isValid) - indices3 = idxbuilder.NewArray() - defer indices3.Release() - - var ( - arr = array.NewDictionaryArray(dictType, indices, dict) - arr2 = array.NewDictionaryArray(dictType, indices2, dict) - arr3 = array.NewDictionaryArray(dictType2, indices, dict2) - arr4 = array.NewDictionaryArray(dictType, indices3, dict) - ) - defer func() { - arr.Release() - arr2.Release() - arr3.Release() - arr4.Release() - }() - - assert.True(t, array.Equal(arr, arr)) - // equal because the unequal index is masked by null - assert.True(t, array.Equal(arr, arr2)) - // unequal dictionaries - assert.False(t, array.Equal(arr, arr3)) - // unequal indices - assert.False(t, array.Equal(arr, arr4)) - assert.True(t, array.SliceEqual(arr, 3, 6, arr4, 3, 6)) - assert.False(t, array.SliceEqual(arr, 1, 3, arr4, 1, 3)) - - sz := arr.Len() - slice := array.NewSlice(arr, 2, int64(sz)) - defer slice.Release() - slice2 := array.NewSlice(arr, 2, int64(sz)) - defer slice2.Release() - - 
assert.Equal(t, sz-2, slice.Len()) - assert.True(t, array.Equal(slice, slice2)) - assert.True(t, array.SliceEqual(arr, 2, int64(arr.Len()), slice, 0, int64(slice.Len()))) - - // chained slice - slice2 = array.NewSlice(arr, 1, int64(arr.Len())) - defer slice2.Release() - slice2 = array.NewSlice(slice2, 1, int64(slice2.Len())) - defer slice2.Release() - - assert.True(t, array.Equal(slice, slice2)) - slice = array.NewSlice(arr, 1, 4) - defer slice.Release() - slice2 = array.NewSlice(arr, 1, 4) - defer slice2.Release() - - assert.Equal(t, 3, slice.Len()) - assert.True(t, array.Equal(slice, slice2)) - assert.True(t, array.SliceEqual(arr, 1, 4, slice, 0, int64(slice.Len()))) -} - -func TestDictionaryIndexTypes(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictIndexTypes := []arrow.DataType{ - arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint8, - arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint16, - arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32, - arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Uint64, - } - - for _, indextyp := range dictIndexTypes { - t.Run(indextyp.Name(), func(t *testing.T) { - scope := memory.NewCheckedAllocatorScope(mem) - defer scope.CheckSize(t) - - dictType := &arrow.DictionaryType{IndexType: indextyp, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.BinaryDictionaryBuilder) - builder.AppendString("foo") - builder.AppendString("bar") - builder.AppendString("foo") - builder.AppendString("baz") - builder.Append(nil) - - assert.Equal(t, 5, builder.Len()) - assert.Equal(t, 1, builder.NullN()) - - result := builder.NewDictionaryArray() - defer result.Release() - - expectedIndices, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[0, 1, 0, 2, null]")) - defer expectedIndices.Release() - - assert.True(t, array.Equal(expectedIndices, result.Indices())) - }) - } -} - -func 
TestDictionaryFromArrays(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["foo", "bar", "baz"]`)) - defer dict.Release() - - dictIndexTypes := []arrow.DataType{ - arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint8, - arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint16, - arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32, - arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Uint64, - } - - for _, indextyp := range dictIndexTypes { - t.Run(indextyp.Name(), func(t *testing.T) { - scope := memory.NewCheckedAllocatorScope(mem) - defer scope.CheckSize(t) - - dictType := &arrow.DictionaryType{IndexType: indextyp, ValueType: arrow.BinaryTypes.String} - indices1, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[1, 2, 0, 0, 2, 0]")) - defer indices1.Release() - - indices2, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[1, 2, 0, 3, 2, 0]")) - defer indices2.Release() - - arr1, err := array.NewValidatedDictionaryArray(dictType, indices1, dict) - assert.NoError(t, err) - defer arr1.Release() - - _, err = array.NewValidatedDictionaryArray(dictType, indices2, dict) - assert.Error(t, err) - - switch indextyp.ID() { - case arrow.INT8, arrow.INT16, arrow.INT32, arrow.INT64: - indices3, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[1, 2, 0, null, 2, 0]")) - defer indices3.Release() - bitutil.ClearBit(indices3.Data().Buffers()[0].Bytes(), 2) - arr3, err := array.NewValidatedDictionaryArray(dictType, indices3, dict) - assert.NoError(t, err) - defer arr3.Release() - } - - indices4, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[1, 2, null, 3, 2, 0]")) - defer indices4.Release() - _, err = array.NewValidatedDictionaryArray(dictType, indices4, dict) - assert.Error(t, err) - - diffIndexType := arrow.PrimitiveTypes.Int8 - if indextyp.ID() == arrow.INT8 { - diffIndexType = arrow.PrimitiveTypes.Uint8 
- } - _, err = array.NewValidatedDictionaryArray(&arrow.DictionaryType{IndexType: diffIndexType, ValueType: arrow.BinaryTypes.String}, indices4, dict) - assert.Error(t, err) - }) - } -} - -func TestListOfDictionary(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - rootBuilder := array.NewBuilder(mem, arrow.ListOf(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: arrow.BinaryTypes.String})) - defer rootBuilder.Release() - - listBldr := rootBuilder.(*array.ListBuilder) - dictBldr := listBldr.ValueBuilder().(*array.BinaryDictionaryBuilder) - - listBldr.Append(true) - expected := []string{} - for _, a := range []byte("abc") { - for _, d := range []byte("def") { - for _, g := range []byte("ghi") { - for _, j := range []byte("jkl") { - for _, m := range []byte("mno") { - for _, p := range []byte("pqr") { - if a+d+g+j+m+p%16 == 0 { - listBldr.Append(true) - } - - str := string([]byte{a, d, g, j, m, p}) - dictBldr.AppendString(str) - expected = append(expected, str) - } - } - } - } - } - } - - strbldr := array.NewStringBuilder(mem) - defer strbldr.Release() - strbldr.AppendValues(expected, nil) - - expectedDict := strbldr.NewStringArray() - defer expectedDict.Release() - - arr := rootBuilder.NewArray() - defer arr.Release() - - actualDict := arr.(*array.List).ListValues().(*array.Dictionary) - assert.True(t, array.Equal(expectedDict, actualDict.Dictionary())) -} - -func TestDictionaryCanCompareIndices(t *testing.T) { - makeDict := func(mem memory.Allocator, idxType, valueType arrow.DataType, dictJSON string) *array.Dictionary { - indices, _, _ := array.FromJSON(mem, idxType, strings.NewReader("[]")) - defer indices.Release() - dict, _, _ := array.FromJSON(mem, valueType, strings.NewReader(dictJSON)) - defer dict.Release() - - out, _ := array.NewValidatedDictionaryArray(&arrow.DictionaryType{IndexType: idxType, ValueType: valueType}, indices, dict) - return out - } - - compareSwap := func(t 
*testing.T, l, r *array.Dictionary, expected bool) { - assert.Equalf(t, expected, l.CanCompareIndices(r), "left: %s\nright: %s\n", l, r) - assert.Equalf(t, expected, r.CanCompareIndices(l), "left: %s\nright: %s\n", r, l) - } - - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - t.Run("same", func(t *testing.T) { - arr := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar"]`) - defer arr.Release() - same := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar"]`) - defer same.Release() - compareSwap(t, arr, same, true) - }) - - t.Run("prefix dict", func(t *testing.T) { - arr := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar", "quux"]`) - defer arr.Release() - prefixDict := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar"]`) - defer prefixDict.Release() - compareSwap(t, arr, prefixDict, true) - }) - - t.Run("indices need cast", func(t *testing.T) { - arr := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar"]`) - defer arr.Release() - needcast := makeDict(mem, arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String, `["foo", "bar"]`) - defer needcast.Release() - compareSwap(t, arr, needcast, false) - }) - - t.Run("non prefix", func(t *testing.T) { - arr := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar", "quux"]`) - defer arr.Release() - nonPrefix := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "blink"]`) - defer nonPrefix.Release() - compareSwap(t, arr, nonPrefix, false) - }) -} - -func TestDictionaryGetValueIndex(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - indicesJson := "[5, 0, 1, 3, 2, 4]" - indices64, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(indicesJson)) - defer indices64.Release() - dict, _, _ := 
array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader("[10, 20, 30, 40, 50, 60]")) - defer dict.Release() - - dictIndexTypes := []arrow.DataType{ - arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint8, - arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint16, - arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32, - arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Uint64, - } - i64Index := indices64.(*array.Int64) - for _, idxt := range dictIndexTypes { - t.Run(idxt.Name(), func(t *testing.T) { - indices, _, _ := array.FromJSON(mem, idxt, strings.NewReader(indicesJson)) - defer indices.Release() - dictType := &arrow.DictionaryType{IndexType: idxt, ValueType: arrow.PrimitiveTypes.Int32} - - dictArr := array.NewDictionaryArray(dictType, indices, dict) - defer dictArr.Release() - - const offset = 1 - slicedDictArr := array.NewSlice(dictArr, offset, int64(dictArr.Len())) - defer slicedDictArr.Release() - assert.EqualValues(t, "10", slicedDictArr.(*array.Dictionary).ValueStr(0)) - for i := 0; i < indices.Len(); i++ { - assert.EqualValues(t, i64Index.Value(i), dictArr.GetValueIndex(i)) - if i < slicedDictArr.Len() { - assert.EqualValues(t, i64Index.Value(i+offset), slicedDictArr.(*array.Dictionary).GetValueIndex(i)) - } - } - }) - } -} - -func checkTransposeMap(t *testing.T, b *memory.Buffer, exp []int32) bool { - got := arrow.Int32Traits.CastFromBytes(b.Bytes()) - return assert.Equal(t, exp, got) -} - -func TestDictionaryUnifierNumeric(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := arrow.PrimitiveTypes.Int64 - - d1, _, err := array.FromJSON(mem, dictType, strings.NewReader(`[3, 4, 7]`)) - require.NoError(t, err) - d2, _, err := array.FromJSON(mem, dictType, strings.NewReader(`[1, 7, 4, 8]`)) - require.NoError(t, err) - d3, _, err := array.FromJSON(mem, dictType, strings.NewReader(`[1, -200]`)) - require.NoError(t, err) - - expected := &arrow.DictionaryType{IndexType: 
arrow.PrimitiveTypes.Int8, ValueType: dictType} - expectedDict, _, err := array.FromJSON(mem, dictType, strings.NewReader(`[3, 4, 7, 1, 8, -200]`)) - require.NoError(t, err) - defer func() { - d1.Release() - d2.Release() - d3.Release() - expectedDict.Release() - }() - - unifier, err := array.NewDictionaryUnifier(mem, dictType) - assert.NoError(t, err) - defer unifier.Release() - - assert.NoError(t, unifier.Unify(d1)) - assert.NoError(t, unifier.Unify(d2)) - assert.NoError(t, unifier.Unify(d3)) - - invalid, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, -200]`)) - defer invalid.Release() - assert.EqualError(t, unifier.Unify(invalid), "dictionary type different from unifier: int32, expected: int64") - - outType, outDict, err := unifier.GetResult() - assert.NoError(t, err) - defer outDict.Release() - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - b1, err := unifier.UnifyAndTranspose(d1) - assert.NoError(t, err) - b2, err := unifier.UnifyAndTranspose(d2) - assert.NoError(t, err) - b3, err := unifier.UnifyAndTranspose(d3) - assert.NoError(t, err) - - outType, outDict, err = unifier.GetResult() - assert.NoError(t, err) - defer func() { - outDict.Release() - b1.Release() - b2.Release() - b3.Release() - }() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - checkTransposeMap(t, b1, []int32{0, 1, 2}) - checkTransposeMap(t, b2, []int32{3, 2, 1, 4}) - checkTransposeMap(t, b3, []int32{3, 5}) -} - -func TestDictionaryUnifierString(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := arrow.BinaryTypes.String - d1, _, err := array.FromJSON(mem, dictType, 
strings.NewReader(`["foo", "bar"]`)) - require.NoError(t, err) - defer d1.Release() - - d2, _, err := array.FromJSON(mem, dictType, strings.NewReader(`["quux", "foo"]`)) - require.NoError(t, err) - defer d2.Release() - - expected := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: dictType} - expectedDict, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["foo", "bar", "quux"]`)) - defer expectedDict.Release() - - unifier, err := array.NewDictionaryUnifier(mem, dictType) - assert.NoError(t, err) - defer unifier.Release() - - assert.NoError(t, unifier.Unify(d1)) - assert.NoError(t, unifier.Unify(d2)) - outType, outDict, err := unifier.GetResult() - assert.NoError(t, err) - defer outDict.Release() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - b1, err := unifier.UnifyAndTranspose(d1) - assert.NoError(t, err) - b2, err := unifier.UnifyAndTranspose(d2) - assert.NoError(t, err) - - outType, outDict, err = unifier.GetResult() - assert.NoError(t, err) - defer func() { - outDict.Release() - b1.Release() - b2.Release() - }() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - checkTransposeMap(t, b1, []int32{0, 1}) - checkTransposeMap(t, b2, []int32{2, 0}) -} - -func TestDictionaryUnifierBinary(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := arrow.BinaryTypes.Binary - d1, _, err := array.FromJSON(mem, dictType, strings.NewReader(`["Zm9vCg==", "YmFyCg=="]`)) // base64("foo\n"), base64("bar\n") - require.NoError(t, err) - defer d1.Release() - - d2, _, err := array.FromJSON(mem, dictType, strings.NewReader(`["cXV1eAo=", "Zm9vCg=="]`)) // base64("quux\n"), 
base64("foo\n") - require.NoError(t, err) - defer d2.Release() - - expected := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: dictType} - expectedDict, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["Zm9vCg==", "YmFyCg==", "cXV1eAo="]`)) - defer expectedDict.Release() - - unifier := array.NewBinaryDictionaryUnifier(mem) - defer unifier.Release() - - assert.NoError(t, unifier.Unify(d1)) - assert.NoError(t, unifier.Unify(d2)) - outType, outDict, err := unifier.GetResult() - assert.NoError(t, err) - defer outDict.Release() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - b1, err := unifier.UnifyAndTranspose(d1) - assert.NoError(t, err) - b2, err := unifier.UnifyAndTranspose(d2) - assert.NoError(t, err) - - outType, outDict, err = unifier.GetResult() - assert.NoError(t, err) - defer func() { - outDict.Release() - b1.Release() - b2.Release() - }() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - checkTransposeMap(t, b1, []int32{0, 1}) - checkTransposeMap(t, b2, []int32{2, 0}) -} - -func TestDictionaryUnifierFixedSizeBinary(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.FixedSizeBinaryType{ByteWidth: 3} - data := memory.NewBufferBytes([]byte(`foobarbazqux`)) - - fsbData := array.NewData(dictType, 2, []*memory.Buffer{nil, memory.SliceBuffer(data, 0, 6)}, nil, 0, 0) - defer fsbData.Release() - d1 := array.NewFixedSizeBinaryData(fsbData) - fsbData = array.NewData(dictType, 3, []*memory.Buffer{nil, memory.SliceBuffer(data, 3, 9)}, nil, 0, 0) - defer fsbData.Release() - d2 := array.NewFixedSizeBinaryData(fsbData) - - fsbData = array.NewData(dictType, 4, 
[]*memory.Buffer{nil, data}, nil, 0, 0) - defer fsbData.Release() - expectedDict := array.NewFixedSizeBinaryData(fsbData) - expected := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: dictType} - - unifier, err := array.NewDictionaryUnifier(mem, dictType) - assert.NoError(t, err) - - defer func() { - d1.Release() - d2.Release() - expectedDict.Release() - unifier.Release() - }() - - assert.NoError(t, unifier.Unify(d1)) - assert.NoError(t, unifier.Unify(d2)) - outType, outDict, err := unifier.GetResult() - assert.NoError(t, err) - defer outDict.Release() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - b1, err := unifier.UnifyAndTranspose(d1) - assert.NoError(t, err) - b2, err := unifier.UnifyAndTranspose(d2) - assert.NoError(t, err) - - outType, outDict, err = unifier.GetResult() - assert.NoError(t, err) - defer func() { - outDict.Release() - b1.Release() - b2.Release() - }() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - checkTransposeMap(t, b1, []int32{0, 1}) - checkTransposeMap(t, b2, []int32{1, 2, 3}) -} - -func TestDictionaryUnifierLarge(t *testing.T) { - // unifying larger dictionaries should choose the right index type - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - bldr := array.NewInt32Builder(mem) - defer bldr.Release() - bldr.Reserve(120) - for i := int32(0); i < 120; i++ { - bldr.UnsafeAppend(i) - } - - d1 := bldr.NewInt32Array() - defer d1.Release() - assert.EqualValues(t, 120, d1.Len()) - - bldr.Reserve(30) - for i := int32(110); i < 140; i++ { - bldr.UnsafeAppend(i) - } - - d2 := bldr.NewInt32Array() - defer d2.Release() - assert.EqualValues(t, 30, d2.Len()) - - 
bldr.Reserve(140) - for i := int32(0); i < 140; i++ { - bldr.UnsafeAppend(i) - } - - expectedDict := bldr.NewInt32Array() - defer expectedDict.Release() - assert.EqualValues(t, 140, expectedDict.Len()) - - // int8 would be too narrow to hold all the values - expected := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: arrow.PrimitiveTypes.Int32} - - unifier, err := array.NewDictionaryUnifier(mem, arrow.PrimitiveTypes.Int32) - assert.NoError(t, err) - defer unifier.Release() - - assert.NoError(t, unifier.Unify(d1)) - assert.NoError(t, unifier.Unify(d2)) - outType, outDict, err := unifier.GetResult() - assert.NoError(t, err) - defer outDict.Release() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) -} - -func checkDictionaryArray(t *testing.T, arr, expectedVals, expectedIndices arrow.Array) bool { - require.IsType(t, (*array.Dictionary)(nil), arr) - dictArr := arr.(*array.Dictionary) - ret := true - ret = ret && assert.Truef(t, array.Equal(expectedVals, dictArr.Dictionary()), "got: %s, expected: %s", dictArr.Dictionary(), expectedVals) - return ret && assert.Truef(t, array.Equal(expectedIndices, dictArr.Indices()), "got: %s, expected: %s", dictArr.Indices(), expectedIndices) -} - -func TestDictionaryUnifierSimpleChunkedArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String} - chunk1, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["ab", "cd", null, "cd"]`)) - chunk2, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["ef", "cd", "ef"]`)) - chunk3, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["ef", "ab", null, "ab"]`)) - chunk4, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`[]`)) - chunked 
:= arrow.NewChunked(dictType, []arrow.Array{chunk1, chunk2, chunk3, chunk4}) - defer func() { - chunk1.Release() - chunk2.Release() - chunk3.Release() - chunk4.Release() - chunked.Release() - }() - - unified, err := array.UnifyChunkedDicts(mem, chunked) - assert.NoError(t, err) - defer unified.Release() - - assert.Len(t, unified.Chunks(), 4) - expectedDict, _, _ := array.FromJSON(mem, dictType.ValueType, strings.NewReader(`["ab", "cd", "ef"]`)) - defer expectedDict.Release() - - c1Indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader(`[0, 1, null, 1]`)) - defer c1Indices.Release() - c2Indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader(`[2, 1, 2]`)) - defer c2Indices.Release() - c3Indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader(`[2, 0, null, 0]`)) - defer c3Indices.Release() - c4Indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader(`[]`)) - defer c4Indices.Release() - checkDictionaryArray(t, unified.Chunk(0), expectedDict, c1Indices) - checkDictionaryArray(t, unified.Chunk(1), expectedDict, c2Indices) - checkDictionaryArray(t, unified.Chunk(2), expectedDict, c3Indices) - checkDictionaryArray(t, unified.Chunk(3), expectedDict, c4Indices) -} - -func TestDictionaryUnifierChunkedArrayZeroChunks(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String} - chunked := arrow.NewChunked(dictType, []arrow.Array{}) - unified, err := array.UnifyChunkedDicts(mem, chunked) - assert.NoError(t, err) - assert.True(t, array.ChunkedEqual(unified, chunked)) -} - -func TestDictionaryUnifierChunkedArrayOneChunk(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String} - chunk1, _, 
_ := array.FromJSON(mem, dictType, strings.NewReader(`["ab", "cd", null, "cd"]`)) - defer chunk1.Release() - - chunked := arrow.NewChunked(dictType, []arrow.Array{chunk1}) - defer chunked.Release() - - unified, err := array.UnifyChunkedDicts(mem, chunked) - assert.NoError(t, err) - defer unified.Release() - - assert.True(t, array.ChunkedEqual(unified, chunked)) - assert.Same(t, unified, chunked) -} - -func TestDictionaryUnifierChunkedArrayNoDict(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - typ := arrow.PrimitiveTypes.Int8 - chunk1, _, _ := array.FromJSON(mem, typ, strings.NewReader(`[1, 1, 2, 3]`)) - defer chunk1.Release() - - chunk2, _, _ := array.FromJSON(mem, typ, strings.NewReader(`[5, 8, 13]`)) - defer chunk2.Release() - - chunked := arrow.NewChunked(typ, []arrow.Array{chunk1, chunk2}) - defer chunked.Release() - - unified, err := array.UnifyChunkedDicts(mem, chunked) - assert.NoError(t, err) - defer unified.Release() - - assert.True(t, array.ChunkedEqual(unified, chunked)) - assert.Same(t, unified, chunked) -} - -func TestDictionaryUnifierChunkedArrayNested(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - typ := arrow.ListOf(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: arrow.BinaryTypes.String}) - chunk1, _, err := array.FromJSON(mem, typ, strings.NewReader(`[["ab", "cd"], ["cd"]]`)) - assert.NoError(t, err) - // defer chunk1.Release() - chunk2, _, err := array.FromJSON(mem, typ, strings.NewReader(`[[], ["ef", "cd", "ef"]]`)) - assert.NoError(t, err) - // defer chunk2.Release() - chunked := arrow.NewChunked(typ, []arrow.Array{chunk1, chunk2}) - // defer chunked.Release() - - unified, err := array.UnifyChunkedDicts(mem, chunked) - assert.NoError(t, err) - // defer unified.Release() - assert.Len(t, unified.Chunks(), 2) - - expectedDict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, 
strings.NewReader(`["ab", "cd", "ef"]`)) - // defer expectedDict.Release() - - unified1 := unified.Chunk(0).(*array.List) - assert.Equal(t, []int32{0, 2, 3}, unified1.Offsets()) - expectedIndices1, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int16, strings.NewReader(`[0, 1, 1]`)) - // defer expectedIndices1.Release() - checkDictionaryArray(t, unified1.ListValues(), expectedDict, expectedIndices1) - - unified2 := unified.Chunk(1).(*array.List) - assert.Equal(t, []int32{0, 0, 3}, unified2.Offsets()) - expectedIndices2, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int16, strings.NewReader(`[2, 1, 2]`)) - // defer expectedIndices2.Release() - checkDictionaryArray(t, unified2.ListValues(), expectedDict, expectedIndices2) - defer func() { - expectedIndices1.Release() - expectedIndices2.Release() - expectedDict.Release() - unified.Release() - chunked.Release() - chunk2.Release() - chunk1.Release() - }() -} - -func TestDictionaryUnifierChunkedArrayExtension(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dt := types.NewDictExtensionType() - chunk1, _, err := array.FromJSON(mem, dt, strings.NewReader(`["ab", null, "cd", "ab"]`)) - assert.NoError(t, err) - defer chunk1.Release() - - chunk2, _, err := array.FromJSON(mem, dt, strings.NewReader(`["ef", "ab", "ab"]`)) - assert.NoError(t, err) - defer chunk2.Release() - - chunked := arrow.NewChunked(dt, []arrow.Array{chunk1, chunk2}) - defer chunked.Release() - unified, err := array.UnifyChunkedDicts(mem, chunked) - assert.NoError(t, err) - defer unified.Release() - assert.Len(t, unified.Chunks(), 2) - - expectedDict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["ab", "cd", "ef"]`)) - defer expectedDict.Release() - - unified1 := unified.Chunk(0).(array.ExtensionArray) - assert.Truef(t, arrow.TypeEqual(dt, unified1.DataType()), "expected: %s, got: %s", dt, unified1.DataType()) - indices, _, _ := array.FromJSON(mem, 
arrow.PrimitiveTypes.Int8, strings.NewReader(`[0, null, 1, 0]`)) - defer indices.Release() - checkDictionaryArray(t, unified1.Storage(), expectedDict, indices) - - unified2 := unified.Chunk(1).(array.ExtensionArray) - assert.Truef(t, arrow.TypeEqual(dt, unified2.DataType()), "expected: %s, got: %s", dt, unified1.DataType()) - indices, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[2, 0, 0]`)) - defer indices.Release() - checkDictionaryArray(t, unified2.Storage(), expectedDict, indices) -} - -func TestDictionaryUnifierChunkedArrayNestedDict(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - innerType := arrow.ListOf(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint32, ValueType: arrow.BinaryTypes.String}) - innerDict1, _, err := array.FromJSON(mem, innerType, strings.NewReader(`[["ab", "cd"], [], ["cd", null]]`)) - assert.NoError(t, err) - defer innerDict1.Release() - indices1, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[2, 1, 0, 1, 2]`)) - defer indices1.Release() - - chunk1 := array.NewDictionaryArray(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: innerType}, indices1, innerDict1) - defer chunk1.Release() - - innerDict2, _, err := array.FromJSON(mem, innerType, strings.NewReader(`[["cd", "ef"], ["cd", null], []]`)) - assert.NoError(t, err) - defer innerDict2.Release() - indices2, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 2, 0]`)) - defer indices2.Release() - - chunk2 := array.NewDictionaryArray(&arrow.DictionaryType{IndexType: indices2.DataType(), ValueType: innerType}, indices2, innerDict2) - defer chunk2.Release() - - chunked := arrow.NewChunked(chunk1.DataType(), []arrow.Array{chunk1, chunk2}) - defer chunked.Release() - - unified, err := array.UnifyChunkedDicts(mem, chunked) - assert.Nil(t, unified) - assert.EqualError(t, err, "unimplemented dictionary value type, list, 
nullable>") -} - -func TestDictionaryUnifierTableZeroColumns(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema([]arrow.Field{}, nil) - table := array.NewTable(schema, []arrow.Column{}, 42) - defer table.Release() - - unified, err := array.UnifyTableDicts(mem, table) - assert.NoError(t, err) - assert.True(t, schema.Equal(unified.Schema())) - assert.EqualValues(t, 42, unified.NumRows()) - assert.True(t, array.TableEqual(table, unified)) -} - -func TestDictionaryAppendIndices(t *testing.T) { - indexTypes := []arrow.DataType{ - arrow.PrimitiveTypes.Int8, - arrow.PrimitiveTypes.Uint8, - arrow.PrimitiveTypes.Int16, - arrow.PrimitiveTypes.Uint16, - arrow.PrimitiveTypes.Int32, - arrow.PrimitiveTypes.Uint32, - arrow.PrimitiveTypes.Int64, - arrow.PrimitiveTypes.Uint64, - } - - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dict, _, err := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["a", "b", "c", "d", "e", "f"]`)) - require.NoError(t, err) - defer dict.Release() - - indices := []int{3, 4, 0, 3, 1, 4, 4, 5} - - for _, typ := range indexTypes { - t.Run(typ.String(), func(t *testing.T) { - scoped := memory.NewCheckedAllocatorScope(mem) - defer scoped.CheckSize(t) - - dictType := &arrow.DictionaryType{ - IndexType: typ, ValueType: dict.DataType()} - bldr := array.NewDictionaryBuilderWithDict(mem, dictType, dict) - defer bldr.Release() - - bldr.AppendIndices(indices, nil) - - arr := bldr.NewDictionaryArray() - defer arr.Release() - - arrIndices := arr.Indices() - assert.EqualValues(t, len(indices), arr.Len()) - assert.EqualValues(t, len(indices), arrIndices.Len()) - - assert.Equal(t, fmt.Sprint(indices), arrIndices.String()) - }) - } -} - -type panicAllocator struct { - n int - paniced bool - memory.Allocator -} - -func (p *panicAllocator) Allocate(size int) []byte { - if size > p.n { - p.paniced = true - panic("panic allocator") 
- } - return p.Allocator.Allocate(size) -} - -func (p *panicAllocator) Reallocate(size int, b []byte) []byte { - return p.Allocator.Reallocate(size, b) -} - -func (p *panicAllocator) Free(b []byte) { - p.Allocator.Free(b) -} - -func TestBinaryDictionaryPanic(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - allocator := &panicAllocator{ - n: 400, - Allocator: mem, - } - - expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(allocator, expectedType) - defer bldr.Release() - - bldr.AppendNull() - allocator.n = 0 // force panic - func() { - defer func() { - recover() - }() - bldr.NewArray() - }() - assert.True(t, allocator.paniced) -} - -func BenchmarkBinaryDictionaryBuilder(b *testing.B) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(b, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int32Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - randString := func() string { - return fmt.Sprintf("test-%d", rand.Intn(30)) - } - - builder := bldr.(*array.BinaryDictionaryBuilder) - for i := 0; i < b.N; i++ { - assert.NoError(b, builder.AppendString(randString())) - } -} diff --git a/go/arrow/array/diff.go b/go/arrow/array/diff.go deleted file mode 100644 index e5c1ce1521d95..0000000000000 --- a/go/arrow/array/diff.go +++ /dev/null @@ -1,315 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "fmt" - "strings" - - "github.com/apache/arrow/go/v18/arrow" -) - -// Edit represents one entry in the edit script to compare two arrays. -type Edit struct { - Insert bool - RunLength int64 -} - -// Edits is a slice of Edit structs that represents an edit script to compare two arrays. -// When applied to the base array, it produces the target array. -// Each element of "insert" determines whether an element was inserted into (true) -// or deleted from (false) base. Each insertion or deletion is followed by a run of -// elements which are unchanged from base to target; the length of this run is stored -// in RunLength. (Note that the edit script begins and ends with a run of shared -// elements but both fields of the struct must have the same length. To accommodate this -// the first element of "insert" should be ignored.) -// -// For example for base "hlloo" and target "hello", the edit script would be -// [ -// -// {"insert": false, "run_length": 1}, // leading run of length 1 ("h") -// {"insert": true, "run_length": 3}, // insert("e") then a run of length 3 ("llo") -// {"insert": false, "run_length": 0} // delete("o") then an empty run -// -// ] -type Edits []Edit - -// String returns a simple string representation of the edit script. -func (e Edits) String() string { - return fmt.Sprintf("%v", []Edit(e)) -} - -// UnifiedDiff returns a string representation of the diff of base and target in Unified Diff format. 
-func (e Edits) UnifiedDiff(base, target arrow.Array) string { - var s strings.Builder - baseIndex := int64(0) - targetIndex := int64(0) - wrotePosition := false - for i := 0; i < len(e); i++ { - if i > 0 { - if !wrotePosition { - s.WriteString(fmt.Sprintf("@@ -%d, +%d @@\n", baseIndex, targetIndex)) - wrotePosition = true - } - if e[i].Insert { - s.WriteString(fmt.Sprintf("+%v\n", stringAt(target, targetIndex))) - targetIndex++ - } else { - s.WriteString(fmt.Sprintf("-%v\n", stringAt(base, baseIndex))) - baseIndex++ - } - } - for j := int64(0); j < e[i].RunLength; j++ { - baseIndex++ - targetIndex++ - wrotePosition = false - } - } - return s.String() -} - -func stringAt(arr arrow.Array, i int64) string { - if arr.IsNull(int(i)) { - return "null" - } - dt := arr.DataType() - switch { - case arrow.TypeEqual(dt, arrow.PrimitiveTypes.Float32): - return fmt.Sprintf("%f", arr.(*Float32).Value(int(i))) - case arrow.TypeEqual(dt, arrow.PrimitiveTypes.Float64): - return fmt.Sprintf("%f", arr.(*Float64).Value(int(i))) - case arrow.TypeEqual(dt, arrow.PrimitiveTypes.Date32): - return arr.(*Date32).Value(int(i)).FormattedString() - case arrow.TypeEqual(dt, arrow.PrimitiveTypes.Date64): - return arr.(*Date64).Value(int(i)).FormattedString() - case arrow.TypeEqual(dt, arrow.FixedWidthTypes.Timestamp_s): - return arr.(*Timestamp).Value(int(i)).ToTime(arrow.Second).String() - case arrow.TypeEqual(dt, arrow.FixedWidthTypes.Timestamp_ms): - return arr.(*Timestamp).Value(int(i)).ToTime(arrow.Millisecond).String() - case arrow.TypeEqual(dt, arrow.FixedWidthTypes.Timestamp_us): - return arr.(*Timestamp).Value(int(i)).ToTime(arrow.Microsecond).String() - case arrow.TypeEqual(dt, arrow.FixedWidthTypes.Timestamp_ns): - return arr.(*Timestamp).Value(int(i)).ToTime(arrow.Nanosecond).String() - } - s := NewSlice(arr, i, i+1) - defer s.Release() - st, _ := s.MarshalJSON() - return strings.Trim(string(st[1:len(st)-1]), "\n") -} - -// Diff compares two arrays, returning an edit script which 
expresses the difference -// between them. The edit script can be applied to the base array to produce the target. -// 'base' is a baseline for comparison. -// 'target' is an array of identical type to base whose elements differ from base's. -func Diff(base, target arrow.Array) (edits Edits, err error) { - if !arrow.TypeEqual(base.DataType(), target.DataType()) { - return nil, fmt.Errorf("%w: only taking the diff of like-typed arrays is supported", arrow.ErrNotImplemented) - } - switch base.DataType().ID() { - case arrow.EXTENSION: - return Diff(base.(ExtensionArray).Storage(), target.(ExtensionArray).Storage()) - case arrow.DICTIONARY: - return nil, fmt.Errorf("%w: diffing arrays of type %s is not implemented", arrow.ErrNotImplemented, base.DataType()) - case arrow.RUN_END_ENCODED: - return nil, fmt.Errorf("%w: diffing arrays of type %s is not implemented", arrow.ErrNotImplemented, base.DataType()) - } - d := newQuadraticSpaceMyersDiff(base, target) - return d.Diff() -} - -// editPoint represents an intermediate state in the comparison of two arrays -type editPoint struct { - base int - target int -} - -type quadraticSpaceMyersDiff struct { - base arrow.Array - target arrow.Array - finishIndex int - editCount int - endpointBase []int - insert []bool - baseBegin int - targetBegin int - baseEnd int - targetEnd int -} - -func newQuadraticSpaceMyersDiff(base, target arrow.Array) *quadraticSpaceMyersDiff { - d := &quadraticSpaceMyersDiff{ - base: base, - target: target, - finishIndex: -1, - editCount: 0, - endpointBase: []int{}, - insert: []bool{}, - baseBegin: 0, - targetBegin: 0, - baseEnd: base.Len(), - targetEnd: target.Len(), - } - d.endpointBase = []int{d.extendFrom(editPoint{d.baseBegin, d.targetBegin}).base} - if d.baseEnd-d.baseBegin == d.targetEnd-d.targetBegin && d.endpointBase[0] == d.baseEnd { - // trivial case: base == target - d.finishIndex = 0 - } - return d -} - -func (d *quadraticSpaceMyersDiff) valuesEqual(baseIndex, targetIndex int) bool { - 
baseNull := d.base.IsNull(baseIndex) - targetNull := d.target.IsNull(targetIndex) - if baseNull || targetNull { - return baseNull && targetNull - } - return SliceEqual(d.base, int64(baseIndex), int64(baseIndex+1), d.target, int64(targetIndex), int64(targetIndex+1)) -} - -// increment the position within base and target (the elements skipped in this way were -// present in both sequences) -func (d *quadraticSpaceMyersDiff) extendFrom(p editPoint) editPoint { - for p.base != d.baseEnd && p.target != d.targetEnd { - if !d.valuesEqual(p.base, p.target) { - break - } - p.base++ - p.target++ - } - return p -} - -// increment the position within base (the element pointed to was deleted) -// then extend maximally -func (d *quadraticSpaceMyersDiff) deleteOne(p editPoint) editPoint { - if p.base != d.baseEnd { - p.base++ - } - return d.extendFrom(p) -} - -// increment the position within target (the element pointed to was inserted) -// then extend maximally -func (d *quadraticSpaceMyersDiff) insertOne(p editPoint) editPoint { - if p.target != d.targetEnd { - p.target++ - } - return d.extendFrom(p) -} - -// beginning of a range for storing per-edit state in endpointBase and insert -func storageOffset(editCount int) int { - return editCount * (editCount + 1) / 2 -} - -// given edit_count and index, augment endpointBase[index] with the corresponding -// position in target (which is only implicitly represented in editCount, index) -func (d *quadraticSpaceMyersDiff) getEditPoint(editCount, index int) editPoint { - insertionsMinusDeletions := 2*(index-storageOffset(editCount)) - editCount - maximalBase := d.endpointBase[index] - maximalTarget := min(d.targetBegin+((maximalBase-d.baseBegin)+insertionsMinusDeletions), d.targetEnd) - return editPoint{maximalBase, maximalTarget} -} - -func (d *quadraticSpaceMyersDiff) Next() { - d.editCount++ - if len(d.endpointBase) < storageOffset(d.editCount+1) { - d.endpointBase = append(d.endpointBase, make([]int, 
storageOffset(d.editCount+1)-len(d.endpointBase))...) - } - if len(d.insert) < storageOffset(d.editCount+1) { - d.insert = append(d.insert, make([]bool, storageOffset(d.editCount+1)-len(d.insert))...) - } - previousOffset := storageOffset(d.editCount - 1) - currentOffset := storageOffset(d.editCount) - - // try deleting from base first - for i, iOut := 0, 0; i < d.editCount; i, iOut = i+1, iOut+1 { - previousEndpoint := d.getEditPoint(d.editCount-1, i+previousOffset) - d.endpointBase[iOut+currentOffset] = d.deleteOne(previousEndpoint).base - } - - // check if inserting from target could do better - for i, iOut := 0, 1; i < d.editCount; i, iOut = i+1, iOut+1 { - // retrieve the previously computed best endpoint for (editCount, iOut) - // for comparison with the best endpoint achievable with an insertion - endpointAfterDeletion := d.getEditPoint(d.editCount, iOut+currentOffset) - - previousEndpoint := d.getEditPoint(d.editCount-1, i+previousOffset) - endpointAfterInsertion := d.insertOne(previousEndpoint) - - if endpointAfterInsertion.base-endpointAfterDeletion.base >= 0 { - // insertion was more efficient; keep it and mark the insertion in insert - d.insert[iOut+currentOffset] = true - d.endpointBase[iOut+currentOffset] = endpointAfterInsertion.base - } - } - - finish := editPoint{d.baseEnd, d.targetEnd} - for iOut := 0; iOut < d.editCount+1; iOut++ { - if d.getEditPoint(d.editCount, iOut+currentOffset) == finish { - d.finishIndex = iOut + currentOffset - return - } - } -} - -func (d *quadraticSpaceMyersDiff) Done() bool { - return d.finishIndex != -1 -} - -func (d *quadraticSpaceMyersDiff) GetEdits() (Edits, error) { - if !d.Done() { - panic("GetEdits called but Done() = false") - } - - length := d.editCount + 1 - edits := make(Edits, length) - index := d.finishIndex - endpoint := d.getEditPoint(d.editCount, d.finishIndex) - - for i := d.editCount; i > 0; i-- { - insert := d.insert[index] - edits[i].Insert = insert - insertionsMinusDeletions := (endpoint.base - 
d.baseBegin) - (endpoint.target - d.targetBegin) - if insert { - insertionsMinusDeletions++ - } else { - insertionsMinusDeletions-- - } - index = (i-1-insertionsMinusDeletions)/2 + storageOffset(i-1) - - // endpoint of previous edit - previous := d.getEditPoint(i-1, index) - in := 0 - if insert { - in = 1 - } - edits[i].RunLength = int64(endpoint.base - previous.base - (1 - in)) - endpoint = previous - } - edits[0].Insert = false - edits[0].RunLength = int64(endpoint.base - d.baseBegin) - - return edits, nil -} - -func (d *quadraticSpaceMyersDiff) Diff() (edits Edits, err error) { - for !d.Done() { - d.Next() - } - return d.GetEdits() -} diff --git a/go/arrow/array/diff_test.go b/go/arrow/array/diff_test.go deleted file mode 100644 index 9c9ce6a53aed0..0000000000000 --- a/go/arrow/array/diff_test.go +++ /dev/null @@ -1,878 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "fmt" - "math/rand" - "reflect" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/extensions" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type diffTestCase struct { - dataType arrow.DataType - - baseJSON string - targetJSON string - wantInsert []bool - wantRunLength []int64 -} - -func (s *diffTestCase) check(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - base, _, err := array.FromJSON(mem, s.dataType, strings.NewReader(s.baseJSON)) - if err != nil { - t.Fatal(err) - } - defer base.Release() - - target, _, err := array.FromJSON(mem, s.dataType, strings.NewReader(s.targetJSON)) - if err != nil { - t.Fatal(err) - } - defer target.Release() - - edits, err := array.Diff(base, target) - if err != nil { - t.Fatalf("got unexpected error %v", err) - } - - gotInserts := make([]bool, len(edits)) - gotRunLengths := make([]int64, len(edits)) - for i, edit := range edits { - gotInserts[i] = edit.Insert - gotRunLengths[i] = edit.RunLength - } - if !reflect.DeepEqual(gotInserts, s.wantInsert) { - t.Errorf("Diff(\n base=%v, \ntarget=%v\n) got insert %v, want %v", base, target, gotInserts, s.wantInsert) - } - if !reflect.DeepEqual(gotRunLengths, s.wantRunLength) { - t.Errorf("Diff(\n base=%v, \ntarget=%v\n) got run length %v, want %v", base, target, gotRunLengths, s.wantRunLength) - } -} - -func TestDiff_Trivial(t *testing.T) { - cases := []struct { - name string - base string - target string - wantInsert []bool - wantRunLength []int64 - }{ - { - name: "empty", - base: `[]`, - target: `[]`, - wantInsert: []bool{false}, - wantRunLength: []int64{0}, - }, - { - name: "nulls", - base: `[null, null]`, - target: `[null, null, null, null]`, - wantInsert: []bool{false, true, true}, - wantRunLength: []int64{2, 0, 0}, - }, - { - name: 
"equal", - base: `[1, 2, 3]`, - target: `[1, 2, 3]`, - wantInsert: []bool{false}, - wantRunLength: []int64{3}, - }, - } - for _, tc := range cases { - d := diffTestCase{ - dataType: arrow.PrimitiveTypes.Int32, - baseJSON: tc.base, - targetJSON: tc.target, - wantInsert: tc.wantInsert, - wantRunLength: tc.wantRunLength, - } - t.Run(tc.name, d.check) - } -} - -func TestDiff_Basics(t *testing.T) { - cases := []struct { - name string - base string - target string - wantInsert []bool - wantRunLength []int64 - }{ - { - name: "insert one", - base: `[1, 2, null, 5]`, - target: `[1, 2, 3, null, 5]`, - wantInsert: []bool{false, true}, - wantRunLength: []int64{2, 2}, - }, - { - name: "delete one", - base: `[1, 2, 3, null, 5]`, - target: `[1, 2, null, 5]`, - wantInsert: []bool{false, false}, - wantRunLength: []int64{2, 2}, - }, - { - name: "change one", - base: `[1, 2, 3, null, 5]`, - target: `[1, 2, 23, null, 5]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{2, 0, 2}, - }, - { - name: "null out one", - base: `[1, 2, 3, null, 5]`, - target: `[1, 2, null, null, 5]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{2, 1, 1}, - }, - { - name: "append some", - base: `[1, 2, 3, null, 5]`, - target: `[1, 2, 3, null, 5, 6, 7, 8, 9]`, - wantInsert: []bool{false, true, true, true, true}, - wantRunLength: []int64{5, 0, 0, 0, 0}, - }, - { - name: "prepend some", - base: `[1, 2, 3, null, 5]`, - target: `[6, 4, 2, 0, 1, 2, 3, null, 5]`, - wantInsert: []bool{false, true, true, true, true}, - wantRunLength: []int64{0, 0, 0, 0, 5}, - }, - } - for _, tc := range cases { - d := diffTestCase{ - dataType: arrow.PrimitiveTypes.Int32, - baseJSON: tc.base, - targetJSON: tc.target, - wantInsert: tc.wantInsert, - wantRunLength: tc.wantRunLength, - } - t.Run(tc.name, d.check) - } -} - -func TestDiff_BasicsWithBooleans(t *testing.T) { - cases := []struct { - name string - base string - target string - wantInsert []bool - wantRunLength []int64 - }{ - { - name: 
"insert one", - base: `[true, true, true]`, - target: `[true, false, true, true]`, - wantInsert: []bool{false, true}, - wantRunLength: []int64{1, 2}, - }, - { - name: "delete one", - base: `[true, false, true, true]`, - target: `[true, true, true]`, - wantInsert: []bool{false, false}, - wantRunLength: []int64{1, 2}, - }, - { - name: "change one", - base: `[false, false, true]`, - target: `[true, false, true]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{0, 0, 2}, - }, - { - name: "null out one", - base: `[true, false, true]`, - target: `[true, false, null]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{2, 0, 0}, - }, - } - for _, tc := range cases { - d := diffTestCase{ - dataType: &arrow.BooleanType{}, - baseJSON: tc.base, - targetJSON: tc.target, - wantInsert: tc.wantInsert, - wantRunLength: tc.wantRunLength, - } - t.Run(tc.name, d.check) - } -} - -func TestDiff_BasicsWithStrings(t *testing.T) { - cases := []struct { - name string - base string - target string - wantInsert []bool - wantRunLength []int64 - }{ - { - name: "insert one", - base: `["give", "a", "break"]`, - target: `["give", "me", "a", "break"]`, - wantInsert: []bool{false, true}, - wantRunLength: []int64{1, 2}, - }, - { - name: "delete one", - base: `["give", "me", "a", "break"]`, - target: `["give", "a", "break"]`, - wantInsert: []bool{false, false}, - wantRunLength: []int64{1, 2}, - }, - { - name: "change one", - base: `["give", "a", "break"]`, - target: `["gimme", "a", "break"]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{0, 0, 2}, - }, - { - name: "null out one", - base: `["give", "a", "break"]`, - target: `["give", "a", null]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{2, 0, 0}, - }, - } - for _, tc := range cases { - d := diffTestCase{ - dataType: &arrow.StringType{}, - baseJSON: tc.base, - targetJSON: tc.target, - wantInsert: tc.wantInsert, - wantRunLength: tc.wantRunLength, - } - t.Run(tc.name, 
d.check) - } -} - -func TestDiff_BasicsWithLists(t *testing.T) { - cases := []struct { - name string - base string - target string - wantInsert []bool - wantRunLength []int64 - }{ - { - name: "insert one", - base: `[[2, 3, 1], [], [13]]`, - target: `[[2, 3, 1], [5, 9], [], [13]]`, - wantInsert: []bool{false, true}, - wantRunLength: []int64{1, 2}, - }, - { - name: "delete one", - base: `[[2, 3, 1], [5, 9], [], [13]]`, - target: `[[2, 3, 1], [], [13]]`, - wantInsert: []bool{false, false}, - wantRunLength: []int64{1, 2}, - }, - { - name: "change one", - base: `[[2, 3, 1], [], [13]]`, - target: `[[3, 3, 3], [], [13]]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{0, 0, 2}, - }, - { - name: "null out one", - base: `[[2, 3, 1], [], [13]]`, - target: `[[2, 3, 1], [], null]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{2, 0, 0}, - }, - } - for _, tc := range cases { - d := diffTestCase{ - dataType: arrow.ListOf(arrow.PrimitiveTypes.Int32), - baseJSON: tc.base, - targetJSON: tc.target, - wantInsert: tc.wantInsert, - wantRunLength: tc.wantRunLength, - } - t.Run(tc.name, d.check) - } -} - -func TestDiff_BasicsWithStructs(t *testing.T) { - cases := []struct { - name string - base string - target string - wantInsert []bool - wantRunLength []int64 - }{ - { - name: "insert one", - base: `[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, - target: `[{"foo": "!", "bar": 3}, {"foo": "?"}, {}, {"bar": 13}]`, - wantInsert: []bool{false, true}, - wantRunLength: []int64{1, 2}, - }, - { - name: "delete one", - base: `[{"foo": "!", "bar": 3}, {"foo": "?"}, {}, {"bar": 13}]`, - target: `[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, - wantInsert: []bool{false, false}, - wantRunLength: []int64{1, 2}, - }, - { - name: "change one", - base: `[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, - target: `[{"foo": "!", "bar": 2}, {}, {"bar": 13}]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{0, 0, 2}, - }, - { - name: "null out one", - base: 
`[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, - target: `[{"foo": "!", "bar": 3}, {}, null]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{2, 0, 0}, - }, - } - for _, tc := range cases { - f1 := arrow.Field{Name: "foo", Type: arrow.BinaryTypes.String, Nullable: true} - f2 := arrow.Field{Name: "bar", Type: arrow.PrimitiveTypes.Int32, Nullable: true} - d := diffTestCase{ - dataType: arrow.StructOf(f1, f2), - baseJSON: tc.base, - targetJSON: tc.target, - wantInsert: tc.wantInsert, - wantRunLength: tc.wantRunLength, - } - t.Run(tc.name, d.check) - } -} - -func TestDiff_Random(t *testing.T) { - rng := rand.New(rand.NewSource(0xdeadbeef)) - for i := 0; i < 100; i++ { - t.Run(fmt.Sprintf("case-%d", i), func(t *testing.T) { - testRandomCase(t, rng) - }) - } -} - -func testRandomCase(t *testing.T, rng *rand.Rand) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dataType := arrow.PrimitiveTypes.Int32 - - baseValues := make([]int32, rng.Intn(10)) - for i := range baseValues { - baseValues[i] = rng.Int31() - } - baseJSON, err := json.Marshal(baseValues) - if err != nil { - t.Fatal(err) - } - - targetValues := make([]int32, rng.Intn(10)) - for i := range targetValues { - // create runs with some probability - if rng.Intn(2) == 0 && len(baseValues) > 0 { - targetValues[i] = baseValues[rng.Intn(len(baseValues))] - } else { - targetValues[i] = rng.Int31() - } - } - targetJSON, err := json.Marshal(targetValues) - if err != nil { - t.Fatal(err) - } - - base, _, err := array.FromJSON(mem, dataType, strings.NewReader(string(baseJSON))) - if err != nil { - t.Fatal(err) - } - defer base.Release() - - target, _, err := array.FromJSON(mem, dataType, strings.NewReader(string(targetJSON))) - if err != nil { - t.Fatal(err) - } - defer target.Release() - - edits, err := array.Diff(base, target) - if err != nil { - t.Fatalf("got unexpected error %v", err) - } - - validateEditScript(t, edits, base, target) -} - -// 
validateEditScript checks that the edit script produces target when applied to base. -func validateEditScript(t *testing.T, edits array.Edits, base, target arrow.Array) { - if len(edits) == 0 { - t.Fatalf("edit script has run length of zero") - } - - baseIndex := int64(0) - targetIndex := int64(0) - for i := 0; i < len(edits); i++ { - if i > 0 { - if edits[i].Insert { - targetIndex++ - } else { - baseIndex++ - } - } - for j := int64(0); j < edits[i].RunLength; j++ { - if !array.SliceEqual(base, baseIndex, baseIndex+1, target, targetIndex, targetIndex+1) { - t.Fatalf("edit script (%v) when applied to base %v does not produce target %v", edits, base, target) - } - baseIndex += 1 - targetIndex += 1 - } - } - if baseIndex != int64(base.Len()) || targetIndex != int64(target.Len()) { - t.Fatalf("edit script (%v) when applied to base %v does not produce target %v", edits, base, target) - } -} - -type diffStringTestCase struct { - dataType arrow.DataType - - name string - baseJSON string - targetJSON string - want string -} - -func (s *diffStringTestCase) check(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - base, _, err := array.FromJSON(mem, s.dataType, strings.NewReader(s.baseJSON)) - if err != nil { - t.Fatal(err) - } - defer base.Release() - - target, _, err := array.FromJSON(mem, s.dataType, strings.NewReader(s.targetJSON)) - if err != nil { - t.Fatal(err) - } - defer target.Release() - - edits, err := array.Diff(base, target) - if err != nil { - t.Fatalf("got unexpected error %v", err) - } - got := edits.UnifiedDiff(base, target) - if got != s.want { - t.Errorf("got:\n%v\n, want:\n%v", got, s.want) - } -} - -func TestEdits_UnifiedDiff(t *testing.T) { - msPerDay := 24 * 60 * 60 * 1000 - cases := []diffStringTestCase{ - { - name: "no changes", - dataType: arrow.BinaryTypes.String, - baseJSON: `["give", "me", "a", "break"]`, - targetJSON: `["give", "me", "a", "break"]`, - want: ``, - }, - { - name: "insert 
one", - dataType: arrow.BinaryTypes.String, - baseJSON: `["give", "a", "break"]`, - targetJSON: `["give", "me", "a", "break"]`, - want: `@@ -1, +1 @@ -+"me" -`, - }, - { - name: "delete one", - dataType: arrow.BinaryTypes.String, - baseJSON: `["give", "me", "a", "break"]`, - targetJSON: `["give", "a", "break"]`, - want: `@@ -1, +1 @@ --"me" -`, - }, - { - name: "change one", - dataType: arrow.BinaryTypes.String, - baseJSON: `["give", "a", "break"]`, - targetJSON: `["gimme", "a", "break"]`, - want: `@@ -0, +0 @@ --"give" -+"gimme" -`, - }, - { - name: "null out one", - dataType: arrow.BinaryTypes.String, - baseJSON: `["give", "a", "break"]`, - targetJSON: `["give", "a", null]`, - want: `@@ -2, +2 @@ --"break" -+null -`, - }, - { - name: "strings with escaped chars", - dataType: arrow.BinaryTypes.String, - baseJSON: `["newline:\\n", "quote:'", "backslash:\\\\"]`, - targetJSON: `["newline:\\n", "tab:\\t", "quote:\\\"", "backslash:\\\\"]`, - want: `@@ -1, +1 @@ --"quote:'" -+"tab:\\t" -+"quote:\\\"" -`, - }, - { - name: "date32", - dataType: arrow.PrimitiveTypes.Date32, - baseJSON: `[0, 1, 2, 31, 4]`, - targetJSON: `[0, 1, 31, 2, 4]`, - want: `@@ -2, +2 @@ --1970-01-03 -@@ -4, +3 @@ -+1970-01-03 -`, - }, - { - name: "date64", - dataType: arrow.PrimitiveTypes.Date64, - baseJSON: fmt.Sprintf(`[%d, %d, %d, %d, %d]`, 0*msPerDay, 1*msPerDay, 2*msPerDay, 31*msPerDay, 4*msPerDay), - targetJSON: fmt.Sprintf(`[%d, %d, %d, %d, %d]`, 0*msPerDay, 1*msPerDay, 31*msPerDay, 2*msPerDay, 4*msPerDay), - want: `@@ -2, +2 @@ --1970-01-03 -@@ -4, +3 @@ -+1970-01-03 -`, - }, - { - name: "timestamp_s", - dataType: arrow.FixedWidthTypes.Timestamp_s, - baseJSON: fmt.Sprintf(`[0, 1, %d, 2, 4]`, 678+(5+60*(4+60*(3+24*int64(1))))), - targetJSON: fmt.Sprintf(`[0, 1, 2, %d, 4]`, 678+(5+60*(4+60*(3+24*int64(1))))), - want: `@@ -2, +2 @@ --1970-01-02 03:15:23 +0000 UTC -@@ -4, +3 @@ -+1970-01-02 03:15:23 +0000 UTC -`, - }, - { - name: "timestamp_ms", - dataType: arrow.FixedWidthTypes.Timestamp_ms, - 
baseJSON: fmt.Sprintf(`[0, 1, %d, 2, 4]`, 678+1000*(5+60*(4+60*(3+24*int64(1))))), - targetJSON: fmt.Sprintf(`[0, 1, 2, %d, 4]`, 678+1000*(5+60*(4+60*(3+24*int64(1))))), - want: `@@ -2, +2 @@ --1970-01-02 03:04:05.678 +0000 UTC -@@ -4, +3 @@ -+1970-01-02 03:04:05.678 +0000 UTC -`, - }, - { - name: "timestamp_us", - dataType: arrow.FixedWidthTypes.Timestamp_us, - baseJSON: fmt.Sprintf(`[0, 1, %d, 2, 4]`, 678+1000000*(5+60*(4+60*(3+24*int64(1))))), - targetJSON: fmt.Sprintf(`[0, 1, 2, %d, 4]`, 678+1000000*(5+60*(4+60*(3+24*int64(1))))), - want: `@@ -2, +2 @@ --1970-01-02 03:04:05.000678 +0000 UTC -@@ -4, +3 @@ -+1970-01-02 03:04:05.000678 +0000 UTC -`, - }, - { - name: "timestamp_ns", - dataType: arrow.FixedWidthTypes.Timestamp_ns, - baseJSON: fmt.Sprintf(`[0, 1, %d, 2, 4]`, 678+1000000000*(5+60*(4+60*(3+24*int64(1))))), - targetJSON: fmt.Sprintf(`[0, 1, 2, %d, 4]`, 678+1000000000*(5+60*(4+60*(3+24*int64(1))))), - want: `@@ -2, +2 @@ --1970-01-02 03:04:05.000000678 +0000 UTC -@@ -4, +3 @@ -+1970-01-02 03:04:05.000000678 +0000 UTC -`, - }, - { - name: "lists", - dataType: arrow.ListOf(arrow.PrimitiveTypes.Int32), - baseJSON: `[[2, 3, 1], [], [13], []]`, - targetJSON: `[[2, 3, 1], [5, 9], [], [13]]`, - want: `@@ -1, +1 @@ -+[5,9] -@@ -3, +4 @@ --[] -`, - }, - { - name: "maps", - dataType: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32), - baseJSON: `[ - [{"key": "foo", "value": 2}, {"key": "bar", "value": 3}, {"key": "baz", "value": 1}], - [{"key": "quux", "value": 13}], - [] - ]`, - targetJSON: `[ - [{"key": "foo", "value": 2}, {"key": "bar", "value": 3}, {"key": "baz", "value": 1}], - [{"key": "ytho", "value": 11}], - [{"key": "quux", "value": 13}], - [] - ]`, - want: `@@ -1, +1 @@ -+[{"key":"ytho","value":11}] -`, - }, - { - name: "structs", - dataType: arrow.StructOf( - []arrow.Field{ - {Name: "foo", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "bar", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - }..., - ), - baseJSON: 
`[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, - targetJSON: `[{"foo": null, "bar": 2}, {}, {"bar": 13}]`, - want: `@@ -0, +0 @@ --{"bar":3,"foo":"!"} -+{"bar":2,"foo":null} -`, - }, - { - name: "unions", - dataType: arrow.UnionOf(arrow.SparseMode, - []arrow.Field{ - {Name: "foo", Type: arrow.BinaryTypes.String}, - {Name: "bar", Type: arrow.PrimitiveTypes.Int32}, - }, - []arrow.UnionTypeCode{2, 5}, - ), - baseJSON: `[[2, "!"], [5, 3], [5, 13]]`, - targetJSON: `[[2, "!"], [2, "3"], [5, 13]]`, - want: `@@ -1, +1 @@ --[5,3] -+[2,"3"] -`, - }, - { - name: "string", - dataType: arrow.BinaryTypes.String, - baseJSON: `["h", "l", "l", "o", "o"]`, - targetJSON: `["h", "e", "l", "l", "o", "0"]`, - want: `@@ -1, +1 @@ -+"e" -@@ -4, +5 @@ --"o" -+"0" -`, - }, - { - name: "int8", - dataType: arrow.PrimitiveTypes.Int8, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "int16", - dataType: arrow.PrimitiveTypes.Int16, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "int32", - dataType: arrow.PrimitiveTypes.Int32, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "int64", - dataType: arrow.PrimitiveTypes.Int64, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "uint8", - dataType: arrow.PrimitiveTypes.Uint8, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "uint16", - dataType: arrow.PrimitiveTypes.Uint16, - baseJSON: `[0, 
1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "uint32", - dataType: arrow.PrimitiveTypes.Uint32, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "uint64", - dataType: arrow.PrimitiveTypes.Uint64, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "float32", - dataType: arrow.PrimitiveTypes.Float32, - baseJSON: `[0.1, 0.3, -0.5]`, - targetJSON: `[0.1, -0.5, 0.3]`, - want: `@@ -1, +1 @@ --0.300000 -@@ -3, +2 @@ -+0.300000 -`, - }, - { - name: "float64", - dataType: arrow.PrimitiveTypes.Float64, - baseJSON: `[0.1, 0.3, -0.5]`, - targetJSON: `[0.1, -0.5, 0.3]`, - want: `@@ -1, +1 @@ --0.300000 -@@ -3, +2 @@ -+0.300000 -`, - }, - { - name: "equal nulls", - dataType: arrow.PrimitiveTypes.Int32, - baseJSON: `[null, null]`, - targetJSON: `[null, null]`, - want: ``, - }, - { - name: "nulls", - dataType: arrow.PrimitiveTypes.Int32, - baseJSON: `[1, null, null, null]`, - targetJSON: `[null, 1, null, 2]`, - want: `@@ -0, +0 @@ --1 -@@ -2, +1 @@ --null -+1 -@@ -4, +3 @@ -+2 -`, - }, - { - name: "extensions", - dataType: extensions.NewUUIDType(), - baseJSON: `["00000000-0000-0000-0000-000000000000", "00000000-0000-0000-0000-000000000001"]`, - targetJSON: `["00000000-0000-0000-0000-000000000001", "00000000-0000-0000-0000-000000000002"]`, - want: `@@ -0, +0 @@ --"00000000-0000-0000-0000-000000000000" -@@ -2, +1 @@ -+"00000000-0000-0000-0000-000000000002" -`, - }, - } - - for _, tc := range cases { - t.Run(tc.name, tc.check) - } -} diff --git a/go/arrow/array/doc.go b/go/arrow/array/doc.go deleted file mode 100644 index 5cf85408626ac..0000000000000 --- a/go/arrow/array/doc.go +++ /dev/null @@ -1,20 
+0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/* -Package array provides implementations of various Arrow array types. -*/ -package array diff --git a/go/arrow/array/encoded.go b/go/arrow/array/encoded.go deleted file mode 100644 index 748c4c1fec641..0000000000000 --- a/go/arrow/array/encoded.go +++ /dev/null @@ -1,520 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "fmt" - "math" - "reflect" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/encoded" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" - "github.com/apache/arrow/go/v18/internal/utils" -) - -// RunEndEncoded represents an array containing two children: -// an array of int32 values defining the ends of each run of values -// and an array of values -type RunEndEncoded struct { - array - - ends arrow.Array - values arrow.Array -} - -func NewRunEndEncodedArray(runEnds, values arrow.Array, logicalLength, offset int) *RunEndEncoded { - data := NewData(arrow.RunEndEncodedOf(runEnds.DataType(), values.DataType()), logicalLength, - []*memory.Buffer{nil}, []arrow.ArrayData{runEnds.Data(), values.Data()}, 0, offset) - defer data.Release() - return NewRunEndEncodedData(data) -} - -func NewRunEndEncodedData(data arrow.ArrayData) *RunEndEncoded { - r := &RunEndEncoded{} - r.refCount = 1 - r.setData(data.(*Data)) - return r -} - -func (r *RunEndEncoded) Values() arrow.Array { return r.values } -func (r *RunEndEncoded) RunEndsArr() arrow.Array { return r.ends } - -func (r *RunEndEncoded) Retain() { - r.array.Retain() - r.values.Retain() - r.ends.Retain() -} - -func (r *RunEndEncoded) Release() { - r.array.Release() - r.values.Release() - r.ends.Release() -} - -// LogicalValuesArray returns an array holding the values of each -// run, only over the range of run values inside the logical offset/length -// range of the parent array. 
-// -// # Example -// -// For this array: -// -// RunEndEncoded: { Offset: 150, Length: 1500 } -// RunEnds: [ 1, 2, 4, 6, 10, 1000, 1750, 2000 ] -// Values: [ "a", "b", "c", "d", "e", "f", "g", "h" ] -// -// LogicalValuesArray will return the following array: -// -// [ "f", "g" ] -// -// This is because the offset of 150 tells it to skip the values until -// "f" which corresponds with the logical offset (the run from 10 - 1000), -// and stops after "g" because the length + offset goes to 1650 which is -// within the run from 1000 - 1750, corresponding to the "g" value. -// -// # Note -// -// The return from this needs to be Released. -func (r *RunEndEncoded) LogicalValuesArray() arrow.Array { - physOffset := r.GetPhysicalOffset() - physLength := r.GetPhysicalLength() - data := NewSliceData(r.data.Children()[1], int64(physOffset), int64(physOffset+physLength)) - defer data.Release() - return MakeFromData(data) -} - -// LogicalRunEndsArray returns an array holding the logical indexes -// of each run end, only over the range of run end values relative -// to the logical offset/length range of the parent array. -// -// For arrays with an offset, this is not a slice of the existing -// internal run ends array. Instead a new array is created with run-ends -// that are adjusted so the new array can have an offset of 0. As a result -// this method can be expensive to call for an array with a non-zero offset. -// -// # Example -// -// For this array: -// -// RunEndEncoded: { Offset: 150, Length: 1500 } -// RunEnds: [ 1, 2, 4, 6, 10, 1000, 1750, 2000 ] -// Values: [ "a", "b", "c", "d", "e", "f", "g", "h" ] -// -// LogicalRunEndsArray will return the following array: -// -// [ 850, 1500 ] -// -// This is because the offset of 150 tells us to skip all run-ends less -// than 150 (by finding the physical offset), and we adjust the run-ends -// accordingly (1000 - 150 = 850). The logical length of the array is 1500, -// so we know we don't want to go past the 1750 run end. 
Thus the last -// run-end is determined by doing: min(1750 - 150, 1500) = 1500. -// -// # Note -// -// The return from this needs to be Released -func (r *RunEndEncoded) LogicalRunEndsArray(mem memory.Allocator) arrow.Array { - physOffset := r.GetPhysicalOffset() - physLength := r.GetPhysicalLength() - - if r.data.offset == 0 { - data := NewSliceData(r.data.childData[0], 0, int64(physLength)) - defer data.Release() - return MakeFromData(data) - } - - bldr := NewBuilder(mem, r.data.childData[0].DataType()) - defer bldr.Release() - bldr.Resize(physLength) - - switch e := r.ends.(type) { - case *Int16: - for _, v := range e.Int16Values()[physOffset : physOffset+physLength] { - v -= int16(r.data.offset) - v = int16(utils.Min(int(v), r.data.length)) - bldr.(*Int16Builder).Append(v) - } - case *Int32: - for _, v := range e.Int32Values()[physOffset : physOffset+physLength] { - v -= int32(r.data.offset) - v = int32(utils.Min(int(v), r.data.length)) - bldr.(*Int32Builder).Append(v) - } - case *Int64: - for _, v := range e.Int64Values()[physOffset : physOffset+physLength] { - v -= int64(r.data.offset) - v = int64(utils.Min(int(v), r.data.length)) - bldr.(*Int64Builder).Append(v) - } - } - - return bldr.NewArray() -} - -func (r *RunEndEncoded) setData(data *Data) { - if len(data.childData) != 2 { - panic(fmt.Errorf("%w: arrow/array: RLE array must have exactly 2 children", arrow.ErrInvalid)) - } - debug.Assert(data.dtype.ID() == arrow.RUN_END_ENCODED, "invalid type for RunLengthEncoded") - if !data.dtype.(*arrow.RunEndEncodedType).ValidRunEndsType(data.childData[0].DataType()) { - panic(fmt.Errorf("%w: arrow/array: run ends array must be int16, int32, or int64", arrow.ErrInvalid)) - } - if data.childData[0].NullN() > 0 { - panic(fmt.Errorf("%w: arrow/array: run ends array cannot contain nulls", arrow.ErrInvalid)) - } - - r.array.setData(data) - - r.ends = MakeFromData(r.data.childData[0]) - r.values = MakeFromData(r.data.childData[1]) -} - -func (r *RunEndEncoded) 
GetPhysicalOffset() int { - return encoded.FindPhysicalOffset(r.data) -} - -func (r *RunEndEncoded) GetPhysicalLength() int { - return encoded.GetPhysicalLength(r.data) -} - -// GetPhysicalIndex can be used to get the run-encoded value instead of costly LogicalValuesArray -// in the following way: -// -// r.Values().(valuetype).Value(r.GetPhysicalIndex(i)) -func (r *RunEndEncoded) GetPhysicalIndex(i int) int { - return encoded.FindPhysicalIndex(r.data, i+r.data.offset) -} - -// ValueStr will return the str representation of the value at the logical offset i. -func (r *RunEndEncoded) ValueStr(i int) string { - return r.values.ValueStr(r.GetPhysicalIndex(i)) -} - -func (r *RunEndEncoded) String() string { - var buf bytes.Buffer - buf.WriteByte('[') - for i := 0; i < r.ends.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - - value := r.values.GetOneForMarshal(i) - if byts, ok := value.(json.RawMessage); ok { - value = string(byts) - } - fmt.Fprintf(&buf, "{%d -> %v}", r.ends.GetOneForMarshal(i), value) - } - - buf.WriteByte(']') - return buf.String() -} - -func (r *RunEndEncoded) GetOneForMarshal(i int) interface{} { - return r.values.GetOneForMarshal(r.GetPhysicalIndex(i)) -} - -func (r *RunEndEncoded) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - buf.WriteByte('[') - for i := 0; i < r.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(r.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func arrayRunEndEncodedEqual(l, r *RunEndEncoded) bool { - // types were already checked before getting here, so we know - // the encoded types are equal - mr := encoded.NewMergedRuns([2]arrow.Array{l, r}) - for mr.Next() { - lIndex := mr.IndexIntoArray(0) - rIndex := mr.IndexIntoArray(1) - if !SliceEqual(l.values, lIndex, lIndex+1, r.values, rIndex, rIndex+1) { - return false - } - } - return true -} - -func arrayRunEndEncodedApproxEqual(l, r 
*RunEndEncoded, opt equalOption) bool { - // types were already checked before getting here, so we know - // the encoded types are equal - mr := encoded.NewMergedRuns([2]arrow.Array{l, r}) - for mr.Next() { - lIndex := mr.IndexIntoArray(0) - rIndex := mr.IndexIntoArray(1) - if !sliceApproxEqual(l.values, lIndex, lIndex+1, r.values, rIndex, rIndex+1, opt) { - return false - } - } - return true -} - -type RunEndEncodedBuilder struct { - builder - - dt arrow.DataType - runEnds Builder - values Builder - maxRunEnd uint64 - - // currently, mixing AppendValueFromString & UnmarshalOne is unsupported - lastUnmarshalled interface{} - unmarshalled bool // tracks if Unmarshal was called (in case lastUnmarshalled is nil) - lastStr *string -} - -func NewRunEndEncodedBuilder(mem memory.Allocator, runEnds, encoded arrow.DataType) *RunEndEncodedBuilder { - dt := arrow.RunEndEncodedOf(runEnds, encoded) - if !dt.ValidRunEndsType(runEnds) { - panic("arrow/ree: invalid runEnds type for run length encoded array") - } - - var maxEnd uint64 - switch runEnds.ID() { - case arrow.INT16: - maxEnd = math.MaxInt16 - case arrow.INT32: - maxEnd = math.MaxInt32 - case arrow.INT64: - maxEnd = math.MaxInt64 - } - return &RunEndEncodedBuilder{ - builder: builder{refCount: 1, mem: mem}, - dt: dt, - runEnds: NewBuilder(mem, runEnds), - values: NewBuilder(mem, encoded), - maxRunEnd: maxEnd, - lastUnmarshalled: nil, - } -} - -func (b *RunEndEncodedBuilder) Type() arrow.DataType { - return b.dt -} - -func (b *RunEndEncodedBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - b.values.Release() - b.runEnds.Release() - } -} - -func (b *RunEndEncodedBuilder) addLength(n uint64) { - if uint64(b.length)+n > b.maxRunEnd { - panic(fmt.Errorf("%w: %s array length must fit be less than %d", arrow.ErrInvalid, b.dt, b.maxRunEnd)) - } - - b.length += int(n) -} - -func (b *RunEndEncodedBuilder) finishRun() { - b.lastUnmarshalled = 
nil - b.lastStr = nil - b.unmarshalled = false - if b.length == 0 { - return - } - - switch bldr := b.runEnds.(type) { - case *Int16Builder: - bldr.Append(int16(b.length)) - case *Int32Builder: - bldr.Append(int32(b.length)) - case *Int64Builder: - bldr.Append(int64(b.length)) - } -} - -func (b *RunEndEncodedBuilder) ValueBuilder() Builder { return b.values } - -func (b *RunEndEncodedBuilder) Append(n uint64) { - b.finishRun() - b.addLength(n) -} - -func (b *RunEndEncodedBuilder) AppendRuns(runs []uint64) { - for _, r := range runs { - b.finishRun() - b.addLength(r) - } -} - -func (b *RunEndEncodedBuilder) ContinueRun(n uint64) { - b.addLength(n) -} - -func (b *RunEndEncodedBuilder) AppendNull() { - b.finishRun() - b.values.AppendNull() - b.addLength(1) -} - -func (b *RunEndEncodedBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *RunEndEncodedBuilder) NullN() int { - return UnknownNullCount -} - -func (b *RunEndEncodedBuilder) AppendEmptyValue() { - b.AppendNull() -} - -func (b *RunEndEncodedBuilder) AppendEmptyValues(n int) { - b.AppendNulls(n) -} - -func (b *RunEndEncodedBuilder) Reserve(n int) { - b.values.Reserve(n) - b.runEnds.Reserve(n) -} - -func (b *RunEndEncodedBuilder) Resize(n int) { - b.values.Resize(n) - b.runEnds.Resize(n) -} - -func (b *RunEndEncodedBuilder) NewRunEndEncodedArray() *RunEndEncoded { - data := b.newData() - defer data.Release() - return NewRunEndEncodedData(data) -} - -func (b *RunEndEncodedBuilder) NewArray() arrow.Array { - return b.NewRunEndEncodedArray() -} - -func (b *RunEndEncodedBuilder) newData() (data *Data) { - b.finishRun() - values := b.values.NewArray() - defer values.Release() - runEnds := b.runEnds.NewArray() - defer runEnds.Release() - - data = NewData( - b.dt, b.length, []*memory.Buffer{}, - []arrow.ArrayData{runEnds.Data(), values.Data()}, 0, 0) - b.reset() - return -} - -// AppendValueFromString can't be used in conjunction with UnmarshalOne -func (b *RunEndEncodedBuilder) 
AppendValueFromString(s string) error { - // we don't support mixing AppendValueFromString & UnmarshalOne - if b.unmarshalled { - return fmt.Errorf("%w: mixing AppendValueFromString & UnmarshalOne not yet implemented", arrow.ErrNotImplemented) - } - - if s == NullValueStr { - b.AppendNull() - return nil - } - - if b.lastStr != nil && s == *b.lastStr { - b.ContinueRun(1) - return nil - } - - b.Append(1) - lastStr := s - b.lastStr = &lastStr - return b.ValueBuilder().AppendValueFromString(s) -} - -// UnmarshalOne can't be used in conjunction with AppendValueFromString -func (b *RunEndEncodedBuilder) UnmarshalOne(dec *json.Decoder) error { - // we don't support mixing AppendValueFromString & UnmarshalOne - if b.lastStr != nil { - return fmt.Errorf("%w: mixing AppendValueFromString & UnmarshalOne not yet implemented", arrow.ErrNotImplemented) - } - - var value interface{} - if err := dec.Decode(&value); err != nil { - return err - } - - // if we unmarshalled the same value as the previous one, we want to - // continue the run. However, there's an edge case. At the start of - // unmarshalling, lastUnmarshalled will be nil, but we might get - // nil as the first value we unmarshal. In that case we want to - // make sure we add a new run instead. 
We can detect that case by - // checking that the number of runEnds matches the number of values - // we have, which means no matter what we have to start a new run - if reflect.DeepEqual(value, b.lastUnmarshalled) && (value != nil || b.runEnds.Len() != b.values.Len()) { - b.ContinueRun(1) - return nil - } - - data, err := json.Marshal(value) - if err != nil { - return err - } - - b.Append(1) - b.lastUnmarshalled = value - b.unmarshalled = true - return b.ValueBuilder().UnmarshalOne(json.NewDecoder(bytes.NewReader(data))) -} - -// Unmarshal can't be used in conjunction with AppendValueFromString (as it calls UnmarshalOne) -func (b *RunEndEncodedBuilder) Unmarshal(dec *json.Decoder) error { - b.finishRun() - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -// UnmarshalJSON can't be used in conjunction with AppendValueFromString (as it calls UnmarshalOne) -func (b *RunEndEncodedBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("list builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*RunEndEncoded)(nil) - _ Builder = (*RunEndEncodedBuilder)(nil) -) diff --git a/go/arrow/array/encoded_test.go b/go/arrow/array/encoded_test.go deleted file mode 100644 index 03352ec44177c..0000000000000 --- a/go/arrow/array/encoded_test.go +++ /dev/null @@ -1,459 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -var ( - stringValues, _, _ = array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World", null]`)) - int32Values, _, _ = array.FromJSON(memory.DefaultAllocator, arrow.PrimitiveTypes.Int32, strings.NewReader(`[10, 20, 30]`)) - int32OnlyNull = array.MakeArrayOfNull(memory.DefaultAllocator, arrow.PrimitiveTypes.Int32, 3) -) - -func TestMakeRLEArray(t *testing.T) { - rleArr := array.NewRunEndEncodedArray(int32Values, stringValues, 3, 0) - defer rleArr.Release() - - arrData := rleArr.Data() - newArr := array.MakeFromData(arrData) - defer newArr.Release() - - assert.Same(t, newArr.Data(), arrData) - assert.IsType(t, (*array.RunEndEncoded)(nil), newArr) -} - -func TestRLEFromRunEndsAndValues(t *testing.T) { - rleArray := array.NewRunEndEncodedArray(int32Values, int32Values, 3, 0) - defer rleArray.Release() - - assert.EqualValues(t, 3, rleArray.Len()) - assert.Truef(t, array.Equal(int32Values, rleArray.Values()), "expected: %s\ngot: %s", int32Values, rleArray.Values()) - assert.Truef(t, array.Equal(int32Values, rleArray.RunEndsArr()), "expected: %s\ngot: %s", int32Values, rleArray.RunEndsArr()) - assert.Zero(t, rleArray.Offset()) - assert.Zero(t, rleArray.Data().NullN()) - // one dummy 
buffer, since code may assume there's at least one nil buffer - assert.Len(t, rleArray.Data().Buffers(), 1) - - // explicit offset - rleArray = array.NewRunEndEncodedArray(int32Values, stringValues, 2, 1) - defer rleArray.Release() - - assert.EqualValues(t, 2, rleArray.Len()) - assert.Truef(t, array.Equal(stringValues, rleArray.Values()), "expected: %s\ngot: %s", stringValues, rleArray.Values()) - assert.Truef(t, array.Equal(int32Values, rleArray.RunEndsArr()), "expected: %s\ngot: %s", int32Values, rleArray.RunEndsArr()) - assert.EqualValues(t, 1, rleArray.Offset()) - assert.Zero(t, rleArray.Data().NullN()) - - assert.PanicsWithError(t, "invalid: arrow/array: run ends array must be int16, int32, or int64", func() { - array.NewRunEndEncodedArray(stringValues, int32Values, 3, 0) - }) - assert.PanicsWithError(t, "invalid: arrow/array: run ends array cannot contain nulls", func() { - array.NewRunEndEncodedArray(int32OnlyNull, int32Values, 3, 0) - }) -} - -func TestRunLengthEncodedOffsetLength(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - runEnds, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[100, 200, 300, 400, 500]`)) - defer runEnds.Release() - - values, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "beautiful", "world", "of", "RLE"]`)) - defer values.Release() - - rleArray := array.NewRunEndEncodedArray(runEnds, values, 500, 0) - defer rleArray.Release() - - assert.EqualValues(t, 5, rleArray.GetPhysicalLength()) - assert.EqualValues(t, 0, rleArray.GetPhysicalOffset()) - - slice := array.NewSlice(rleArray, 199, 204).(*array.RunEndEncoded) - defer slice.Release() - - assert.EqualValues(t, 2, slice.GetPhysicalLength()) - assert.EqualValues(t, 1, slice.GetPhysicalOffset()) - - slice2 := array.NewSlice(rleArray, 199, 300).(*array.RunEndEncoded) - defer slice2.Release() - - assert.EqualValues(t, 2, slice2.GetPhysicalLength()) - 
assert.EqualValues(t, 1, slice2.GetPhysicalOffset()) - - slice3 := array.NewSlice(rleArray, 400, 500).(*array.RunEndEncoded) - defer slice3.Release() - - assert.EqualValues(t, 1, slice3.GetPhysicalLength()) - assert.EqualValues(t, 4, slice3.GetPhysicalOffset()) - - slice4 := array.NewSlice(rleArray, 0, 150).(*array.RunEndEncoded) - defer slice4.Release() - - assert.EqualValues(t, 2, slice4.GetPhysicalLength()) - assert.EqualValues(t, 0, slice4.GetPhysicalOffset()) - - zeroLengthAtEnd := array.NewSlice(rleArray, 500, 500).(*array.RunEndEncoded) - defer zeroLengthAtEnd.Release() - - assert.EqualValues(t, 0, zeroLengthAtEnd.GetPhysicalLength()) - assert.EqualValues(t, 5, zeroLengthAtEnd.GetPhysicalOffset()) -} - -func TestRLECompare(t *testing.T) { - rleArray := array.NewRunEndEncodedArray(int32Values, stringValues, 30, 0) - // second that is a copy of the first - standardEquals := array.MakeFromData(rleArray.Data().(*array.Data).Copy()) - - defer rleArray.Release() - defer standardEquals.Release() - - assert.Truef(t, array.Equal(rleArray, standardEquals), "left: %s\nright: %s", rleArray, standardEquals) - assert.False(t, array.Equal(array.NewSlice(rleArray, 0, 29), array.NewSlice(rleArray, 1, 30))) - - // array that is logically the same as our rleArray, but has 2 small - // runs for the first value instead of one large run - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - t.Run("logical duplicate", func(t *testing.T) { - dupRunEnds, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[5, 10, 20, 30]`)) - defer dupRunEnds.Release() - strValues, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, - strings.NewReader(`["Hello", "Hello", "World", null]`)) - defer strValues.Release() - - dupArr := array.NewRunEndEncodedArray(dupRunEnds, strValues, 30, 0) - defer dupArr.Release() - - assert.Truef(t, array.Equal(rleArray, dupArr), "expected: %sgot: %s", rleArray, dupArr) - }) - - t.Run("emptyArr", func(t 
*testing.T) { - emptyRuns, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[]`)) - emptyVals, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[]`)) - defer emptyRuns.Release() - defer emptyVals.Release() - - emptyArr := array.NewRunEndEncodedArray(emptyRuns, emptyVals, 0, 0) - defer emptyArr.Release() - - dataCopy := emptyArr.Data().(*array.Data).Copy() - defer dataCopy.Release() - emptyArr2 := array.MakeFromData(dataCopy) - defer emptyArr2.Release() - - assert.Truef(t, array.Equal(emptyArr, emptyArr2), "expected: %sgot: %s", emptyArr, emptyArr2) - }) - - t.Run("different offsets", func(t *testing.T) { - // three different slices that have the value [3, 3, 3, 4, 4, 4, 4] - offsetsa, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, - strings.NewReader(`[2, 5, 12, 58, 60]`)) - offsetsb, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, - strings.NewReader(`[81, 86, 99, 100]`)) - offsetsc, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, - strings.NewReader(`[3, 7]`)) - valsa, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, - strings.NewReader(`[1, 2, 3, 4, 5]`)) - valsb, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, - strings.NewReader(`[2, 3, 4, 5]`)) - valsc, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, - strings.NewReader(`[3, 4]`)) - defer func() { - offsetsa.Release() - offsetsb.Release() - offsetsc.Release() - valsa.Release() - valsb.Release() - valsc.Release() - }() - - differentOffsetsA := array.NewRunEndEncodedArray(offsetsa, valsa, 60, 0) - defer differentOffsetsA.Release() - differentOffsetsB := array.NewRunEndEncodedArray(offsetsb, valsb, 100, 0) - defer differentOffsetsB.Release() - differentOffsetsC := array.NewRunEndEncodedArray(offsetsc, valsc, 7, 0) - defer differentOffsetsC.Release() - - sliceA := array.NewSlice(differentOffsetsA, 9, 16) - defer sliceA.Release() - sliceB := array.NewSlice(differentOffsetsB, 83, 90) - defer sliceB.Release() - - 
assert.True(t, array.Equal(sliceA, sliceB)) - assert.True(t, array.Equal(sliceA, differentOffsetsC)) - assert.True(t, array.Equal(sliceB, differentOffsetsC)) - }) -} - -func TestRunEndEncodedBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - bldr := array.NewBuilder(mem, arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String)) - defer bldr.Release() - - assert.IsType(t, (*array.RunEndEncodedBuilder)(nil), bldr) - reeBldr := bldr.(*array.RunEndEncodedBuilder) - - valBldr := reeBldr.ValueBuilder().(*array.StringBuilder) - - reeBldr.Append(100) - valBldr.Append("Hello") - reeBldr.Append(100) - valBldr.Append("beautiful") - reeBldr.Append(50) - valBldr.Append("world") - reeBldr.ContinueRun(50) - reeBldr.Append(100) - valBldr.Append("of") - reeBldr.Append(100) - valBldr.Append("RLE") - reeBldr.AppendNull() - - rleArray := reeBldr.NewRunEndEncodedArray() - defer rleArray.Release() - - assert.EqualValues(t, 501, rleArray.Len()) - assert.EqualValues(t, 6, rleArray.GetPhysicalLength()) - assert.Equal(t, arrow.INT16, rleArray.RunEndsArr().DataType().ID()) - assert.Equal(t, []int16{100, 200, 300, 400, 500, 501}, rleArray.RunEndsArr().(*array.Int16).Int16Values()) - - strValues := rleArray.Values().(*array.String) - assert.Equal(t, "Hello", strValues.Value(0)) - assert.Equal(t, "beautiful", strValues.Value(1)) - assert.Equal(t, "world", strValues.Value(2)) - assert.Equal(t, "of", strValues.Value(3)) - assert.Equal(t, "RLE", strValues.Value(4)) - assert.True(t, strValues.IsNull(5)) - assert.Equal(t, "Hello", strValues.ValueStr(0)) -} - -func TestRunEndEncodedStringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - b := array.NewRunEndEncodedBuilder(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String) - defer b.Release() - - valBldr := b.ValueBuilder().(*array.StringBuilder) - - b.Append(100) - valBldr.Append("Hello") - b.Append(100) - valBldr.Append("beautiful") - b.Append(50) - valBldr.Append("world") - b.ContinueRun(50) - b.Append(100) - valBldr.Append("of") - b.Append(100) - valBldr.Append("RLE") - b.AppendNull() - - arr := b.NewArray().(*array.RunEndEncoded) - defer arr.Release() - logical := arr.LogicalValuesArray() - defer logical.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewRunEndEncodedBuilder(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.RunEndEncoded) - defer arr1.Release() - logical1 := arr1.LogicalValuesArray() - defer logical1.Release() - - assert.True(t, array.Equal(arr, arr1)) - assert.True(t, array.Equal(logical, logical1)) -} - -func TestREEBuilderOverflow(t *testing.T) { - for _, typ := range []arrow.DataType{arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int64} { - t.Run("run_ends="+typ.String(), func(t *testing.T) { - - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - bldr := array.NewRunEndEncodedBuilder(mem, typ, arrow.BinaryTypes.String) - defer bldr.Release() - - valBldr := bldr.ValueBuilder().(*array.StringBuilder) - assert.Panics(t, func() { - valBldr.Append("Foo") - - maxVal := uint64(1< 0 { - o.WriteString(" ") - } - if !a.IsValid(i) { - o.WriteString(NullValueStr) - continue - } - sub := a.newListValue(i) - fmt.Fprintf(o, "%v", sub) - sub.Release() - } - o.WriteString("]") - return o.String() -} - -func (a *FixedSizeList) newListValue(i int) arrow.Array { - beg, 
end := a.ValueOffsets(i) - return NewSlice(a.values, beg, end) -} - -func (a *FixedSizeList) setData(data *Data) { - a.array.setData(data) - a.n = a.DataType().(*arrow.FixedSizeListType).Len() - a.values = MakeFromData(data.childData[0]) -} - -func arrayEqualFixedSizeList(left, right *FixedSizeList) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return Equal(l, r) - }() - if !o { - return false - } - } - return true -} - -// Len returns the number of elements in the array. -func (a *FixedSizeList) Len() int { return a.array.Len() } - -func (a *FixedSizeList) ValueOffsets(i int) (start, end int64) { - n := int64(a.n) - off := int64(a.array.data.offset) - start, end = (off+int64(i))*n, (off+int64(i+1))*n - return -} - -func (a *FixedSizeList) Retain() { - a.array.Retain() - a.values.Retain() -} - -func (a *FixedSizeList) Release() { - a.array.Release() - a.values.Release() -} - -func (a *FixedSizeList) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - slice := a.newListValue(i) - defer slice.Release() - v, err := json.Marshal(slice) - if err != nil { - panic(err) - } - - return json.RawMessage(v) -} - -func (a *FixedSizeList) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if a.IsNull(i) { - enc.Encode(nil) - continue - } - - slice := a.newListValue(i) - if err := enc.Encode(slice); err != nil { - return nil, err - } - slice.Release() - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -type FixedSizeListBuilder struct { - baseListBuilder - n int32 // number of elements in the fixed-size list. -} - -// NewFixedSizeListBuilder returns a builder, using the provided memory allocator. 
-// The created list builder will create a list whose elements will be of type etype. -func NewFixedSizeListBuilder(mem memory.Allocator, n int32, etype arrow.DataType) *FixedSizeListBuilder { - return &FixedSizeListBuilder{ - baseListBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, etype), - dt: arrow.FixedSizeListOf(n, etype), - }, - n, - } -} - -// NewFixedSizeListBuilderWithField returns a builder similarly to -// NewFixedSizeListBuilder, but it accepts a child rather than just a datatype -// to ensure nullability context is preserved. -func NewFixedSizeListBuilderWithField(mem memory.Allocator, n int32, field arrow.Field) *FixedSizeListBuilder { - return &FixedSizeListBuilder{ - baseListBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, field.Type), - dt: arrow.FixedSizeListOfField(n, field), - }, - n, - } -} - -func (b *FixedSizeListBuilder) Type() arrow.DataType { return b.dt } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *FixedSizeListBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.values != nil { - b.values.Release() - b.values = nil - } - } -} - -func (b *FixedSizeListBuilder) Append(v bool) { - b.Reserve(1) - b.unsafeAppendBoolToBitmap(v) -} - -// AppendNull will append null values to the underlying values by itself -func (b *FixedSizeListBuilder) AppendNull() { - b.Reserve(1) - b.unsafeAppendBoolToBitmap(false) - // require to append this due to value indexes - for i := int32(0); i < b.n; i++ { - b.values.AppendNull() - } -} - -// AppendNulls will append n null values to the underlying values by itself -func (b *FixedSizeListBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *FixedSizeListBuilder) AppendEmptyValue() { - b.Append(true) - for i := int32(0); i < b.n; i++ { - b.values.AppendEmptyValue() - } -} - -func (b *FixedSizeListBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *FixedSizeListBuilder) AppendValues(valid []bool) { - b.Reserve(len(valid)) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) -} - -func (b *FixedSizeListBuilder) unsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -func (b *FixedSizeListBuilder) init(capacity int) { - b.builder.init(capacity) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *FixedSizeListBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. 
-func (b *FixedSizeListBuilder) Resize(n int) { - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(n, b.builder.init) - } -} - -func (b *FixedSizeListBuilder) ValueBuilder() Builder { - return b.values -} - -// NewArray creates a List array from the memory buffers used by the builder and resets the FixedSizeListBuilder -// so it can be used to build a new array. -func (b *FixedSizeListBuilder) NewArray() arrow.Array { - return b.NewListArray() -} - -// NewListArray creates a List array from the memory buffers used by the builder and resets the FixedSizeListBuilder -// so it can be used to build a new array. -func (b *FixedSizeListBuilder) NewListArray() (a *FixedSizeList) { - data := b.newData() - a = NewFixedSizeListData(data) - data.Release() - return -} - -func (b *FixedSizeListBuilder) newData() (data *Data) { - values := b.values.NewArray() - defer values.Release() - - data = NewData( - b.dt, b.length, - []*memory.Buffer{b.nullBitmap}, - []arrow.ArrayData{values.Data()}, - b.nulls, - 0, - ) - b.reset() - - return -} - -func (b *FixedSizeListBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - dec := json.NewDecoder(strings.NewReader(s)) - return b.UnmarshalOne(dec) -} - -func (b *FixedSizeListBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch t { - case json.Delim('['): - b.Append(true) - if err := b.values.Unmarshal(dec); err != nil { - return err - } - // consume ']' - _, err := dec.Token() - return err - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Struct: b.dt.String(), - } - } - - return nil -} - -func (b *FixedSizeListBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *FixedSizeListBuilder) 
UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("fixed size list builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*FixedSizeList)(nil) - _ Builder = (*FixedSizeListBuilder)(nil) -) diff --git a/go/arrow/array/fixed_size_list_test.go b/go/arrow/array/fixed_size_list_test.go deleted file mode 100644 index e0edb9868cffd..0000000000000 --- a/go/arrow/array/fixed_size_list_test.go +++ /dev/null @@ -1,257 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestFixedSizeListArray(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{0, 1, 2, 3, 4, 5, 6} - lengths = []int{3, 0, 4} - isValid = []bool{true, false, true} - ) - - lb := array.NewFixedSizeListBuilder(pool, int32(len(vs)), arrow.PrimitiveTypes.Int32) - defer lb.Release() - - for i := 0; i < 10; i++ { - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - pos := 0 - for i, length := range lengths { - lb.Append(isValid[i]) - for j := 0; j < length; j++ { - vb.Append(vs[pos]) - pos++ - } - } - - arr := lb.NewArray().(*array.FixedSizeList) - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.DataType().ID(), arrow.FIXED_SIZE_LIST; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), lengths[i] == 0; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - } -} - -func TestFixedSizeListArrayEmpty(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - lb := array.NewFixedSizeListBuilder(pool, 3, arrow.PrimitiveTypes.Int32) - defer lb.Release() - arr := lb.NewArray().(*array.FixedSizeList) - defer arr.Release() - if got, want := arr.Len(), 0; got != want { - t.Fatalf("got=%d, 
want=%d", got, want) - } -} - -func TestFixedSizeListArrayBulkAppend(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{0, 1, 2, 3, 4, 5, 6} - lengths = []int{3, 0, 4} - isValid = []bool{true, false, true} - ) - - lb := array.NewFixedSizeListBuilder(pool, int32(len(vs)), arrow.PrimitiveTypes.Int32) - defer lb.Release() - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - lb.AppendValues(isValid) - for _, v := range vs { - vb.Append(v) - } - - arr := lb.NewArray().(*array.FixedSizeList) - defer arr.Release() - - if got, want := arr.DataType().ID(), arrow.FIXED_SIZE_LIST; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), lengths[i] == 0; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestFixedSizeListArrayStringer(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const N = 3 - var ( - vs = [][N]int32{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, -9, -8}} - isValid = []bool{true, false, true, true} - ) - - lb := array.NewFixedSizeListBuilder(pool, N, arrow.PrimitiveTypes.Int32) - defer lb.Release() - - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - for i, v := range vs { - lb.Append(isValid[i]) - vb.AppendValues(v[:], nil) - } - - arr := lb.NewArray().(*array.FixedSizeList) - defer arr.Release() - - arr.Retain() - arr.Release() - - want := `[[0 1 2] (null) [6 7 8] [9 -9 -8]]` - if got, want := 
arr.String(), want; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - assert.Equal(t, "[0,1,2]", arr.ValueStr(0)) - assert.Equal(t, array.NullValueStr, arr.ValueStr(1)) -} - -func TestFixedSizeListArraySlice(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const N = 3 - var ( - vs = [][N]int32{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, -9, -8}} - isValid = []bool{true, false, true, true} - ) - - lb := array.NewFixedSizeListBuilder(pool, N, arrow.PrimitiveTypes.Int32) - defer lb.Release() - - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - for i, v := range vs { - lb.Append(isValid[i]) - vb.AppendValues(v[:], nil) - } - - arr := lb.NewArray().(*array.FixedSizeList) - defer arr.Release() - - arr.Retain() - arr.Release() - - want := `[[0 1 2] (null) [6 7 8] [9 -9 -8]]` - if got, want := arr.String(), want; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - sub := array.NewSlice(arr, 1, 3).(*array.FixedSizeList) - defer sub.Release() - - want = `[(null) [6 7 8]]` - if got, want := sub.String(), want; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } -} - -func TestFixedSizeListStringRoundTrip(t *testing.T) { - // 1. create array - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const N = 3 - var ( - values = [][N]int32{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, -9, -8}} - valid = []bool{true, false, true, true} - ) - - b := array.NewFixedSizeListBuilder(pool, N, arrow.PrimitiveTypes.Int32) - defer b.Release() - - vb := b.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(values)) - - for i, v := range values { - b.Append(valid[i]) - vb.AppendValues(v[:], nil) - } - - arr := b.NewArray().(*array.FixedSizeList) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewFixedSizeListBuilder(pool, N, arrow.PrimitiveTypes.Int32) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.FixedSizeList) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} diff --git a/go/arrow/array/fixedsize_binary.go b/go/arrow/array/fixedsize_binary.go deleted file mode 100644 index f4d16c6386d60..0000000000000 --- a/go/arrow/array/fixedsize_binary.go +++ /dev/null @@ -1,123 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "encoding/base64" - "fmt" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/internal/json" -) - -// A type which represents an immutable sequence of fixed-length binary strings. -type FixedSizeBinary struct { - array - - valueBytes []byte - bytewidth int32 -} - -// NewFixedSizeBinaryData constructs a new fixed-size binary array from data. 
-func NewFixedSizeBinaryData(data arrow.ArrayData) *FixedSizeBinary { - a := &FixedSizeBinary{bytewidth: int32(data.DataType().(arrow.FixedWidthDataType).BitWidth() / 8)} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Value returns the fixed-size slice at index i. This value should not be mutated. -func (a *FixedSizeBinary) Value(i int) []byte { - i += a.array.data.offset - var ( - bw = int(a.bytewidth) - beg = i * bw - end = (i + 1) * bw - ) - return a.valueBytes[beg:end] -} -func (a *FixedSizeBinary) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return base64.StdEncoding.EncodeToString(a.Value(i)) -} - -func (a *FixedSizeBinary) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%q", a.Value(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *FixedSizeBinary) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.valueBytes = vals.Bytes() - } - -} - -func (a *FixedSizeBinary) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.Value(i) -} - -func (a *FixedSizeBinary) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.Value(i) - } else { - vals[i] = nil - } - } - return json.Marshal(vals) -} - -func arrayEqualFixedSizeBinary(left, right *FixedSizeBinary) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !bytes.Equal(left.Value(i), right.Value(i)) { - return false - } - } - return true -} - -var ( - _ arrow.Array = (*FixedSizeBinary)(nil) -) diff --git a/go/arrow/array/fixedsize_binary_test.go b/go/arrow/array/fixedsize_binary_test.go deleted file mode 100644 index 4a32cb9692a06..0000000000000 --- a/go/arrow/array/fixedsize_binary_test.go 
+++ /dev/null @@ -1,189 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "testing" - - "github.com/stretchr/testify/assert" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -func TestFixedSizeBinary(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := arrow.FixedSizeBinaryType{ByteWidth: 7} - b := array.NewFixedSizeBinaryBuilder(mem, &dtype) - - zero := make([]byte, dtype.ByteWidth) - - values := [][]byte{ - []byte("7654321"), - nil, - []byte("AZERTYU"), - } - valid := []bool{true, false, true} - b.AppendValues(values, valid) - // encoded abcdefg base64 - assert.NoError(t, b.AppendValueFromString("YWJjZGVmZw==")) - - b.Retain() - b.Release() - - a := b.NewFixedSizeBinaryArray() - assert.Equal(t, 4, a.Len()) - assert.Equal(t, 1, a.NullN()) - assert.Equal(t, []byte("7654321"), a.Value(0)) - assert.Equal(t, "YWJjZGVmZw==", a.ValueStr(3)) - assert.Equal(t, zero, a.Value(1)) - assert.Equal(t, true, a.IsNull(1)) - assert.Equal(t, false, a.IsValid(1)) - assert.Equal(t, []byte("AZERTYU"), a.Value(2)) - a.Release() - - // Test 
builder reset and NewArray API. - b.AppendValues(values, valid) - a = b.NewArray().(*array.FixedSizeBinary) - assert.Equal(t, 3, a.Len()) - assert.Equal(t, 1, a.NullN()) - assert.Equal(t, []byte("7654321"), a.Value(0)) - assert.Equal(t, zero, a.Value(1)) - assert.Equal(t, []byte("AZERTYU"), a.Value(2)) - a.Release() - - b.Release() -} - -func TestFixedSizeBinarySlice(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.FixedSizeBinaryType{ByteWidth: 4} - b := array.NewFixedSizeBinaryBuilder(mem, dtype) - defer b.Release() - - var data = [][]byte{ - []byte("ABCD"), - []byte("1234"), - nil, - []byte("AZER"), - } - b.AppendValues(data[:2], nil) - b.AppendNull() - b.Append(data[3]) - - arr := b.NewFixedSizeBinaryArray() - defer arr.Release() - - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.FixedSizeBinary) - if !ok { - t.Fatalf("could not type-assert to array.String") - } - - if got, want := v.String(), `[(null) "AZER"]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if got, want := v.NullN(), 1; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } -} - -func TestFixedSizeBinary_MarshalUnmarshalJSON(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.FixedSizeBinaryType{ByteWidth: 4} - b := array.NewFixedSizeBinaryBuilder(mem, dtype) - defer b.Release() - - var data = [][]byte{ - []byte("ABCD"), - []byte("1234"), - nil, - []byte("AZER"), - } - b.AppendValues(data[:2], nil) - b.AppendNull() - b.Append(data[3]) - - arr := b.NewFixedSizeBinaryArray() - defer arr.Release() - - jsonBytes, err := arr.MarshalJSON() - if err != nil { - t.Fatalf("failed to marshal json: %v", err) - } - - err = b.UnmarshalJSON(jsonBytes) - if err != nil { - t.Fatalf("failed to unmarshal json: %v", err) - } - gotArr := 
b.NewFixedSizeBinaryArray() - defer gotArr.Release() - - gotString := gotArr.String() - wantString := arr.String() - if gotString != wantString { - t.Fatalf("got=%q, want=%q", gotString, wantString) - } -} - -func TestFixedSizeBinaryStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := &arrow.FixedSizeBinaryType{ByteWidth: 7} - b := array.NewFixedSizeBinaryBuilder(mem, dt) - - values := [][]byte{ - []byte("7654321"), - nil, - []byte("AZERTYU"), - } - valid := []bool{true, false, true} - b.AppendValues(values, valid) - // encoded abcdefg base64 - assert.NoError(t, b.AppendValueFromString("YWJjZGVmZw==")) - - arr := b.NewArray().(*array.FixedSizeBinary) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewFixedSizeBinaryBuilder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.FixedSizeBinary) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} diff --git a/go/arrow/array/fixedsize_binarybuilder.go b/go/arrow/array/fixedsize_binarybuilder.go deleted file mode 100644 index 96d58632ab8c8..0000000000000 --- a/go/arrow/array/fixedsize_binarybuilder.go +++ /dev/null @@ -1,261 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "encoding/base64" - "fmt" - "reflect" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// A FixedSizeBinaryBuilder is used to build a FixedSizeBinary array using the Append methods. -type FixedSizeBinaryBuilder struct { - builder - - dtype *arrow.FixedSizeBinaryType - values *byteBufferBuilder -} - -func NewFixedSizeBinaryBuilder(mem memory.Allocator, dtype *arrow.FixedSizeBinaryType) *FixedSizeBinaryBuilder { - b := &FixedSizeBinaryBuilder{ - builder: builder{refCount: 1, mem: mem}, - dtype: dtype, - values: newByteBufferBuilder(mem), - } - return b -} - -func (b *FixedSizeBinaryBuilder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (b *FixedSizeBinaryBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.values != nil { - b.values.Release() - b.values = nil - } - } -} - -func (b *FixedSizeBinaryBuilder) Append(v []byte) { - if len(v) != b.dtype.ByteWidth { - // TODO(alexandre): should we return an error instead? 
- panic("len(v) != b.dtype.ByteWidth") - } - - b.Reserve(1) - b.values.Append(v) - b.UnsafeAppendBoolToBitmap(true) -} - -func (b *FixedSizeBinaryBuilder) AppendNull() { - b.Reserve(1) - b.values.Advance(b.dtype.ByteWidth) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *FixedSizeBinaryBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *FixedSizeBinaryBuilder) AppendEmptyValue() { - b.Reserve(1) - b.values.Advance(b.dtype.ByteWidth) - b.UnsafeAppendBoolToBitmap(true) -} - -func (b *FixedSizeBinaryBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *FixedSizeBinaryBuilder) UnsafeAppend(v []byte) { - b.values.unsafeAppend(v) - b.UnsafeAppendBoolToBitmap(true) -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *FixedSizeBinaryBuilder) AppendValues(v [][]byte, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - for _, vv := range v { - switch len(vv) { - case 0: - b.values.Advance(b.dtype.ByteWidth) - case b.dtype.ByteWidth: - b.values.Append(vv) - default: - panic(fmt.Errorf("array: invalid binary length (got=%d, want=%d)", len(vv), b.dtype.ByteWidth)) - } - } - - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *FixedSizeBinaryBuilder) init(capacity int) { - b.builder.init(capacity) - b.values.resize(capacity * b.dtype.ByteWidth) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *FixedSizeBinaryBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. 
If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *FixedSizeBinaryBuilder) Resize(n int) { - b.builder.resize(n, b.init) -} - -// NewArray creates a FixedSizeBinary array from the memory buffers used by the -// builder and resets the FixedSizeBinaryBuilder so it can be used to build a new array. -func (b *FixedSizeBinaryBuilder) NewArray() arrow.Array { - return b.NewFixedSizeBinaryArray() -} - -// NewFixedSizeBinaryArray creates a FixedSizeBinary array from the memory buffers used by the builder and resets the FixedSizeBinaryBuilder -// so it can be used to build a new array. -func (b *FixedSizeBinaryBuilder) NewFixedSizeBinaryArray() (a *FixedSizeBinary) { - data := b.newData() - a = NewFixedSizeBinaryData(data) - data.Release() - return -} - -func (b *FixedSizeBinaryBuilder) newData() (data *Data) { - values := b.values.Finish() - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, values}, nil, b.nulls, 0) - - if values != nil { - values.Release() - } - - b.builder.reset() - - return -} - -func (b *FixedSizeBinaryBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - - data, err := base64.StdEncoding.DecodeString(s) - if err != nil { - b.AppendNull() - return err - } - b.Append(data) - return nil -} - -func (b *FixedSizeBinaryBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - var val []byte - switch v := t.(type) { - case string: - data, err := base64.StdEncoding.DecodeString(v) - if err != nil { - return err - } - val = data - case []byte: - val = v - case nil: - b.AppendNull() - return nil - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf([]byte{}), - Offset: dec.InputOffset(), - Struct: fmt.Sprintf("FixedSizeBinary[%d]", b.dtype.ByteWidth), - } - } - - if len(val) != b.dtype.ByteWidth { - return 
&json.UnmarshalTypeError{ - Value: fmt.Sprint(val), - Type: reflect.TypeOf([]byte{}), - Offset: dec.InputOffset(), - Struct: fmt.Sprintf("FixedSizeBinary[%d]", b.dtype.ByteWidth), - } - } - b.Append(val) - return nil -} - -func (b *FixedSizeBinaryBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *FixedSizeBinaryBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("fixed size binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ Builder = (*FixedSizeBinaryBuilder)(nil) -) diff --git a/go/arrow/array/fixedsize_binarybuilder_test.go b/go/arrow/array/fixedsize_binarybuilder_test.go deleted file mode 100644 index 0c58c65ecb02e..0000000000000 --- a/go/arrow/array/fixedsize_binarybuilder_test.go +++ /dev/null @@ -1,107 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestFixedSizeBinaryBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := arrow.FixedSizeBinaryType{ByteWidth: 7} - b := NewFixedSizeBinaryBuilder(mem, &dtype) - - b.Append([]byte("1234567")) - b.AppendNull() - b.Append([]byte("ABCDEFG")) - b.AppendNull() - - assert.Equal(t, 4, b.Len(), "unexpected Len()") - assert.Equal(t, 2, b.NullN(), "unexpected NullN()") - - values := [][]byte{ - []byte("7654321"), - nil, - []byte("AZERTYU"), - } - b.AppendValues(values, []bool{true, false, true}) - - assert.Equal(t, 7, b.Len(), "unexpected Len()") - assert.Equal(t, 3, b.NullN(), "unexpected NullN()") - - a := b.NewFixedSizeBinaryArray() - - // check state of builder after NewFixedSizeBinaryArray - assert.Zero(t, b.Len(), "unexpected ArrayBuilder.Len(), NewFixedSizeBinaryArray did not reset state") - assert.Zero(t, b.Cap(), "unexpected ArrayBuilder.Cap(), NewFixedSizeBinaryArray did not reset state") - assert.Zero(t, b.NullN(), "unexpected ArrayBuilder.NullN(), NewFixedSizeBinaryArray did not reset state") - assert.Equal(t, a.String(), `["1234567" (null) "ABCDEFG" (null) "7654321" (null) "AZERTYU"]`) - - b.Release() - a.Release() -} - -func TestFixedSizeBinaryBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := arrow.FixedSizeBinaryType{ByteWidth: 7} - ab := NewFixedSizeBinaryBuilder(mem, &dtype) - defer ab.Release() - - want := [][]byte{ - []byte("1234567"), - []byte("AZERTYU"), - []byte("7654321"), - } - - fixedSizeValues := func(a *FixedSizeBinary) [][]byte { - vs := make([][]byte, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - ab.AppendValues([][]byte{}, nil) - a := ab.NewFixedSizeBinaryArray() - assert.Zero(t, 
a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewFixedSizeBinaryArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([][]byte{}, nil) - ab.AppendValues(want, nil) - a = ab.NewFixedSizeBinaryArray() - assert.Equal(t, want, fixedSizeValues(a)) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([][]byte{}, nil) - a = ab.NewFixedSizeBinaryArray() - assert.Equal(t, want, fixedSizeValues(a)) - a.Release() -} diff --git a/go/arrow/array/float16.go b/go/arrow/array/float16.go deleted file mode 100644 index 757b658a9150d..0000000000000 --- a/go/arrow/array/float16.go +++ /dev/null @@ -1,123 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "fmt" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/float16" - "github.com/apache/arrow/go/v18/internal/json" -) - -// A type which represents an immutable sequence of Float16 values. 
-type Float16 struct { - array - values []float16.Num -} - -func NewFloat16Data(data arrow.ArrayData) *Float16 { - a := &Float16{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *Float16) Value(i int) float16.Num { return a.values[i] } -func (a *Float16) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.Value(i).String() -} - -func (a *Float16) Values() []float16.Num { return a.values } - -func (a *Float16) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", a.values[i].Float32()) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Float16) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Float16Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Float16) GetOneForMarshal(i int) interface{} { - if a.IsValid(i) { - return a.values[i].Float32() - } - return nil -} - -func (a *Float16) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i, v := range a.values { - if !a.IsValid(i) { - vals[i] = nil - continue - } - - switch { - case v.IsNaN(): - vals[i] = "NaN" - case v.IsInf() && !v.Signbit(): - vals[i] = "+Inf" - case v.IsInf() && v.Signbit(): - vals[i] = "-Inf" - default: - vals[i] = v.Float32() - } - } - return json.Marshal(vals) -} - -func arrayEqualFloat16(left, right *Float16) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -var ( - _ arrow.Array = (*Float16)(nil) -) diff --git a/go/arrow/array/float16_builder.go b/go/arrow/array/float16_builder.go deleted file mode 100644 index 7543f2b6f96dd..0000000000000 --- 
a/go/arrow/array/float16_builder.go +++ /dev/null @@ -1,263 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "fmt" - "reflect" - "strconv" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/float16" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type Float16Builder struct { - builder - - data *memory.Buffer - rawData []float16.Num -} - -func NewFloat16Builder(mem memory.Allocator) *Float16Builder { - return &Float16Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Float16Builder) Type() arrow.DataType { return arrow.FixedWidthTypes.Float16 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *Float16Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Float16Builder) Append(v float16.Num) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Float16Builder) UnsafeAppend(v float16.Num) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Float16Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Float16Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Float16Builder) AppendEmptyValue() { - b.Reserve(1) - b.UnsafeAppend(float16.Num{}) -} - -func (b *Float16Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Float16Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Float16Builder) AppendValues(v []float16.Num, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - if len(v) > 0 { - arrow.Float16Traits.Copy(b.rawData[b.length:], v) - } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Float16Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Uint16Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Float16Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Float16Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Float16Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Float16Traits.BytesRequired(n)) - b.rawData = arrow.Float16Traits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a Float16 array from the memory buffers used by the builder and resets the Float16Builder -// so it can be used to build a new array. -func (b *Float16Builder) NewArray() arrow.Array { - return b.NewFloat16Array() -} - -// NewFloat16Array creates a Float16 array from the memory buffers used by the builder and resets the Float16Builder -// so it can be used to build a new array. 
-func (b *Float16Builder) NewFloat16Array() (a *Float16) { - data := b.newData() - a = NewFloat16Data(data) - data.Release() - return -} - -func (b *Float16Builder) newData() (data *Data) { - bytesRequired := arrow.Float16Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.FixedWidthTypes.Float16, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Float16Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseFloat(s, 32) - if err != nil { - b.AppendNull() - return err - } - b.Append(float16.New(float32(v))) - return nil -} - -func (b *Float16Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case float64: - b.Append(float16.New(float32(v))) - case string: - f, err := strconv.ParseFloat(v, 32) - if err != nil { - return err - } - // this will currently silently truncate if it is too large - b.Append(float16.New(float32(f))) - case json.Number: - f, err := v.Float64() - if err != nil { - return err - } - b.Append(float16.New(float32(f))) - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(float16.Num{}), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *Float16Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -// UnmarshalJSON will add values to this builder from unmarshalling the -// array of values. Currently values that are larger than a float16 will -// be silently truncated. 
-func (b *Float16Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("float16 builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} diff --git a/go/arrow/array/float16_builder_test.go b/go/arrow/array/float16_builder_test.go deleted file mode 100644 index ab25e544ed833..0000000000000 --- a/go/arrow/array/float16_builder_test.go +++ /dev/null @@ -1,156 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/float16" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func float32Values(a *array.Float16) []float32 { - values := make([]float32, a.Len()) - for i, v := range a.Values() { - values[i] = v.Float32() - } - return values -} - -func TestNewFloat16Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat16Builder(mem) - - ab.Append(float16.New(1)) - ab.Append(float16.New(2)) - ab.Append(float16.New(3)) - ab.AppendNull() - ab.Append(float16.New(5)) - ab.Append(float16.New(6)) - ab.AppendNull() - ab.Append(float16.New(8)) - ab.Append(float16.New(9)) - ab.Append(float16.New(10)) - assert.NoError(t, ab.AppendValueFromString("11.0")) - - // check state of builder before NewFloat16Array - assert.Equal(t, 11, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewFloat16Array() - assert.Equal(t, "1", a.ValueStr(0)) - // check state of builder after NewFloat16Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewFloat16Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewFloat16Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewFloat16Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - - assert.Equal(t, []float32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10, 11}, float32Values(a), "unexpected Float16Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Values(), 11, "unexpected length of Float16Values") - - a.Release() - ab.Append(float16.New(7)) - ab.Append(float16.New(8)) - - a = ab.NewFloat16Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []float32{7, 
8}, float32Values(a)) - assert.Len(t, a.Values(), 2) - - a.Release() -} - -func TestFloat16Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat16Builder(mem) - defer ab.Release() - - want := []float16.Num{float16.New(3), float16.New(4)} - - ab.AppendValues([]float16.Num{}, nil) - a := ab.NewFloat16Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewFloat16Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(want, nil) - a = ab.NewFloat16Array() - assert.Equal(t, want, a.Values()) - a.Release() - - ab.AppendValues([]float16.Num{}, nil) - ab.AppendValues(want, nil) - a = ab.NewFloat16Array() - assert.Equal(t, want, a.Values()) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([]float16.Num{}, nil) - a = ab.NewFloat16Array() - assert.Equal(t, want, a.Values()) - a.Release() -} - -func TestFloat16StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewFloat16Builder(mem) - defer b.Release() - - b.Append(float16.New(1)) - b.Append(float16.New(2)) - b.Append(float16.New(3)) - b.AppendNull() - b.Append(float16.New(5)) - b.Append(float16.New(6)) - b.AppendNull() - b.Append(float16.New(8)) - b.Append(float16.New(9)) - b.Append(float16.New(10)) - - arr := b.NewArray().(*array.Float16) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewFloat16Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Float16) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} diff --git a/go/arrow/array/interval.go b/go/arrow/array/interval.go deleted file mode 100644 index 66c6eca21bca5..0000000000000 --- a/go/arrow/array/interval.go +++ /dev/null @@ -1,953 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "fmt" - "strconv" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -func NewIntervalData(data arrow.ArrayData) arrow.Array { - switch data.DataType().(type) { - case *arrow.MonthIntervalType: - return NewMonthIntervalData(data.(*Data)) - case *arrow.DayTimeIntervalType: - return NewDayTimeIntervalData(data.(*Data)) - case *arrow.MonthDayNanoIntervalType: - return NewMonthDayNanoIntervalData(data.(*Data)) - default: - panic(fmt.Errorf("arrow/array: unknown interval data type %T", data.DataType())) - } -} - -// A type which represents an immutable sequence of arrow.MonthInterval values. -type MonthInterval struct { - array - values []arrow.MonthInterval -} - -func NewMonthIntervalData(data arrow.ArrayData) *MonthInterval { - a := &MonthInterval{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *MonthInterval) Value(i int) arrow.MonthInterval { return a.values[i] } -func (a *MonthInterval) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return fmt.Sprintf("%v", a.Value(i)) -} -func (a *MonthInterval) MonthIntervalValues() []arrow.MonthInterval { return a.values } - -func (a *MonthInterval) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *MonthInterval) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.MonthIntervalTraits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func 
(a *MonthInterval) GetOneForMarshal(i int) interface{} { - if a.IsValid(i) { - return a.values[i] - } - return nil -} - -// MarshalJSON will create a json array out of a MonthInterval array, -// each value will be an object of the form {"months": #} where -// # is the numeric value of that index -func (a *MonthInterval) MarshalJSON() ([]byte, error) { - if a.NullN() == 0 { - return json.Marshal(a.values) - } - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualMonthInterval(left, right *MonthInterval) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -type MonthIntervalBuilder struct { - builder - - data *memory.Buffer - rawData []arrow.MonthInterval -} - -func NewMonthIntervalBuilder(mem memory.Allocator) *MonthIntervalBuilder { - return &MonthIntervalBuilder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *MonthIntervalBuilder) Type() arrow.DataType { return arrow.FixedWidthTypes.MonthInterval } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *MonthIntervalBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *MonthIntervalBuilder) Append(v arrow.MonthInterval) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *MonthIntervalBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *MonthIntervalBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *MonthIntervalBuilder) AppendEmptyValue() { - b.Append(arrow.MonthInterval(0)) -} - -func (b *MonthIntervalBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *MonthIntervalBuilder) UnsafeAppend(v arrow.MonthInterval) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *MonthIntervalBuilder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *MonthIntervalBuilder) AppendValues(v []arrow.MonthInterval, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.MonthIntervalTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *MonthIntervalBuilder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.MonthIntervalTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.MonthIntervalTraits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *MonthIntervalBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *MonthIntervalBuilder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.MonthIntervalTraits.BytesRequired(n)) - b.rawData = arrow.MonthIntervalTraits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a MonthInterval array from the memory buffers used by the builder and resets the MonthIntervalBuilder -// so it can be used to build a new array. -func (b *MonthIntervalBuilder) NewArray() arrow.Array { - return b.NewMonthIntervalArray() -} - -// NewMonthIntervalArray creates a MonthInterval array from the memory buffers used by the builder and resets the MonthIntervalBuilder -// so it can be used to build a new array. 
-func (b *MonthIntervalBuilder) NewMonthIntervalArray() (a *MonthInterval) { - data := b.newData() - a = NewMonthIntervalData(data) - data.Release() - return -} - -func (b *MonthIntervalBuilder) newData() (data *Data) { - bytesRequired := arrow.MonthIntervalTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.FixedWidthTypes.MonthInterval, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *MonthIntervalBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseInt(s, 10, 32) - if err != nil { - b.AppendNull() - return err - } - b.Append(arrow.MonthInterval(v)) - return nil -} - -func (b *MonthIntervalBuilder) UnmarshalOne(dec *json.Decoder) error { - var v *arrow.MonthInterval - if err := dec.Decode(&v); err != nil { - return err - } - - if v == nil { - b.AppendNull() - } else { - b.Append(*v) - } - return nil -} - -func (b *MonthIntervalBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -// UnmarshalJSON will add the unmarshalled values of an array to the builder, -// values are expected to be strings of the form "#months" where # is the int32 -// value that will be added to the builder. -func (b *MonthIntervalBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("month interval builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -// A type which represents an immutable sequence of arrow.DayTimeInterval values. 
-type DayTimeInterval struct { - array - values []arrow.DayTimeInterval -} - -func NewDayTimeIntervalData(data arrow.ArrayData) *DayTimeInterval { - a := &DayTimeInterval{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *DayTimeInterval) Value(i int) arrow.DayTimeInterval { return a.values[i] } -func (a *DayTimeInterval) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - data, err := json.Marshal(a.GetOneForMarshal(i)) - if err != nil { - panic(err) - } - return string(data) -} - -func (a *DayTimeInterval) DayTimeIntervalValues() []arrow.DayTimeInterval { return a.values } - -func (a *DayTimeInterval) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *DayTimeInterval) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.DayTimeIntervalTraits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *DayTimeInterval) GetOneForMarshal(i int) interface{} { - if a.IsValid(i) { - return a.values[i] - } - return nil -} - -// MarshalJSON will marshal this array to JSON as an array of objects, -// consisting of the form {"days": #, "milliseconds": #} for each element. 
-func (a *DayTimeInterval) MarshalJSON() ([]byte, error) { - if a.NullN() == 0 { - return json.Marshal(a.values) - } - vals := make([]interface{}, a.Len()) - for i, v := range a.values { - if a.IsValid(i) { - vals[i] = v - } else { - vals[i] = nil - } - } - return json.Marshal(vals) -} - -func arrayEqualDayTimeInterval(left, right *DayTimeInterval) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -type DayTimeIntervalBuilder struct { - builder - - data *memory.Buffer - rawData []arrow.DayTimeInterval -} - -func NewDayTimeIntervalBuilder(mem memory.Allocator) *DayTimeIntervalBuilder { - return &DayTimeIntervalBuilder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *DayTimeIntervalBuilder) Type() arrow.DataType { return arrow.FixedWidthTypes.DayTimeInterval } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *DayTimeIntervalBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *DayTimeIntervalBuilder) Append(v arrow.DayTimeInterval) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *DayTimeIntervalBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *DayTimeIntervalBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *DayTimeIntervalBuilder) AppendEmptyValue() { - b.Append(arrow.DayTimeInterval{}) -} - -func (b *DayTimeIntervalBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *DayTimeIntervalBuilder) UnsafeAppend(v arrow.DayTimeInterval) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - 
b.rawData[b.length] = v - b.length++ -} - -func (b *DayTimeIntervalBuilder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *DayTimeIntervalBuilder) AppendValues(v []arrow.DayTimeInterval, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.DayTimeIntervalTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *DayTimeIntervalBuilder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.DayTimeIntervalTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.DayTimeIntervalTraits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *DayTimeIntervalBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. 
-func (b *DayTimeIntervalBuilder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.DayTimeIntervalTraits.BytesRequired(n)) - b.rawData = arrow.DayTimeIntervalTraits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a DayTimeInterval array from the memory buffers used by the builder and resets the DayTimeIntervalBuilder -// so it can be used to build a new array. -func (b *DayTimeIntervalBuilder) NewArray() arrow.Array { - return b.NewDayTimeIntervalArray() -} - -// NewDayTimeIntervalArray creates a DayTimeInterval array from the memory buffers used by the builder and resets the DayTimeIntervalBuilder -// so it can be used to build a new array. -func (b *DayTimeIntervalBuilder) NewDayTimeIntervalArray() (a *DayTimeInterval) { - data := b.newData() - a = NewDayTimeIntervalData(data) - data.Release() - return -} - -func (b *DayTimeIntervalBuilder) newData() (data *Data) { - bytesRequired := arrow.DayTimeIntervalTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.FixedWidthTypes.DayTimeInterval, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *DayTimeIntervalBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - var v arrow.DayTimeInterval - if err := json.Unmarshal([]byte(s), &v); err != nil { - b.AppendNull() - return err - } - b.Append(v) - return nil -} - -func (b *DayTimeIntervalBuilder) UnmarshalOne(dec *json.Decoder) error { - var v *arrow.DayTimeInterval - if err := dec.Decode(&v); err != nil { - return err - } - - if v == nil { - b.AppendNull() - } else { - b.Append(*v) - } - return nil -} - -func (b 
*DayTimeIntervalBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -// UnmarshalJSON will add the values unmarshalled from an array to the builder, -// with the values expected to be objects of the form {"days": #, "milliseconds": #} -func (b *DayTimeIntervalBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("day_time interval builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -// A type which represents an immutable sequence of arrow.DayTimeInterval values. -type MonthDayNanoInterval struct { - array - values []arrow.MonthDayNanoInterval -} - -func NewMonthDayNanoIntervalData(data arrow.ArrayData) *MonthDayNanoInterval { - a := &MonthDayNanoInterval{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *MonthDayNanoInterval) Value(i int) arrow.MonthDayNanoInterval { return a.values[i] } -func (a *MonthDayNanoInterval) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - data, err := json.Marshal(a.GetOneForMarshal(i)) - if err != nil { - panic(err) - } - return string(data) -} - -func (a *MonthDayNanoInterval) MonthDayNanoIntervalValues() []arrow.MonthDayNanoInterval { - return a.values -} - -func (a *MonthDayNanoInterval) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *MonthDayNanoInterval) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.MonthDayNanoIntervalTraits.CastFromBytes(vals.Bytes()) - beg := 
a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *MonthDayNanoInterval) GetOneForMarshal(i int) interface{} { - if a.IsValid(i) { - return a.values[i] - } - return nil -} - -// MarshalJSON will marshal this array to a JSON array with elements -// marshalled to the form {"months": #, "days": #, "nanoseconds": #} -func (a *MonthDayNanoInterval) MarshalJSON() ([]byte, error) { - if a.NullN() == 0 { - return json.Marshal(a.values) - } - vals := make([]interface{}, a.Len()) - for i, v := range a.values { - if a.IsValid(i) { - vals[i] = v - } else { - vals[i] = nil - } - } - return json.Marshal(vals) -} - -func arrayEqualMonthDayNanoInterval(left, right *MonthDayNanoInterval) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -type MonthDayNanoIntervalBuilder struct { - builder - - data *memory.Buffer - rawData []arrow.MonthDayNanoInterval -} - -func NewMonthDayNanoIntervalBuilder(mem memory.Allocator) *MonthDayNanoIntervalBuilder { - return &MonthDayNanoIntervalBuilder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *MonthDayNanoIntervalBuilder) Type() arrow.DataType { - return arrow.FixedWidthTypes.MonthDayNanoInterval -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *MonthDayNanoIntervalBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *MonthDayNanoIntervalBuilder) Append(v arrow.MonthDayNanoInterval) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *MonthDayNanoIntervalBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *MonthDayNanoIntervalBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *MonthDayNanoIntervalBuilder) AppendEmptyValue() { - b.Append(arrow.MonthDayNanoInterval{}) -} - -func (b *MonthDayNanoIntervalBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *MonthDayNanoIntervalBuilder) UnsafeAppend(v arrow.MonthDayNanoInterval) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *MonthDayNanoIntervalBuilder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *MonthDayNanoIntervalBuilder) AppendValues(v []arrow.MonthDayNanoInterval, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.MonthDayNanoIntervalTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *MonthDayNanoIntervalBuilder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.MonthDayNanoIntervalTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.MonthDayNanoIntervalTraits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *MonthDayNanoIntervalBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *MonthDayNanoIntervalBuilder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.MonthDayNanoIntervalTraits.BytesRequired(n)) - b.rawData = arrow.MonthDayNanoIntervalTraits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a MonthDayNanoInterval array from the memory buffers used by the builder and resets the MonthDayNanoIntervalBuilder -// so it can be used to build a new array. -func (b *MonthDayNanoIntervalBuilder) NewArray() arrow.Array { - return b.NewMonthDayNanoIntervalArray() -} - -// NewMonthDayNanoIntervalArray creates a MonthDayNanoInterval array from the memory buffers used by the builder and resets the MonthDayNanoIntervalBuilder -// so it can be used to build a new array. 
-func (b *MonthDayNanoIntervalBuilder) NewMonthDayNanoIntervalArray() (a *MonthDayNanoInterval) { - data := b.newData() - a = NewMonthDayNanoIntervalData(data) - data.Release() - return -} - -func (b *MonthDayNanoIntervalBuilder) newData() (data *Data) { - bytesRequired := arrow.MonthDayNanoIntervalTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.FixedWidthTypes.MonthDayNanoInterval, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *MonthDayNanoIntervalBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - var v arrow.MonthDayNanoInterval - if err := json.Unmarshal([]byte(s), &v); err != nil { - return err - } - b.Append(v) - return nil -} - -func (b *MonthDayNanoIntervalBuilder) UnmarshalOne(dec *json.Decoder) error { - var v *arrow.MonthDayNanoInterval - if err := dec.Decode(&v); err != nil { - return err - } - - if v == nil { - b.AppendNull() - } else { - b.Append(*v) - } - return nil -} - -func (b *MonthDayNanoIntervalBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -// UnmarshalJSON unmarshals a JSON array of objects and adds them to this builder, -// each element of the array is expected to be an object of the form -// {"months": #, "days": #, "nanoseconds": #} -func (b *MonthDayNanoIntervalBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("month_day_nano interval builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = 
(*MonthInterval)(nil) - _ arrow.Array = (*DayTimeInterval)(nil) - _ arrow.Array = (*MonthDayNanoInterval)(nil) - - _ Builder = (*MonthIntervalBuilder)(nil) - _ Builder = (*DayTimeIntervalBuilder)(nil) - _ Builder = (*MonthDayNanoIntervalBuilder)(nil) -) diff --git a/go/arrow/array/interval_test.go b/go/arrow/array/interval_test.go deleted file mode 100644 index 6d36885a627d9..0000000000000 --- a/go/arrow/array/interval_test.go +++ /dev/null @@ -1,524 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "math" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestMonthIntervalArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - want = []arrow.MonthInterval{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - b := array.NewMonthIntervalBuilder(mem) - defer b.Release() - - b.Retain() - b.Release() - - b.AppendValues(want[:2], nil) - b.AppendNull() - b.Append(want[3]) - - if got, want := b.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := b.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - arr := b.NewMonthIntervalArray() - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - for i := range want { - if arr.IsNull(i) != !valids[i] { - t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) - } - switch { - case arr.IsNull(i): - default: - got := arr.Value(i) - if got != want[i] { - t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) - } - } - } - - sub := array.MakeFromData(arr.Data()) - defer sub.Release() - - if sub.DataType().ID() != arrow.INTERVAL_MONTHS { - t.Fatalf("invalid type: got=%q, want=interval_months", sub.DataType().Name()) - } - - if _, ok := sub.(*array.MonthInterval); !ok { - t.Fatalf("could not type-assert to array.MonthInterval") - } - - if got, want := arr.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := 
array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.MonthInterval) - if !ok { - t.Fatalf("could not type-assert to array.MonthInterval") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } -} - -func TestMonthIntervalBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - want := []arrow.MonthInterval{1, 2, 3, 4} - - b := array.NewMonthIntervalBuilder(mem) - defer b.Release() - - miValues := func(a *array.MonthInterval) []arrow.MonthInterval { - vs := make([]arrow.MonthInterval, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - b.AppendValues([]arrow.MonthInterval{}, nil) - arr := b.NewMonthIntervalArray() - assert.Zero(t, arr.Len()) - arr.Release() - - b.AppendValues(nil, nil) - arr = b.NewMonthIntervalArray() - assert.Zero(t, arr.Len()) - arr.Release() - - b.AppendValues([]arrow.MonthInterval{}, nil) - b.AppendValues(want, nil) - arr = b.NewMonthIntervalArray() - assert.Equal(t, want, miValues(arr)) - arr.Release() - - b.AppendValues(want, nil) - b.AppendValues([]arrow.MonthInterval{}, nil) - arr = b.NewMonthIntervalArray() - assert.Equal(t, want, miValues(arr)) - arr.Release() -} - -func TestMonthIntervalStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - values = []arrow.MonthInterval{1, 2, 3, 4} - valid = []bool{true, true, false, true} - ) - - b := array.NewMonthIntervalBuilder(mem) - defer b.Release() - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.MonthInterval) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewMonthIntervalBuilder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.MonthInterval) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestDayTimeArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - want = []arrow.DayTimeInterval{ - {Days: 1, Milliseconds: 1}, {Days: 2, Milliseconds: 2}, - {Days: 3, Milliseconds: 3}, {Days: 4, Milliseconds: 4}} - valids = []bool{true, true, false, true} - ) - - b := array.NewDayTimeIntervalBuilder(mem) - defer b.Release() - - b.Retain() - b.Release() - - b.AppendValues(want[:2], nil) - b.AppendNull() - b.Append(want[3]) - - if got, want := b.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := b.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - arr := b.NewDayTimeIntervalArray() - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - for i := range want { - if arr.IsNull(i) != !valids[i] { - t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) - } - switch { - case arr.IsNull(i): - default: - got := arr.Value(i) - if got != want[i] { - t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) - } - } - } - - sub := array.MakeFromData(arr.Data()) - defer sub.Release() - - if sub.DataType().ID() != arrow.INTERVAL_DAY_TIME { - t.Fatalf("invalid type: got=%q, want=interval_day_time", sub.DataType().Name()) - } - - if _, ok := sub.(*array.DayTimeInterval); !ok { - t.Fatalf("could not type-assert to array.DayTimeInterval") - } - - if got, 
want := arr.String(), `[{1 1} {2 2} (null) {4 4}]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.DayTimeInterval) - if !ok { - t.Fatalf("could not type-assert to array.DayInterval") - } - - if got, want := v.String(), `[(null) {4 4}]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } -} - -func TestDayTimeIntervalBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - want := []arrow.DayTimeInterval{ - {Days: 1, Milliseconds: 1}, {Days: 2, Milliseconds: 2}, - {Days: 3, Milliseconds: 3}, {Days: 4, Milliseconds: 4}} - - b := array.NewDayTimeIntervalBuilder(mem) - defer b.Release() - - dtValues := func(a *array.DayTimeInterval) []arrow.DayTimeInterval { - vs := make([]arrow.DayTimeInterval, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - b.AppendValues([]arrow.DayTimeInterval{}, nil) - arr := b.NewDayTimeIntervalArray() - assert.Zero(t, arr.Len()) - arr.Release() - - b.AppendValues(nil, nil) - arr = b.NewDayTimeIntervalArray() - assert.Zero(t, arr.Len()) - arr.Release() - - b.AppendValues([]arrow.DayTimeInterval{}, nil) - b.AppendValues(want, nil) - arr = b.NewDayTimeIntervalArray() - assert.Equal(t, want, dtValues(arr)) - arr.Release() - - b.AppendValues(want, nil) - b.AppendValues([]arrow.DayTimeInterval{}, nil) - arr = b.NewDayTimeIntervalArray() - assert.Equal(t, want, dtValues(arr)) - arr.Release() -} - -func TestDayTimeIntervalStringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - values = []arrow.DayTimeInterval{ - {Days: 1, Milliseconds: 1}, - {Days: 2, Milliseconds: 2}, - {Days: 3, Milliseconds: 3}, - {Days: 4, Milliseconds: 4}, - } - valid = []bool{true, true, false, true} - ) - - b := array.NewDayTimeIntervalBuilder(mem) - defer b.Release() - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.DayTimeInterval) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewDayTimeIntervalBuilder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.DayTimeInterval) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestMonthDayNanoArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - want = []arrow.MonthDayNanoInterval{ - {Months: 1, Days: 1, Nanoseconds: 1000}, {Months: 2, Days: 2, Nanoseconds: 2000}, - {Months: 3, Days: 3, Nanoseconds: 3000}, {Months: 4, Days: 4, Nanoseconds: 4000}, - {Months: 0, Days: 0, Nanoseconds: 0}, {Months: -1, Days: -2, Nanoseconds: -300}, - {Months: math.MaxInt32, Days: math.MinInt32, Nanoseconds: math.MaxInt64}, - {Months: math.MinInt32, Days: math.MaxInt32, Nanoseconds: math.MinInt64}, - } - valids = []bool{true, true, false, true, true, true, false, true} - ) - - b := array.NewMonthDayNanoIntervalBuilder(mem) - defer b.Release() - - b.Retain() - b.Release() - - b.AppendValues(want[:2], nil) - b.AppendNull() - b.Append(want[3]) - b.AppendValues(want[4:], valids[4:]) - - if got, want := b.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := b.NullN(), 2; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - arr := b.NewMonthDayNanoIntervalArray() - defer arr.Release() - - 
arr.Retain() - arr.Release() - - if got, want := arr.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 2; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - for i := range want { - if arr.IsNull(i) != !valids[i] { - t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) - } - switch { - case arr.IsNull(i): - default: - got := arr.Value(i) - if got != want[i] { - t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) - } - } - } - - sub := array.MakeFromData(arr.Data()) - defer sub.Release() - - if sub.DataType().ID() != arrow.INTERVAL_MONTH_DAY_NANO { - t.Fatalf("invalid type: got=%q, want=interval", sub.DataType().Name()) - } - - if _, ok := sub.(*array.MonthDayNanoInterval); !ok { - t.Fatalf("could not type-assert to array.MonthDayNanoInterval") - } - - if got, want := arr.String(), `[{1 1 1000} {2 2 2000} (null) {4 4 4000} {0 0 0} {-1 -2 -300} (null) {-2147483648 2147483647 -9223372036854775808}]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.MonthDayNanoInterval) - if !ok { - t.Fatalf("could not type-assert to array.MonthDayNanoInterval") - } - - if got, want := v.String(), `[(null) {4 4 4000}]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } -} - -func TestMonthDayNanoIntervalBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - want := []arrow.MonthDayNanoInterval{ - {Months: 1, Days: 1, Nanoseconds: 1000}, - {Months: 2, Days: 2, Nanoseconds: 2000}, - {Months: 3, Days: 3, Nanoseconds: 3000}, - {Months: 4, Days: 4, Nanoseconds: 4000}} - - b := array.NewMonthDayNanoIntervalBuilder(mem) - defer b.Release() - - dtValues := func(a *array.MonthDayNanoInterval) []arrow.MonthDayNanoInterval { - vs := 
make([]arrow.MonthDayNanoInterval, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - b.AppendValues([]arrow.MonthDayNanoInterval{}, nil) - arr := b.NewMonthDayNanoIntervalArray() - assert.Zero(t, arr.Len()) - arr.Release() - - b.AppendValues(nil, nil) - arr = b.NewMonthDayNanoIntervalArray() - assert.Zero(t, arr.Len()) - arr.Release() - - b.AppendValues([]arrow.MonthDayNanoInterval{}, nil) - b.AppendValues(want, nil) - arr = b.NewMonthDayNanoIntervalArray() - assert.Equal(t, want, dtValues(arr)) - arr.Release() - - b.AppendValues(want, nil) - b.AppendValues([]arrow.MonthDayNanoInterval{}, nil) - arr = b.NewMonthDayNanoIntervalArray() - assert.Equal(t, want, dtValues(arr)) - arr.Release() -} - -func TestMonthDayNanoIntervalStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - values = []arrow.MonthDayNanoInterval{ - {Months: 1, Days: 1, Nanoseconds: 1000}, {Months: 2, Days: 2, Nanoseconds: 2000}, - {Months: 3, Days: 3, Nanoseconds: 3000}, {Months: 4, Days: 4, Nanoseconds: 4000}, - {Months: 0, Days: 0, Nanoseconds: 0}, {Months: -1, Days: -2, Nanoseconds: -300}, - {Months: math.MaxInt32, Days: math.MinInt32, Nanoseconds: math.MaxInt64}, - {Months: math.MinInt32, Days: math.MaxInt32, Nanoseconds: math.MinInt64}, - } - valid = []bool{true, true, false, true, true, true, false, true} - ) - - b := array.NewMonthDayNanoIntervalBuilder(mem) - defer b.Release() - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.MonthDayNanoInterval) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewMonthDayNanoIntervalBuilder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.MonthDayNanoInterval) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} diff --git a/go/arrow/array/json_reader.go b/go/arrow/array/json_reader.go deleted file mode 100644 index 2944151a5f63c..0000000000000 --- a/go/arrow/array/json_reader.go +++ /dev/null @@ -1,205 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "errors" - "fmt" - "io" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type Option func(config) -type config interface{} - -// WithChunk sets the chunk size for reading in json records. The default is to -// read in one row per record batch as a single object. If chunk size is set to -// a negative value, then the entire file is read as a single record batch. 
-// Otherwise a record batch is read in with chunk size rows per record batch until -// it reaches EOF. -func WithChunk(n int) Option { - return func(cfg config) { - switch cfg := cfg.(type) { - case *JSONReader: - cfg.chunk = n - default: - panic(fmt.Errorf("arrow/json): unknown config type %T", cfg)) - } - } -} - -// WithAllocator specifies the allocator to use for creating the record batches, -// if it is not called, then memory.DefaultAllocator will be used. -func WithAllocator(mem memory.Allocator) Option { - return func(cfg config) { - switch cfg := cfg.(type) { - case *JSONReader: - cfg.mem = mem - default: - panic(fmt.Errorf("arrow/json): unknown config type %T", cfg)) - } - } -} - -// JSONReader is a json reader that meets the RecordReader interface definition. -// -// To read in an array of objects as a record, you can use RecordFromJSON -// which is equivalent to reading the json as a struct array whose fields are -// the columns of the record. This primarily exists to fit the RecordReader -// interface as a matching reader for the csv reader. -type JSONReader struct { - r *json.Decoder - schema *arrow.Schema - - bldr *RecordBuilder - - refs int64 - cur arrow.Record - err error - - chunk int - done bool - - mem memory.Allocator - next func() bool -} - -// NewJSONReader returns a json RecordReader which expects to find one json object -// per row of dataset. Using WithChunk can control how many rows are processed -// per record, which is how many objects become a single record from the file. -// -// If it is desired to write out an array of rows, then simply use RecordToStructArray -// and json.Marshal the struct array for the same effect. 
-func NewJSONReader(r io.Reader, schema *arrow.Schema, opts ...Option) *JSONReader { - rr := &JSONReader{ - r: json.NewDecoder(r), - schema: schema, - refs: 1, - chunk: 1, - } - for _, o := range opts { - o(rr) - } - - if rr.mem == nil { - rr.mem = memory.DefaultAllocator - } - - rr.bldr = NewRecordBuilder(rr.mem, schema) - switch { - case rr.chunk < 0: - rr.next = rr.nextall - case rr.chunk > 1: - rr.next = rr.nextn - default: - rr.next = rr.next1 - } - return rr -} - -// Err returns the last encountered error -func (r *JSONReader) Err() error { return r.err } - -func (r *JSONReader) Schema() *arrow.Schema { return r.schema } - -// Record returns the last read in record. The returned record is only valid -// until the next call to Next unless Retain is called on the record itself. -func (r *JSONReader) Record() arrow.Record { return r.cur } - -func (r *JSONReader) Retain() { - atomic.AddInt64(&r.refs, 1) -} - -func (r *JSONReader) Release() { - debug.Assert(atomic.LoadInt64(&r.refs) > 0, "too many releases") - - if atomic.AddInt64(&r.refs, -1) == 0 { - if r.cur != nil { - r.cur.Release() - r.bldr.Release() - r.r = nil - } - } -} - -// Next returns true if it read in a record, which will be available via Record -// and false if there is either an error or the end of the reader. 
-func (r *JSONReader) Next() bool { - if r.cur != nil { - r.cur.Release() - r.cur = nil - } - - if r.err != nil || r.done { - return false - } - - return r.next() -} - -func (r *JSONReader) readNext() bool { - r.err = r.r.Decode(r.bldr) - if r.err != nil { - r.done = true - if errors.Is(r.err, io.EOF) { - r.err = nil - } - return false - } - return true -} - -func (r *JSONReader) nextall() bool { - for r.readNext() { - } - - r.cur = r.bldr.NewRecord() - return r.cur.NumRows() > 0 -} - -func (r *JSONReader) next1() bool { - if !r.readNext() { - return false - } - - r.cur = r.bldr.NewRecord() - return true -} - -func (r *JSONReader) nextn() bool { - var n = 0 - - for i := 0; i < r.chunk && !r.done; i, n = i+1, n+1 { - if !r.readNext() { - break - } - } - - if n > 0 { - r.cur = r.bldr.NewRecord() - } - return n > 0 -} - -var ( - _ RecordReader = (*JSONReader)(nil) -) diff --git a/go/arrow/array/json_reader_test.go b/go/arrow/array/json_reader_test.go deleted file mode 100644 index 5e258dfdc07b1..0000000000000 --- a/go/arrow/array/json_reader_test.go +++ /dev/null @@ -1,141 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -const jsondata = ` - {"region": "NY", "model": "3", "sales": 742.0} - {"region": "NY", "model": "S", "sales": 304.125} - {"region": "NY", "model": "X", "sales": 136.25} - {"region": "NY", "model": "Y", "sales": 27.5} - {"region": "CA", "model": "3", "sales": 512} - {"region": "CA", "model": "S", "sales": 978} - {"region": "CA", "model": "X", "sales": 1.0} - {"region": "CA", "model": "Y", "sales": 69} - {"region": "QC", "model": "3", "sales": 273.5} - {"region": "QC", "model": "S", "sales": 13} - {"region": "QC", "model": "X", "sales": 54} - {"region": "QC", "model": "Y", "sales": 21} - {"region": "QC", "model": "3", "sales": 152.25} - {"region": "QC", "model": "S", "sales": 10} - {"region": "QC", "model": "X", "sales": 42} - {"region": "QC", "model": "Y", "sales": 37}` - -func TestJSONReader(t *testing.T) { - schema := arrow.NewSchema([]arrow.Field{ - {Name: "region", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "model", Type: arrow.BinaryTypes.String}, - {Name: "sales", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, nil) - - rdr := array.NewJSONReader(strings.NewReader(jsondata), schema) - defer rdr.Release() - - n := 0 - for rdr.Next() { - n++ - rec := rdr.Record() - assert.NotNil(t, rec) - assert.EqualValues(t, 1, rec.NumRows()) - assert.EqualValues(t, 3, rec.NumCols()) - } - - assert.NoError(t, rdr.Err()) - assert.Equal(t, 16, n) -} - -func TestJSONReaderAll(t *testing.T) { - schema := arrow.NewSchema([]arrow.Field{ - {Name: "region", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "model", Type: arrow.BinaryTypes.String}, - {Name: "sales", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, nil) - - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 
0) - - rdr := array.NewJSONReader(strings.NewReader(jsondata), schema, array.WithAllocator(mem), array.WithChunk(-1)) - defer rdr.Release() - - assert.True(t, rdr.Next()) - rec := rdr.Record() - assert.NotNil(t, rec) - assert.NoError(t, rdr.Err()) - - assert.EqualValues(t, 16, rec.NumRows()) - assert.EqualValues(t, 3, rec.NumCols()) - assert.False(t, rdr.Next()) -} - -func TestJSONReaderChunked(t *testing.T) { - schema := arrow.NewSchema([]arrow.Field{ - {Name: "region", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "model", Type: arrow.BinaryTypes.String}, - {Name: "sales", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, nil) - - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - rdr := array.NewJSONReader(strings.NewReader(jsondata), schema, array.WithAllocator(mem), array.WithChunk(4)) - defer rdr.Release() - - n := 0 - for rdr.Next() { - n++ - rec := rdr.Record() - assert.NotNil(t, rec) - assert.NoError(t, rdr.Err()) - assert.EqualValues(t, 4, rec.NumRows()) - } - - assert.Equal(t, 4, n) - assert.NoError(t, rdr.Err()) -} - -func TestUnmarshalJSON(t *testing.T) { - schema := arrow.NewSchema([]arrow.Field{ - {Name: "region", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "model", Type: arrow.BinaryTypes.String}, - {Name: "sales", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, nil) - - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - recordBuilder := array.NewRecordBuilder(mem, schema) - defer recordBuilder.Release() - - jsondata := `{"region": "NY", "model": "3", "sales": 742.0, "extra": 1234}` - - err := recordBuilder.UnmarshalJSON([]byte(jsondata)) - assert.NoError(t, err) - - record := recordBuilder.NewRecord() - defer record.Release() - - assert.NotNil(t, record) -} diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go deleted file mode 100644 index 1e2191f2cfc3a..0000000000000 --- a/go/arrow/array/list.go +++ /dev/null @@ 
-1,1574 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "fmt" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type ListLike interface { - arrow.Array - ListValues() arrow.Array - ValueOffsets(i int) (start, end int64) -} - -type VarLenListLike interface { - ListLike -} - -// List represents an immutable sequence of array values. -type List struct { - array - values arrow.Array - offsets []int32 -} - -var _ ListLike = (*List)(nil) - -// NewListData returns a new List array value, from data. 
-func NewListData(data arrow.ArrayData) *List { - a := &List{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *List) ListValues() arrow.Array { return a.values } - -func (a *List) ValueStr(i int) string { - if !a.IsValid(i) { - return NullValueStr - } - return string(a.GetOneForMarshal(i).(json.RawMessage)) -} - -func (a *List) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - if a.IsNull(i) { - o.WriteString(NullValueStr) - continue - } - sub := a.newListValue(i) - fmt.Fprintf(o, "%v", sub) - sub.Release() - } - o.WriteString("]") - return o.String() -} - -func (a *List) newListValue(i int) arrow.Array { - beg, end := a.ValueOffsets(i) - return NewSlice(a.values, beg, end) -} - -func (a *List) setData(data *Data) { - debug.Assert(len(data.buffers) >= 2, "list data should have 2 buffers") - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.offsets = arrow.Int32Traits.CastFromBytes(vals.Bytes()) - } - a.values = MakeFromData(data.childData[0]) -} - -func (a *List) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - slice := a.newListValue(i) - defer slice.Release() - v, err := json.Marshal(slice) - if err != nil { - panic(err) - } - return json.RawMessage(v) -} - -func (a *List) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func arrayEqualList(left, right *List) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return Equal(l, r) - }() - if !o { - return false - } - } - return true -} - -// Len 
returns the number of elements in the array. -func (a *List) Len() int { return a.array.Len() } - -func (a *List) Offsets() []int32 { return a.offsets } - -func (a *List) Retain() { - a.array.Retain() - a.values.Retain() -} - -func (a *List) Release() { - a.array.Release() - a.values.Release() -} - -func (a *List) ValueOffsets(i int) (start, end int64) { - debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") - j := i + a.array.data.offset - start, end = int64(a.offsets[j]), int64(a.offsets[j+1]) - return -} - -// LargeList represents an immutable sequence of array values. -type LargeList struct { - array - values arrow.Array - offsets []int64 -} - -var _ ListLike = (*LargeList)(nil) - -// NewLargeListData returns a new LargeList array value, from data. -func NewLargeListData(data arrow.ArrayData) *LargeList { - a := new(LargeList) - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *LargeList) ListValues() arrow.Array { return a.values } - -func (a *LargeList) ValueStr(i int) string { - if !a.IsValid(i) { - return NullValueStr - } - return string(a.GetOneForMarshal(i).(json.RawMessage)) -} - -func (a *LargeList) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - if a.IsNull(i) { - o.WriteString(NullValueStr) - continue - } - sub := a.newListValue(i) - fmt.Fprintf(o, "%v", sub) - sub.Release() - } - o.WriteString("]") - return o.String() -} - -func (a *LargeList) newListValue(i int) arrow.Array { - beg, end := a.ValueOffsets(i) - return NewSlice(a.values, beg, end) -} - -func (a *LargeList) setData(data *Data) { - debug.Assert(len(data.buffers) >= 2, "list data should have 2 buffers") - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.offsets = arrow.Int64Traits.CastFromBytes(vals.Bytes()) - } - a.values = MakeFromData(data.childData[0]) -} - -func (a *LargeList) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - 
return nil - } - - slice := a.newListValue(i) - defer slice.Release() - v, err := json.Marshal(slice) - if err != nil { - panic(err) - } - return json.RawMessage(v) -} - -func (a *LargeList) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func arrayEqualLargeList(left, right *LargeList) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return Equal(l, r) - }() - if !o { - return false - } - } - return true -} - -// Len returns the number of elements in the array. -func (a *LargeList) Len() int { return a.array.Len() } - -func (a *LargeList) Offsets() []int64 { return a.offsets } - -func (a *LargeList) ValueOffsets(i int) (start, end int64) { - debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") - j := i + a.array.data.offset - start, end = a.offsets[j], a.offsets[j+1] - return -} - -func (a *LargeList) Retain() { - a.array.Retain() - a.values.Retain() -} - -func (a *LargeList) Release() { - a.array.Release() - a.values.Release() -} - -type baseListBuilder struct { - builder - - values Builder // value builder for the list's elements. - offsets Builder - - // actual list type - dt arrow.DataType - appendOffsetVal func(int) -} - -type ListLikeBuilder interface { - Builder - ValueBuilder() Builder - Append(bool) -} - -type VarLenListLikeBuilder interface { - ListLikeBuilder - AppendWithSize(bool, int) -} - -type ListBuilder struct { - baseListBuilder -} - -type LargeListBuilder struct { - baseListBuilder -} - -// NewListBuilder returns a builder, using the provided memory allocator. 
-// The created list builder will create a list whose elements will be of type etype. -func NewListBuilder(mem memory.Allocator, etype arrow.DataType) *ListBuilder { - offsetBldr := NewInt32Builder(mem) - return &ListBuilder{ - baseListBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, etype), - offsets: offsetBldr, - dt: arrow.ListOf(etype), - appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) }, - }, - } -} - -// NewListBuilderWithField takes a field to use for the child rather than just -// a datatype to allow for more customization. -func NewListBuilderWithField(mem memory.Allocator, field arrow.Field) *ListBuilder { - offsetBldr := NewInt32Builder(mem) - return &ListBuilder{ - baseListBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, field.Type), - offsets: offsetBldr, - dt: arrow.ListOfField(field), - appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) }, - }, - } -} - -func (b *baseListBuilder) Type() arrow.DataType { - switch dt := b.dt.(type) { - case *arrow.ListType: - f := dt.ElemField() - f.Type = b.values.Type() - return arrow.ListOfField(f) - case *arrow.LargeListType: - f := dt.ElemField() - f.Type = b.values.Type() - return arrow.LargeListOfField(f) - } - return nil -} - -// NewLargeListBuilder returns a builder, using the provided memory allocator. -// The created list builder will create a list whose elements will be of type etype. 
-func NewLargeListBuilder(mem memory.Allocator, etype arrow.DataType) *LargeListBuilder { - offsetBldr := NewInt64Builder(mem) - return &LargeListBuilder{ - baseListBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, etype), - offsets: offsetBldr, - dt: arrow.LargeListOf(etype), - appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) }, - }, - } -} - -// NewLargeListBuilderWithField takes a field rather than just an element type -// to allow for more customization of the final type of the LargeList Array -func NewLargeListBuilderWithField(mem memory.Allocator, field arrow.Field) *LargeListBuilder { - offsetBldr := NewInt64Builder(mem) - return &LargeListBuilder{ - baseListBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, field.Type), - offsets: offsetBldr, - dt: arrow.LargeListOfField(field), - appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) }, - }, - } -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *baseListBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - b.values.Release() - b.offsets.Release() - } - -} - -func (b *baseListBuilder) appendNextOffset() { - b.appendOffsetVal(b.values.Len()) -} - -func (b *baseListBuilder) Append(v bool) { - b.Reserve(1) - b.unsafeAppendBoolToBitmap(v) - b.appendNextOffset() -} - -func (b *baseListBuilder) AppendWithSize(v bool, _ int) { - b.Append(v) -} - -func (b *baseListBuilder) AppendNull() { - b.Reserve(1) - b.unsafeAppendBoolToBitmap(false) - b.appendNextOffset() -} - -func (b *baseListBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *baseListBuilder) AppendEmptyValue() { - b.Append(true) -} - -func (b *baseListBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *ListBuilder) AppendValues(offsets []int32, valid []bool) { - b.Reserve(len(valid)) - b.offsets.(*Int32Builder).AppendValues(offsets, nil) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) -} - -func (b *LargeListBuilder) AppendValues(offsets []int64, valid []bool) { - b.Reserve(len(valid)) - b.offsets.(*Int64Builder).AppendValues(offsets, nil) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) -} - -func (b *baseListBuilder) unsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -func (b *baseListBuilder) init(capacity int) { - b.builder.init(capacity) - b.offsets.init(capacity + 1) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *baseListBuilder) Reserve(n int) { - b.builder.reserve(n, b.resizeHelper) - b.offsets.Reserve(n) -} - -// Resize adjusts the space allocated by b to n elements. 
If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *baseListBuilder) Resize(n int) { - b.resizeHelper(n) - b.offsets.Resize(n) -} - -func (b *baseListBuilder) resizeHelper(n int) { - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(n, b.builder.init) - } -} - -func (b *baseListBuilder) ValueBuilder() Builder { - return b.values -} - -// NewArray creates a List array from the memory buffers used by the builder and resets the ListBuilder -// so it can be used to build a new array. -func (b *ListBuilder) NewArray() arrow.Array { - return b.NewListArray() -} - -// NewArray creates a LargeList array from the memory buffers used by the builder and resets the LargeListBuilder -// so it can be used to build a new array. -func (b *LargeListBuilder) NewArray() arrow.Array { - return b.NewLargeListArray() -} - -// NewListArray creates a List array from the memory buffers used by the builder and resets the ListBuilder -// so it can be used to build a new array. -func (b *ListBuilder) NewListArray() (a *List) { - data := b.newData() - a = NewListData(data) - data.Release() - return -} - -// NewLargeListArray creates a List array from the memory buffers used by the builder and resets the LargeListBuilder -// so it can be used to build a new array. 
-func (b *LargeListBuilder) NewLargeListArray() (a *LargeList) { - data := b.newData() - a = NewLargeListData(data) - data.Release() - return -} - -func (b *baseListBuilder) newData() (data *Data) { - if b.offsets.Len() != b.length+1 { - b.appendNextOffset() - } - values := b.values.NewArray() - defer values.Release() - - var offsets *memory.Buffer - if b.offsets != nil { - arr := b.offsets.NewArray() - defer arr.Release() - offsets = arr.Data().Buffers()[1] - } - - data = NewData( - b.Type(), b.length, - []*memory.Buffer{ - b.nullBitmap, - offsets, - }, - []arrow.ArrayData{values.Data()}, - b.nulls, - 0, - ) - b.reset() - - return -} - -func (b *baseListBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - - return b.UnmarshalOne(json.NewDecoder(strings.NewReader(s))) -} - -func (b *baseListBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch t { - case json.Delim('['): - b.Append(true) - if err := b.values.Unmarshal(dec); err != nil { - return err - } - // consume ']' - _, err := dec.Token() - return err - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Struct: b.dt.String(), - } - } - - return nil -} - -func (b *baseListBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *baseListBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("list builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -// ListView represents an immutable sequence of array values defined by an -// offset into a child array and a length. 
-type ListView struct { - array - values arrow.Array - offsets []int32 - sizes []int32 -} - -var _ VarLenListLike = (*ListView)(nil) - -func NewListViewData(data arrow.ArrayData) *ListView { - a := &ListView{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *ListView) ListValues() arrow.Array { return a.values } - -func (a *ListView) ValueStr(i int) string { - if !a.IsValid(i) { - return NullValueStr - } - return string(a.GetOneForMarshal(i).(json.RawMessage)) -} - -func (a *ListView) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - if a.IsNull(i) { - o.WriteString(NullValueStr) - continue - } - sub := a.newListValue(i) - fmt.Fprintf(o, "%v", sub) - sub.Release() - } - o.WriteString("]") - return o.String() -} - -func (a *ListView) newListValue(i int) arrow.Array { - beg, end := a.ValueOffsets(i) - return NewSlice(a.values, beg, end) -} - -func (a *ListView) setData(data *Data) { - debug.Assert(len(data.buffers) >= 3, "list-view data should have 3 buffers") - a.array.setData(data) - offsets := data.buffers[1] - if offsets != nil { - a.offsets = arrow.Int32Traits.CastFromBytes(offsets.Bytes()) - } - sizes := data.buffers[2] - if sizes != nil { - a.sizes = arrow.Int32Traits.CastFromBytes(sizes.Bytes()) - } - a.values = MakeFromData(data.childData[0]) -} - -func (a *ListView) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - slice := a.newListValue(i) - defer slice.Release() - v, err := json.Marshal(slice) - if err != nil { - panic(err) - } - return json.RawMessage(v) -} - -func (a *ListView) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func 
arrayEqualListView(left, right *ListView) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return Equal(l, r) - }() - if !o { - return false - } - } - return true -} - -// Len returns the number of elements in the array. -func (a *ListView) Len() int { return a.array.Len() } - -func (a *ListView) Offsets() []int32 { return a.offsets } - -func (a *ListView) Sizes() []int32 { return a.sizes } - -func (a *ListView) Retain() { - a.array.Retain() - a.values.Retain() -} - -func (a *ListView) Release() { - a.array.Release() - a.values.Release() -} - -func (a *ListView) ValueOffsets(i int) (start, end int64) { - debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") - j := i + a.array.data.offset - size := int64(a.sizes[j]) - // If size is 0, skip accessing offsets. - if size == 0 { - start, end = 0, 0 - return - } - start = int64(a.offsets[j]) - end = start + size - return -} - -// LargeListView represents an immutable sequence of array values defined by an -// offset into a child array and a length. -type LargeListView struct { - array - values arrow.Array - offsets []int64 - sizes []int64 -} - -var _ VarLenListLike = (*LargeListView)(nil) - -// NewLargeListViewData returns a new LargeListView array value, from data. 
-func NewLargeListViewData(data arrow.ArrayData) *LargeListView { - a := new(LargeListView) - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *LargeListView) ListValues() arrow.Array { return a.values } - -func (a *LargeListView) ValueStr(i int) string { - if !a.IsValid(i) { - return NullValueStr - } - return string(a.GetOneForMarshal(i).(json.RawMessage)) -} - -func (a *LargeListView) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - if a.IsNull(i) { - o.WriteString(NullValueStr) - continue - } - sub := a.newListValue(i) - fmt.Fprintf(o, "%v", sub) - sub.Release() - } - o.WriteString("]") - return o.String() -} - -func (a *LargeListView) newListValue(i int) arrow.Array { - beg, end := a.ValueOffsets(i) - return NewSlice(a.values, beg, end) -} - -func (a *LargeListView) setData(data *Data) { - debug.Assert(len(data.buffers) >= 3, "list-view data should have 3 buffers") - a.array.setData(data) - offsets := data.buffers[1] - if offsets != nil { - a.offsets = arrow.Int64Traits.CastFromBytes(offsets.Bytes()) - } - sizes := data.buffers[2] - if sizes != nil { - a.sizes = arrow.Int64Traits.CastFromBytes(sizes.Bytes()) - } - a.values = MakeFromData(data.childData[0]) -} - -func (a *LargeListView) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - slice := a.newListValue(i) - defer slice.Release() - v, err := json.Marshal(slice) - if err != nil { - panic(err) - } - return json.RawMessage(v) -} - -func (a *LargeListView) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func arrayEqualLargeListView(left, right *LargeListView) bool { - for i := 0; i < left.Len(); i++ { - if 
left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return Equal(l, r) - }() - if !o { - return false - } - } - return true -} - -// Len returns the number of elements in the array. -func (a *LargeListView) Len() int { return a.array.Len() } - -func (a *LargeListView) Offsets() []int64 { return a.offsets } - -func (a *LargeListView) Sizes() []int64 { return a.sizes } - -func (a *LargeListView) ValueOffsets(i int) (start, end int64) { - debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") - j := i + a.array.data.offset - size := a.sizes[j] - // If size is 0, skip accessing offsets. - if size == 0 { - return 0, 0 - } - start = a.offsets[j] - end = start + size - return -} - -func (a *LargeListView) Retain() { - a.array.Retain() - a.values.Retain() -} - -func (a *LargeListView) Release() { - a.array.Release() - a.values.Release() -} - -// Accessors for offsets and sizes to make ListView and LargeListView validation generic. -type offsetsAndSizes interface { - offsetAt(slot int64) int64 - sizeAt(slot int64) int64 -} - -var _ offsetsAndSizes = (*ListView)(nil) -var _ offsetsAndSizes = (*LargeListView)(nil) - -func (a *ListView) offsetAt(slot int64) int64 { return int64(a.offsets[int64(a.data.offset)+slot]) } - -func (a *ListView) sizeAt(slot int64) int64 { return int64(a.sizes[int64(a.data.offset)+slot]) } - -func (a *LargeListView) offsetAt(slot int64) int64 { return a.offsets[int64(a.data.offset)+slot] } - -func (a *LargeListView) sizeAt(slot int64) int64 { return a.sizes[int64(a.data.offset)+slot] } - -func outOfBoundsListViewOffset(l offsetsAndSizes, slot int64, offsetLimit int64) error { - offset := l.offsetAt(slot) - return fmt.Errorf("%w: Offset invariant failure: offset for slot %d out of bounds. 
Expected %d to be at least 0 and less than %d", arrow.ErrInvalid, slot, offset, offsetLimit) -} - -func outOfBoundsListViewSize(l offsetsAndSizes, slot int64, offsetLimit int64) error { - size := l.sizeAt(slot) - if size < 0 { - return fmt.Errorf("%w: Offset invariant failure: size for slot %d out of bounds: %d < 0", arrow.ErrInvalid, slot, size) - } - offset := l.offsetAt(slot) - return fmt.Errorf("%w: Offset invariant failure: size for slot %d out of bounds: %d + %d > %d", arrow.ErrInvalid, slot, offset, size, offsetLimit) -} - -// Pre-condition: Basic validation has already been performed -func (a *array) fullyValidateOffsetsAndSizes(l offsetsAndSizes, offsetLimit int64) error { - for slot := int64(0); slot < int64(a.Len()); slot += 1 { - size := l.sizeAt(slot) - if size > 0 { - offset := l.offsetAt(slot) - if offset < 0 || offset > offsetLimit { - return outOfBoundsListViewOffset(l, slot, offsetLimit) - } - if size > offsetLimit-int64(offset) { - return outOfBoundsListViewSize(l, slot, offsetLimit) - } - } else if size < 0 { - return outOfBoundsListViewSize(l, slot, offsetLimit) - } - } - - return nil -} - -func (a *array) validateOffsetsAndMaybeSizes(l offsetsAndSizes, offsetByteWidth int, isListView bool, offsetLimit int64, fullValidation bool) error { - nonEmpty := a.Len() > 0 - if a.data.buffers[1] == nil { - // For length 0, an empty offsets buffer is accepted (ARROW-544). 
- if nonEmpty { - return fmt.Errorf("non-empty array but offsets are null") - } - return nil - } - if isListView && a.data.buffers[2] == nil { - if nonEmpty { - return fmt.Errorf("non-empty array but sizes are null") - } - return nil - } - - var requiredOffsets int - if nonEmpty { - requiredOffsets = a.Len() + a.Offset() - if !isListView { - requiredOffsets += 1 - } - } else { - requiredOffsets = 0 - } - offsetsByteSize := a.data.buffers[1].Len() - if offsetsByteSize/offsetByteWidth < requiredOffsets { - return fmt.Errorf("offsets buffer size (bytes): %d isn't large enough for length: %d and offset: %d", - offsetsByteSize, a.Len(), a.Offset()) - } - if isListView { - requiredSizes := a.Len() + a.Offset() - sizesBytesSize := a.data.buffers[2].Len() - if sizesBytesSize/offsetByteWidth < requiredSizes { - return fmt.Errorf("sizes buffer size (bytes): %d isn't large enough for length: %d and offset: %d", - sizesBytesSize, a.Len(), a.Offset()) - } - } - - if fullValidation && requiredOffsets > 0 { - if isListView { - return a.fullyValidateOffsetsAndSizes(l, offsetLimit) - } - // TODO: implement validation of List and LargeList - // return fullyValidateOffsets(offset_limit) - return nil - } - return nil -} - -func (a *ListView) validate(fullValidation bool) error { - values := a.array.data.childData[0] - offsetLimit := values.Len() - return a.array.validateOffsetsAndMaybeSizes(a, 4, true, int64(offsetLimit), fullValidation) -} - -func (a *ListView) Validate() error { - return a.validate(false) -} - -func (a *ListView) ValidateFull() error { - return a.validate(true) -} - -func (a *LargeListView) validate(fullValidation bool) error { - values := a.array.data.childData[0] - offsetLimit := values.Len() - return a.array.validateOffsetsAndMaybeSizes(a, 8, true, int64(offsetLimit), fullValidation) -} - -func (a *LargeListView) Validate() error { - return a.validate(false) -} - -func (a *LargeListView) ValidateFull() error { - return a.validate(true) -} - -type 
baseListViewBuilder struct { - builder - - values Builder // value builder for the list-view's elements. - offsets Builder - sizes Builder - - // actual list-view type - dt arrow.DataType - appendOffsetVal func(int) - appendSizeVal func(int) -} - -type ListViewBuilder struct { - baseListViewBuilder -} - -type LargeListViewBuilder struct { - baseListViewBuilder -} - -// NewListViewBuilder returns a builder, using the provided memory allocator. -// The created list-view builder will create a list whose elements will be -// of type etype. -func NewListViewBuilder(mem memory.Allocator, etype arrow.DataType) *ListViewBuilder { - offsetBldr := NewInt32Builder(mem) - sizeBldr := NewInt32Builder(mem) - return &ListViewBuilder{ - baseListViewBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, etype), - offsets: offsetBldr, - sizes: sizeBldr, - dt: arrow.ListViewOf(etype), - appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) }, - appendSizeVal: func(s int) { sizeBldr.Append(int32(s)) }, - }, - } -} - -// NewListViewBuilderWithField takes a field to use for the child rather than just -// a datatype to allow for more customization. 
-func NewListViewBuilderWithField(mem memory.Allocator, field arrow.Field) *ListViewBuilder { - offsetBldr := NewInt32Builder(mem) - sizeBldr := NewInt32Builder(mem) - return &ListViewBuilder{ - baseListViewBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, field.Type), - offsets: offsetBldr, - sizes: sizeBldr, - dt: arrow.ListViewOfField(field), - appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) }, - appendSizeVal: func(s int) { sizeBldr.Append(int32(s)) }, - }, - } -} - -func (b *baseListViewBuilder) Type() arrow.DataType { - switch dt := b.dt.(type) { - case *arrow.ListViewType: - f := dt.ElemField() - f.Type = b.values.Type() - return arrow.ListViewOfField(f) - case *arrow.LargeListViewType: - f := dt.ElemField() - f.Type = b.values.Type() - return arrow.LargeListViewOfField(f) - } - return nil -} - -// NewLargeListViewBuilder returns a builder, using the provided memory allocator. -// The created list-view builder will create a list whose elements will be of type etype. 
-func NewLargeListViewBuilder(mem memory.Allocator, etype arrow.DataType) *LargeListViewBuilder { - offsetBldr := NewInt64Builder(mem) - sizeBldr := NewInt64Builder(mem) - return &LargeListViewBuilder{ - baseListViewBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, etype), - offsets: offsetBldr, - sizes: sizeBldr, - dt: arrow.LargeListViewOf(etype), - appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) }, - appendSizeVal: func(s int) { sizeBldr.Append(int64(s)) }, - }, - } -} - -// NewLargeListViewBuilderWithField takes a field rather than just an element type -// to allow for more customization of the final type of the LargeListView Array -func NewLargeListViewBuilderWithField(mem memory.Allocator, field arrow.Field) *LargeListViewBuilder { - offsetBldr := NewInt64Builder(mem) - sizeBldr := NewInt64Builder(mem) - return &LargeListViewBuilder{ - baseListViewBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, field.Type), - offsets: offsetBldr, - sizes: sizeBldr, - dt: arrow.LargeListViewOfField(field), - appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) }, - appendSizeVal: func(o int) { sizeBldr.Append(int64(o)) }, - }, - } -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *baseListViewBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - b.values.Release() - b.offsets.Release() - b.sizes.Release() - } -} - -func (b *baseListViewBuilder) AppendDimensions(offset int, listSize int) { - b.Reserve(1) - b.unsafeAppendBoolToBitmap(true) - b.appendOffsetVal(offset) - b.appendSizeVal(listSize) -} - -func (b *baseListViewBuilder) Append(v bool) { - debug.Assert(false, "baseListViewBuilder.Append should never be called -- use AppendWithSize instead") -} - -func (b *baseListViewBuilder) AppendWithSize(v bool, listSize int) { - debug.Assert(v || listSize == 0, "invalid list-view should have size 0") - b.Reserve(1) - b.unsafeAppendBoolToBitmap(v) - b.appendOffsetVal(b.values.Len()) - b.appendSizeVal(listSize) -} - -func (b *baseListViewBuilder) AppendNull() { - b.AppendWithSize(false, 0) -} - -func (b *baseListViewBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *baseListViewBuilder) AppendEmptyValue() { - b.AppendWithSize(true, 0) -} - -func (b *baseListViewBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *ListViewBuilder) AppendValuesWithSizes(offsets []int32, sizes []int32, valid []bool) { - b.Reserve(len(valid)) - b.offsets.(*Int32Builder).AppendValues(offsets, nil) - b.sizes.(*Int32Builder).AppendValues(sizes, nil) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) -} - -func (b *LargeListViewBuilder) AppendValuesWithSizes(offsets []int64, sizes []int64, valid []bool) { - b.Reserve(len(valid)) - b.offsets.(*Int64Builder).AppendValues(offsets, nil) - b.sizes.(*Int64Builder).AppendValues(sizes, nil) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) -} - -func (b *baseListViewBuilder) unsafeAppendBoolToBitmap(isValid bool) { - if isValid { - 
bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -func (b *baseListViewBuilder) init(capacity int) { - b.builder.init(capacity) - b.offsets.init(capacity) - b.sizes.init(capacity) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *baseListViewBuilder) Reserve(n int) { - b.builder.reserve(n, b.resizeHelper) - b.offsets.Reserve(n) - b.sizes.Reserve(n) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *baseListViewBuilder) Resize(n int) { - b.resizeHelper(n) - b.offsets.Resize(n) - b.sizes.Resize(n) -} - -func (b *baseListViewBuilder) resizeHelper(n int) { - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(n, b.builder.init) - } -} - -func (b *baseListViewBuilder) ValueBuilder() Builder { - return b.values -} - -// NewArray creates a ListView array from the memory buffers used by the builder and -// resets the ListViewBuilder so it can be used to build a new array. -func (b *ListViewBuilder) NewArray() arrow.Array { - return b.NewListViewArray() -} - -// NewArray creates a LargeListView array from the memory buffers used by the builder -// and resets the LargeListViewBuilder so it can be used to build a new array. -func (b *LargeListViewBuilder) NewArray() arrow.Array { - return b.NewLargeListViewArray() -} - -// NewListViewArray creates a ListView array from the memory buffers used by the builder -// and resets the ListViewBuilder so it can be used to build a new array. 
-func (b *ListViewBuilder) NewListViewArray() (a *ListView) { - data := b.newData() - a = NewListViewData(data) - data.Release() - return -} - -// NewLargeListViewArray creates a ListView array from the memory buffers used by the -// builder and resets the LargeListViewBuilder so it can be used to build a new array. -func (b *LargeListViewBuilder) NewLargeListViewArray() (a *LargeListView) { - data := b.newData() - a = NewLargeListViewData(data) - data.Release() - return -} - -func (b *baseListViewBuilder) newData() (data *Data) { - values := b.values.NewArray() - defer values.Release() - - var offsets *memory.Buffer - if b.offsets != nil { - arr := b.offsets.NewArray() - defer arr.Release() - offsets = arr.Data().Buffers()[1] - } - - var sizes *memory.Buffer - if b.sizes != nil { - arr := b.sizes.NewArray() - defer arr.Release() - sizes = arr.Data().Buffers()[1] - } - - data = NewData( - b.Type(), b.length, - []*memory.Buffer{ - b.nullBitmap, - offsets, - sizes, - }, - []arrow.ArrayData{values.Data()}, - b.nulls, - 0, - ) - b.reset() - - return -} - -func (b *baseListViewBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - - return b.UnmarshalOne(json.NewDecoder(strings.NewReader(s))) -} - -func (b *baseListViewBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch t { - case json.Delim('['): - offset := b.values.Len() - // 0 is a placeholder size as we don't know the actual size yet - b.AppendWithSize(true, 0) - if err := b.values.Unmarshal(dec); err != nil { - return err - } - // consume ']' - _, err := dec.Token() - // replace the last size with the actual size - switch b.sizes.(type) { - case *Int32Builder: - b.sizes.(*Int32Builder).rawData[b.sizes.Len()-1] = int32(b.values.Len() - offset) - case *Int64Builder: - b.sizes.(*Int64Builder).rawData[b.sizes.Len()-1] = int64(b.values.Len() - offset) - } - return err - case nil: - b.AppendNull() - 
default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Struct: b.dt.String(), - } - } - - return nil -} - -func (b *baseListViewBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *baseListViewBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("list-view builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -// Find the minimum offset+size in a LIST_VIEW/LARGE_LIST_VIEW array. -// -// Pre-conditions: -// -// input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64 -// input.Len() > 0 && input.NullN() != input.Len() -func minListViewOffset[Offset int32 | int64](input arrow.ArrayData) Offset { - var bitmap []byte - if input.Buffers()[0] != nil { - bitmap = input.Buffers()[0].Bytes() - } - offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[input.Offset():] - sizes := arrow.GetData[Offset](input.Buffers()[2].Bytes())[input.Offset():] - - isNull := func(i int) bool { - return bitmap != nil && bitutil.BitIsNotSet(bitmap, input.Offset()+i) - } - - // It's very likely that the first non-null non-empty list-view starts at - // offset 0 of the child array. - i := 0 - for i < input.Len() && (isNull(i) || sizes[i] == 0) { - i += 1 - } - if i >= input.Len() { - return 0 - } - minOffset := offsets[i] - if minOffset == 0 { - // early exit: offset 0 found already - return 0 - } - - // Slow path: scan the buffers entirely. - i += 1 - for ; i < input.Len(); i += 1 { - if isNull(i) { - continue - } - offset := offsets[i] - if offset < minOffset && sizes[i] > 0 { - minOffset = offset - } - } - return minOffset -} - -// Find the maximum offset+size in a LIST_VIEW/LARGE_LIST_VIEW array. 
//
// Pre-conditions:
//
//	input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64
//	input.Len() > 0 && input.NullN() != input.Len()
func maxListViewEnd[Offset int32 | int64](input arrow.ArrayData) Offset {
	inputOffset := input.Offset()
	// Buffer 0 is the optional validity bitmap; nil means all values valid.
	var bitmap []byte
	if input.Buffers()[0] != nil {
		bitmap = input.Buffers()[0].Bytes()
	}
	// Offsets and sizes re-based to this array's logical offset.
	offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[inputOffset:]
	sizes := arrow.GetData[Offset](input.Buffers()[2].Bytes())[inputOffset:]

	isNull := func(i int) bool {
		return bitmap != nil && bitutil.BitIsNotSet(bitmap, inputOffset+i)
	}

	// Scan backwards for the last non-null, non-empty view: it is likely to
	// end at (or near) the end of the child array.
	i := input.Len() - 1 // safe because input.Len() > 0
	for i != 0 && (isNull(i) || sizes[i] == 0) {
		i -= 1
	}
	offset := offsets[i]
	size := sizes[i]
	if i == 0 {
		// Only element 0 remains: it is either null/empty (no values used)
		// or its own end is the answer.
		if isNull(i) || sizes[i] == 0 {
			return 0
		} else {
			return offset + size
		}
	}

	values := input.Children()[0]
	maxEnd := offsets[i] + sizes[i]
	if maxEnd == Offset(values.Len()) {
		// Early-exit: maximum possible view-end found already.
		return maxEnd
	}

	// Slow path: scan the buffers entirely.
	for ; i >= 0; i -= 1 {
		offset := offsets[i]
		size := sizes[i]
		// Only non-null, non-empty views reference child values.
		if size > 0 && !isNull(i) {
			if offset+size > maxEnd {
				maxEnd = offset + size
				if maxEnd == Offset(values.Len()) {
					// No view can end past the child array; stop early.
					return maxEnd
				}
			}
		}
	}
	return maxEnd
}

// rangeOfValuesUsed returns (offset, length): the smallest contiguous span of
// the child values array referenced by the list values in input. For list-view
// types the offsets may be out of order, so dedicated min/max scans are used;
// for LIST/LARGE_LIST/MAP the offsets are monotonic and the first and last
// offsets bound the range directly.
func rangeOfValuesUsed(input arrow.ArrayData) (int, int) {
	// Empty or all-null arrays reference no child values.
	if input.Len() == 0 || input.NullN() == input.Len() {
		return 0, 0
	}
	var minOffset, maxEnd int
	switch input.DataType().(type) {
	case *arrow.ListViewType:
		minOffset = int(minListViewOffset[int32](input))
		maxEnd = int(maxListViewEnd[int32](input))
	case *arrow.LargeListViewType:
		minOffset = int(minListViewOffset[int64](input))
		maxEnd = int(maxListViewEnd[int64](input))
	case *arrow.ListType:
		offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():]
		minOffset = int(offsets[0])
		maxEnd = int(offsets[len(offsets)-1])
	case *arrow.LargeListType:
		offsets := arrow.Int64Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():]
		minOffset = int(offsets[0])
		maxEnd = int(offsets[len(offsets)-1])
	case *arrow.MapType:
		// Map is laid out exactly like LIST with int32 offsets.
		offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():]
		minOffset = int(offsets[0])
		maxEnd = int(offsets[len(offsets)-1])
	}
	return minOffset, maxEnd - minOffset
}

// Returns the smallest contiguous range of values of the child array that are
// referenced by all the list values in the input array.
-func RangeOfValuesUsed(input VarLenListLike) (int, int) { - return rangeOfValuesUsed(input.Data()) -} - -var ( - _ arrow.Array = (*List)(nil) - _ arrow.Array = (*LargeList)(nil) - _ arrow.Array = (*ListView)(nil) - _ arrow.Array = (*LargeListView)(nil) - - _ Builder = (*ListBuilder)(nil) - _ Builder = (*LargeListBuilder)(nil) - _ Builder = (*ListViewBuilder)(nil) - _ Builder = (*LargeListViewBuilder)(nil) - - _ VarLenListLike = (*List)(nil) - _ VarLenListLike = (*LargeList)(nil) - _ VarLenListLike = (*Map)(nil) - _ VarLenListLike = (*ListView)(nil) - _ VarLenListLike = (*LargeListView)(nil) - _ ListLike = (*FixedSizeList)(nil) - - _ VarLenListLikeBuilder = (*ListBuilder)(nil) - _ VarLenListLikeBuilder = (*LargeListBuilder)(nil) - _ VarLenListLikeBuilder = (*ListBuilder)(nil) - _ VarLenListLikeBuilder = (*LargeListBuilder)(nil) - _ VarLenListLikeBuilder = (*MapBuilder)(nil) - _ ListLikeBuilder = (*FixedSizeListBuilder)(nil) -) diff --git a/go/arrow/array/list_test.go b/go/arrow/array/list_test.go deleted file mode 100644 index f6f42a31299e4..0000000000000 --- a/go/arrow/array/list_test.go +++ /dev/null @@ -1,864 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestListArray(t *testing.T) { - tests := []struct { - typeID arrow.Type - offsets interface{} - sizes interface{} - dt arrow.DataType - }{ - {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOfField(arrow.Field{Name: "item", Type: arrow.PrimitiveTypes.Int32, Nullable: true})}, - {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOfField(arrow.Field{Name: "item", Type: arrow.PrimitiveTypes.Int32, Nullable: true})}, - {arrow.LIST_VIEW, []int32{0, 3, 3, 3}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST_VIEW, []int64{0, 3, 3, 3}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, - } - - for _, tt := range tests { - t.Run(tt.typeID.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{0, 1, 2, 3, 4, 5, 6} - lengths = []int{3, 0, 0, 4} - isValid = []bool{true, false, true, true} - ) - - lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) - defer lb.Release() - - for i := 0; i < 10; i++ { - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - pos := 0 - for i, length := range lengths { - lb.AppendWithSize(isValid[i], length) - for j := 0; j < length; j++ { - vb.Append(vs[pos]) - pos++ - } - } - - arr := lb.NewArray().(array.ListLike) - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.DataType().ID(), tt.typeID; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != 
want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), !isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - var gotOffsets, gotSizes interface{} - switch tt.typeID { - case arrow.LIST: - arr := arr.(*array.List) - gotOffsets = arr.Offsets() - case arrow.LARGE_LIST: - arr := arr.(*array.LargeList) - gotOffsets = arr.Offsets() - case arrow.LIST_VIEW: - arr := arr.(*array.ListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - case arrow.LARGE_LIST_VIEW: - arr := arr.(*array.LargeListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - } - - if !reflect.DeepEqual(gotOffsets, tt.offsets) { - t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) - } - - if tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW { - if !reflect.DeepEqual(gotSizes, tt.sizes) { - t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) - } - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - } - }) - } -} - -// Like the list-view tests in TestListArray, but with out-of-order offsets. 
-func TestListViewArray(t *testing.T) { - tests := []struct { - typeID arrow.Type - offsets interface{} - sizes interface{} - dt arrow.DataType - }{ - {arrow.LIST_VIEW, []int32{5, 0, 0, 1}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST_VIEW, []int64{5, 0, 0, 1}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, - } - - for _, tt := range tests { - t.Run(tt.typeID.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{-1, 3, 4, 5, 6, 0, 1, 2} - lengths = []int{3, 0, 0, 4} - isValid = []bool{true, false, true, true} - ) - - lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) - defer lb.Release() - - for i := 0; i < 10; i++ { - switch lvb := lb.(type) { - case *array.ListViewBuilder: - lvb.AppendDimensions(5, 3) - lb.AppendNull() - lvb.AppendDimensions(0, 0) - lvb.AppendDimensions(1, 4) - case *array.LargeListViewBuilder: - lvb.AppendDimensions(5, 3) - lb.AppendNull() - lvb.AppendDimensions(0, 0) - lvb.AppendDimensions(1, 4) - } - - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - vb.AppendValues(vs, []bool{false, true, true, true, true, true, true, true}) - - arr := lb.NewArray().(array.ListLike) - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.DataType().ID(), tt.typeID; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), !isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - var gotOffsets, gotSizes interface{} - switch tt.typeID { - case arrow.LIST_VIEW: - arr := arr.(*array.ListView) - gotOffsets = arr.Offsets() - 
gotSizes = arr.Sizes() - case arrow.LARGE_LIST_VIEW: - arr := arr.(*array.LargeListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - } - - if !reflect.DeepEqual(gotOffsets, tt.offsets) { - t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) - } - - if !reflect.DeepEqual(gotSizes, tt.sizes) { - t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - } - }) - } -} - -func TestListArrayEmpty(t *testing.T) { - typ := []arrow.DataType{ - arrow.ListOf(arrow.PrimitiveTypes.Int32), - arrow.LargeListOf(arrow.PrimitiveTypes.Int32), - arrow.ListViewOf(arrow.PrimitiveTypes.Int32), - arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32), - } - - for _, dt := range typ { - t.Run(dt.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - lb := array.NewBuilder(pool, dt) - defer lb.Release() - arr := lb.NewArray() - defer arr.Release() - if got, want := arr.Len(), 0; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - }) - } -} - -func TestListArrayBulkAppend(t *testing.T) { - tests := []struct { - typeID arrow.Type - offsets interface{} - sizes interface{} - dt arrow.DataType - }{ - {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LIST_VIEW, []int32{0, 3, 3, 3}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST_VIEW, []int64{0, 3, 3, 3}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, - } - - for _, tt := range tests { - t.Run(tt.typeID.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{0, 1, 2, 3, 4, 5, 6} - lengths = []int{3, 0, 0, 4} - 
isValid = []bool{true, false, true, true} - ) - - lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) - defer lb.Release() - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - switch tt.typeID { - case arrow.LIST: - lb.(*array.ListBuilder).AppendValues(tt.offsets.([]int32), isValid) - case arrow.LARGE_LIST: - lb.(*array.LargeListBuilder).AppendValues(tt.offsets.([]int64), isValid) - case arrow.LIST_VIEW: - lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), tt.sizes.([]int32), isValid) - case arrow.LARGE_LIST_VIEW: - lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) - } - for _, v := range vs { - vb.Append(v) - } - - arr := lb.NewArray().(array.VarLenListLike) - defer arr.Release() - - if got, want := arr.DataType().ID(), tt.typeID; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), !isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - var gotOffsets, gotSizes interface{} - switch tt.typeID { - case arrow.LIST: - arr := arr.(*array.List) - gotOffsets = arr.Offsets() - case arrow.LARGE_LIST: - arr := arr.(*array.LargeList) - gotOffsets = arr.Offsets() - case arrow.LIST_VIEW: - arr := arr.(*array.ListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - case arrow.LARGE_LIST_VIEW: - arr := arr.(*array.LargeListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - } - - if !reflect.DeepEqual(gotOffsets, tt.offsets) { - t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) - } - if tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW { - if !reflect.DeepEqual(gotSizes, tt.sizes) { - t.Fatalf("got=%v, 
want=%v", gotSizes, tt.sizes) - } - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - }) - } -} - -func TestListViewArrayBulkAppend(t *testing.T) { - tests := []struct { - typeID arrow.Type - offsets interface{} - sizes interface{} - dt arrow.DataType - }{ - {arrow.LIST_VIEW, []int32{5, 0, 0, 1}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST_VIEW, []int64{5, 0, 0, 1}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, - } - - for _, tt := range tests { - t.Run(tt.typeID.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{-1, 3, 4, 5, 6, 0, 1, 2} - lengths = []int{3, 0, 0, 4} - isValid = []bool{true, false, true, true} - ) - - lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) - defer lb.Release() - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - switch tt.typeID { - case arrow.LIST_VIEW: - lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), tt.sizes.([]int32), isValid) - case arrow.LARGE_LIST_VIEW: - lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) - } - for _, v := range vs { - vb.Append(v) - } - - arr := lb.NewArray().(array.VarLenListLike) - defer arr.Release() - - if got, want := arr.DataType().ID(), tt.typeID; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), !isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - var gotOffsets, gotSizes 
interface{} - switch tt.typeID { - case arrow.LIST_VIEW: - arr := arr.(*array.ListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - case arrow.LARGE_LIST_VIEW: - arr := arr.(*array.LargeListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - } - - if !reflect.DeepEqual(gotOffsets, tt.offsets) { - t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) - } - if !reflect.DeepEqual(gotSizes, tt.sizes) { - t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - }) - } -} - -func TestListArraySlice(t *testing.T) { - tests := []struct { - typeID arrow.Type - offsets interface{} - sizes interface{} - dt arrow.DataType - }{ - {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LIST_VIEW, []int32{0, 3, 3, 3, 7}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST_VIEW, []int64{0, 3, 3, 3, 7}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, - } - - for _, tt := range tests { - t.Run(tt.typeID.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{0, 1, 2, 3, 4, 5, 6} - lengths = []int{3, 0, 0, 4} - isValid = []bool{true, false, true, true} - ) - - lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) - defer lb.Release() - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - switch tt.typeID { - case arrow.LIST: - lb.(*array.ListBuilder).AppendValues(tt.offsets.([]int32), isValid) - case arrow.LARGE_LIST: - lb.(*array.LargeListBuilder).AppendValues(tt.offsets.([]int64), isValid) - case arrow.LIST_VIEW: - lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), 
tt.sizes.([]int32), isValid) - case arrow.LARGE_LIST_VIEW: - lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) - } - for _, v := range vs { - vb.Append(v) - } - - arr := lb.NewArray().(array.VarLenListLike) - defer arr.Release() - - if got, want := arr.DataType().ID(), tt.typeID; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), !isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - var gotOffsets, gotSizes interface{} - switch tt.typeID { - case arrow.LIST: - arr := arr.(*array.List) - gotOffsets = arr.Offsets() - case arrow.LARGE_LIST: - arr := arr.(*array.LargeList) - gotOffsets = arr.Offsets() - case arrow.LIST_VIEW: - arr := arr.(*array.ListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - case arrow.LARGE_LIST_VIEW: - arr := arr.(*array.LargeListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - } - - if !reflect.DeepEqual(gotOffsets, tt.offsets) { - t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) - } - - if tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW { - if !reflect.DeepEqual(gotSizes, tt.sizes) { - t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) - } - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.String(), `[[0 1 2] (null) [] [3 4 5 6]]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - assert.Equal(t, "[0,1,2]", arr.ValueStr(0)) - - sub := array.NewSlice(arr, 1, 4).(array.ListLike) - defer sub.Release() - - if got, want := sub.String(), `[(null) [] [3 4 5 6]]`; got != want 
{ - t.Fatalf("got=%q, want=%q", got, want) - } - }) - } -} - -func TestListViewArraySlice(t *testing.T) { - tests := []struct { - typeID arrow.Type - offsets interface{} - sizes interface{} - dt arrow.DataType - }{ - {arrow.LIST_VIEW, []int32{5, 0, 0, 1}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST_VIEW, []int64{5, 0, 0, 1}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, - } - - for _, tt := range tests { - t.Run(tt.typeID.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{-1, 3, 4, 5, 6, 0, 1, 2} - lengths = []int{3, 0, 0, 4} - isValid = []bool{true, false, true, true} - ) - - lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) - defer lb.Release() - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - switch tt.typeID { - case arrow.LIST_VIEW: - lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), tt.sizes.([]int32), isValid) - case arrow.LARGE_LIST_VIEW: - lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) - } - for _, v := range vs { - vb.Append(v) - } - - arr := lb.NewArray().(array.VarLenListLike) - defer arr.Release() - - if got, want := arr.DataType().ID(), tt.typeID; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), !isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - var gotOffsets, gotSizes interface{} - switch tt.typeID { - case arrow.LIST_VIEW: - arr := arr.(*array.ListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - case 
arrow.LARGE_LIST_VIEW: - arr := arr.(*array.LargeListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - } - - if !reflect.DeepEqual(gotOffsets, tt.offsets) { - t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) - } - - if !reflect.DeepEqual(gotSizes, tt.sizes) { - t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.String(), `[[0 1 2] (null) [] [3 4 5 6]]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - assert.Equal(t, "[0,1,2]", arr.ValueStr(0)) - - sub := array.NewSlice(arr, 1, 4).(array.ListLike) - defer sub.Release() - - if got, want := sub.String(), `[(null) [] [3 4 5 6]]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - }) - } -} - -func TestVarLenListLikeStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - builders := []array.VarLenListLikeBuilder{ - array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewLargeListBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - } - - builders1 := []array.VarLenListLikeBuilder{ - array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewLargeListBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - } - - for i, b := range builders { - defer b.Release() - - vb := b.ValueBuilder().(*array.Int32Builder) - - var values = [][]int32{ - {0, 1, 2, 3, 4, 5, 6}, - {1, 2, 3, 4, 5, 6, 7}, - {2, 3, 4, 5, 6, 7, 8}, - {3, 4, 5, 6, 7, 8, 9}, - } - for _, value := range values { - b.AppendNull() - b.AppendWithSize(true, 2*len(value)) - for _, el := range value { - 
vb.Append(el) - vb.AppendNull() - } - b.AppendWithSize(false, 0) - } - - arr := b.NewArray() - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := builders1[i] - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray() - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) - } -} - -// Test the string roun-trip for a list-view containing out-of-order offsets. -func TestListViewStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - builders := []array.VarLenListLikeBuilder{ - array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - } - - builders1 := []array.VarLenListLikeBuilder{ - array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - } - - for i, b := range builders { - defer b.Release() - - switch lvb := b.(type) { - case *array.ListViewBuilder: - lvb.AppendDimensions(5, 3) - b.AppendNull() - lvb.AppendDimensions(0, 0) - lvb.AppendDimensions(1, 4) - case *array.LargeListViewBuilder: - lvb.AppendDimensions(5, 3) - b.AppendNull() - lvb.AppendDimensions(0, 0) - lvb.AppendDimensions(1, 4) - } - - vb := b.ValueBuilder().(*array.Int32Builder) - - vs := []int32{-1, 3, 4, 5, 6, 0, 1, 2} - isValid := []bool{false, true, true, true, true, true, true, true} - vb.Reserve(len(vs)) - vb.AppendValues(vs, isValid) - - arr := b.NewArray() - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := builders1[i] - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray() - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) - } -} - -func TestRangeOfValuesUsed(t *testing.T) { - tests := []struct { - typeID arrow.Type - dt arrow.DataType - }{ - {arrow.LIST, arrow.ListOf(arrow.PrimitiveTypes.Int16)}, - {arrow.LARGE_LIST, arrow.LargeListOf(arrow.PrimitiveTypes.Int16)}, - {arrow.LIST_VIEW, arrow.ListViewOf(arrow.PrimitiveTypes.Int16)}, - {arrow.LARGE_LIST_VIEW, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int16)}, - } - for _, tt := range tests { - t.Run(tt.typeID.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - isListView := tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW - - bldr := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) - defer bldr.Release() - - var arr array.VarLenListLike - - // Empty array - arr = bldr.NewArray().(array.VarLenListLike) - defer arr.Release() - offset, len := array.RangeOfValuesUsed(arr) - assert.Equal(t, 0, offset) - assert.Equal(t, 0, len) - - // List-like array with only nulls - bldr.AppendNulls(3) - arr = bldr.NewArray().(array.VarLenListLike) - defer arr.Release() - offset, len = array.RangeOfValuesUsed(arr) - assert.Equal(t, 0, offset) - assert.Equal(t, 0, len) - - // Array with nulls and non-nulls (starting at a non-zero offset) - vb := bldr.ValueBuilder().(*array.Int16Builder) - vb.Append(-2) - vb.Append(-1) - bldr.AppendWithSize(false, 0) - bldr.AppendWithSize(true, 2) - vb.Append(0) - vb.Append(1) - bldr.AppendWithSize(true, 3) - vb.Append(2) - vb.Append(3) - vb.Append(4) - if isListView { - vb.Append(10) - vb.Append(11) - } - arr = bldr.NewArray().(array.VarLenListLike) - defer arr.Release() - offset, len = array.RangeOfValuesUsed(arr) - assert.Equal(t, 2, offset) - 
assert.Equal(t, 5, len) - - // Overlapping list-views - // [null, [0, 1, 2, 3, 4, 5], [1, 2], null, [4], null, null] - vb = bldr.ValueBuilder().(*array.Int16Builder) - vb.Append(-2) - vb.Append(-1) - bldr.AppendWithSize(false, 0) - if isListView { - bldr.AppendWithSize(true, 6) - vb.Append(0) - bldr.AppendWithSize(true, 2) - vb.Append(1) - vb.Append(2) - vb.Append(3) - bldr.AppendWithSize(false, 0) - bldr.AppendWithSize(true, 1) - vb.Append(4) - vb.Append(5) - // -- used range ends here -- - vb.Append(10) - vb.Append(11) - } else { - bldr.AppendWithSize(true, 6) - vb.Append(0) - vb.Append(1) - vb.Append(2) - vb.Append(3) - vb.Append(4) - vb.Append(5) - bldr.AppendWithSize(true, 2) - vb.Append(1) - vb.Append(2) - bldr.AppendWithSize(false, 0) - bldr.AppendWithSize(true, 1) - vb.Append(4) - } - bldr.AppendNulls(2) - arr = bldr.NewArray().(array.VarLenListLike) - defer arr.Release() - - // Check the range - offset, len = array.RangeOfValuesUsed(arr) - assert.Equal(t, 2, offset) - if isListView { - assert.Equal(t, 6, len) - } else { - assert.Equal(t, 9, len) - } - }) - } -} diff --git a/go/arrow/array/map.go b/go/arrow/array/map.go deleted file mode 100644 index a692c2cd6d71a..0000000000000 --- a/go/arrow/array/map.go +++ /dev/null @@ -1,361 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "fmt" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// Map represents an immutable sequence of Key/Value structs. It is a -// logical type that is implemented as a List. -type Map struct { - *List - keys, items arrow.Array -} - -var _ ListLike = (*Map)(nil) - -// NewMapData returns a new Map array value, from data -func NewMapData(data arrow.ArrayData) *Map { - a := &Map{List: &List{}} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// KeysSorted checks the datatype that was used to construct this array and -// returns the KeysSorted boolean value used to denote if the key array is -// sorted for each list element. -// -// Important note: Nothing is enforced regarding the KeysSorted value, it is -// solely a metadata field that should be set if keys within each value are sorted. -// This value is not used at all in regards to comparisons / equality. 
func (a *Map) KeysSorted() bool { return a.DataType().(*arrow.MapType).KeysSorted }

// validateData panics unless data has the shape required of a map array:
// exactly one non-nil struct-typed child with no nulls, exactly two fields,
// and a null-free keys (first) field.
func (a *Map) validateData(data *Data) {
	if len(data.childData) != 1 || data.childData[0] == nil {
		panic("arrow/array: expected one child array for map array")
	}

	if data.childData[0].DataType().ID() != arrow.STRUCT {
		panic("arrow/array: map array child should be struct type")
	}

	if data.childData[0].NullN() != 0 {
		panic("arrow/array: map array child array should have no nulls")
	}

	if len(data.childData[0].Children()) != 2 {
		panic("arrow/array: map array child array should have two fields")
	}

	if data.childData[0].Children()[0].NullN() != 0 {
		panic("arrow/array: map array keys array should have no nulls")
	}
}

// setData validates data, installs it on the embedded List, and materializes
// the keys (field 0) and items (field 1) arrays of the child struct.
func (a *Map) setData(data *Data) {
	a.validateData(data)

	a.List.setData(data)
	a.keys = MakeFromData(data.childData[0].Children()[0])
	a.items = MakeFromData(data.childData[0].Children()[1])
}

// Keys returns the full Array of Key values, equivalent to grabbing
// the key field of the child struct.
func (a *Map) Keys() arrow.Array { return a.keys }

// Items returns the full Array of Item values, equivalent to grabbing
// the Value field (the second field) of the child struct.
func (a *Map) Items() arrow.Array { return a.items }

// Retain increases the reference count by 1.
// Retain may be called simultaneously from multiple goroutines.
func (a *Map) Retain() {
	// keys and items were created via MakeFromData in setData and carry their
	// own references, so they are retained alongside the underlying list.
	a.List.Retain()
	a.keys.Retain()
	a.items.Retain()
}

// Release decreases the reference count by 1.
// Release may be called simultaneously from multiple goroutines.
// When the reference count goes to zero, the memory is freed.
-func (a *Map) Release() { - a.List.Release() - a.keys.Release() - a.items.Release() -} - -func arrayEqualMap(left, right *Map) bool { - // since Map is implemented using a list, we can just use arrayEqualList - return arrayEqualList(left.List, right.List) -} - -type MapBuilder struct { - listBuilder *ListBuilder - - etype *arrow.MapType - keytype, itemtype arrow.DataType - keyBuilder, itemBuilder Builder - keysSorted bool -} - -// NewMapBuilder returns a builder, using the provided memory allocator. -// The created Map builder will create a map array whose keys will be a non-nullable -// array of type `keytype` and whose mapped items will be a nullable array of itemtype. -// -// KeysSorted is not enforced at all by the builder, it should only be set to true -// building using keys in sorted order for each value. The KeysSorted value will just be -// used when creating the DataType for the map. -// -// # Example -// -// Simple example provided of converting a []map[string]int32 to an array.Map -// by using a MapBuilder: -// -// /* assume maplist == []map[string]int32 */ -// bldr := array.NewMapBuilder(memory.DefaultAllocator, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false) -// defer bldr.Release() -// kb := bldr.KeyBuilder().(*array.StringBuilder) -// ib := bldr.ItemBuilder().(*array.Int32Builder) -// for _, m := range maplist { -// bldr.Append(true) -// for k, v := range m { -// kb.Append(k) -// ib.Append(v) -// } -// } -// maparr := bldr.NewMapArray() -// defer maparr.Release() -func NewMapBuilder(mem memory.Allocator, keytype, itemtype arrow.DataType, keysSorted bool) *MapBuilder { - etype := arrow.MapOf(keytype, itemtype) - etype.KeysSorted = keysSorted - listBldr := NewListBuilder(mem, etype.Elem()) - keyBldr := listBldr.ValueBuilder().(*StructBuilder).FieldBuilder(0) - keyBldr.Retain() - itemBldr := listBldr.ValueBuilder().(*StructBuilder).FieldBuilder(1) - itemBldr.Retain() - return &MapBuilder{ - listBuilder: listBldr, - keyBuilder: keyBldr, - 
itemBuilder: itemBldr, - etype: etype, - keytype: keytype, - itemtype: itemtype, - keysSorted: keysSorted, - } -} - -func NewMapBuilderWithType(mem memory.Allocator, dt *arrow.MapType) *MapBuilder { - listBldr := NewListBuilder(mem, dt.Elem()) - keyBldr := listBldr.ValueBuilder().(*StructBuilder).FieldBuilder(0) - keyBldr.Retain() - itemBldr := listBldr.ValueBuilder().(*StructBuilder).FieldBuilder(1) - itemBldr.Retain() - return &MapBuilder{ - listBuilder: listBldr, - keyBuilder: keyBldr, - itemBuilder: itemBldr, - etype: dt, - keytype: dt.KeyType(), - itemtype: dt.ItemType(), - keysSorted: dt.KeysSorted, - } -} - -func (b *MapBuilder) Type() arrow.DataType { return b.etype } - -// Retain increases the reference count by 1 for the sub-builders (list, key, item). -// Retain may be called simultaneously from multiple goroutines. -func (b *MapBuilder) Retain() { - b.listBuilder.Retain() - b.keyBuilder.Retain() - b.itemBuilder.Retain() -} - -// Release decreases the reference count by 1 for the sub builders (list, key, item). -func (b *MapBuilder) Release() { - b.listBuilder.Release() - b.keyBuilder.Release() - b.itemBuilder.Release() -} - -// Len returns the current number of Maps that are in the builder -func (b *MapBuilder) Len() int { return b.listBuilder.Len() } - -// Cap returns the total number of elements that can be stored -// without allocating additional memory. -func (b *MapBuilder) Cap() int { return b.listBuilder.Cap() } - -// NullN returns the number of null values in the array builder. -func (b *MapBuilder) NullN() int { return b.listBuilder.NullN() } - -// IsNull returns if a previously appended value at a given index is null or not. -func (b *MapBuilder) IsNull(i int) bool { - return b.listBuilder.IsNull(i) -} - -// Append adds a new Map element to the array, calling Append(false) is -// equivalent to calling AppendNull. 
-func (b *MapBuilder) Append(v bool) { - b.adjustStructBuilderLen() - b.listBuilder.Append(v) -} - -func (b *MapBuilder) AppendWithSize(v bool, _ int) { - b.Append(v) -} - -// AppendNull adds a null map entry to the array. -func (b *MapBuilder) AppendNull() { - b.Append(false) -} - -// AppendNulls adds null map entry to the array. -func (b *MapBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *MapBuilder) SetNull(i int) { - b.listBuilder.SetNull(i) -} - -func (b *MapBuilder) AppendEmptyValue() { - b.Append(true) -} - -func (b *MapBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -// Reserve enough space for n maps -func (b *MapBuilder) Reserve(n int) { b.listBuilder.Reserve(n) } - -// Resize adjust the space allocated by b to n map elements. If n is greater than -// b.Cap(), additional memory will be allocated. If n is smaller, the allocated memory may be reduced. -func (b *MapBuilder) Resize(n int) { b.listBuilder.Resize(n) } - -// AppendValues is for bulk appending a group of elements with offsets provided -// and validity booleans provided. 
-func (b *MapBuilder) AppendValues(offsets []int32, valid []bool) { - b.adjustStructBuilderLen() - b.listBuilder.AppendValues(offsets, valid) -} - -func (b *MapBuilder) UnsafeAppendBoolToBitmap(v bool) { - b.listBuilder.UnsafeAppendBoolToBitmap(v) -} - -func (b *MapBuilder) init(capacity int) { b.listBuilder.init(capacity) } -func (b *MapBuilder) resize(newBits int, init func(int)) { b.listBuilder.resize(newBits, init) } - -func (b *MapBuilder) adjustStructBuilderLen() { - sb := b.listBuilder.ValueBuilder().(*StructBuilder) - if sb.Len() < b.keyBuilder.Len() { - valids := make([]bool, b.keyBuilder.Len()-sb.Len()) - for i := range valids { - valids[i] = true - } - sb.AppendValues(valids) - } -} - -// NewArray creates a new Map array from the memory buffers used by the builder, and -// resets the builder so it can be used again to build a new Map array. -func (b *MapBuilder) NewArray() arrow.Array { - return b.NewMapArray() -} - -// NewMapArray creates a new Map array from the memory buffers used by the builder, and -// resets the builder so it can be used again to build a new Map array. -func (b *MapBuilder) NewMapArray() (a *Map) { - if !b.etype.ItemField().Nullable && b.ItemBuilder().NullN() > 0 { - panic("arrow/array: item not nullable") - } - - data := b.newData() - defer data.Release() - a = NewMapData(data) - return -} - -func (b *MapBuilder) newData() (data *Data) { - b.adjustStructBuilderLen() - values := b.listBuilder.NewListArray() - defer values.Release() - - data = NewData(b.etype, - values.Len(), values.data.buffers, - values.data.childData, values.NullN(), 0) - return -} - -// KeyBuilder returns a builder that can be used to populate the keys of the maps. -func (b *MapBuilder) KeyBuilder() Builder { return b.keyBuilder } - -// ItemBuilder returns a builder that can be used to populate the values that the -// keys point to. 
-func (b *MapBuilder) ItemBuilder() Builder { return b.itemBuilder } - -// ValueBuilder can be used instead of separately using the Key/Item builders -// to build the list as a List of Structs rather than building the keys/items -// separately. -func (b *MapBuilder) ValueBuilder() Builder { - return b.listBuilder.ValueBuilder() -} - -func (b *MapBuilder) AppendValueFromString(s string) error { - return b.listBuilder.AppendValueFromString(s) -} - -func (b *MapBuilder) UnmarshalOne(dec *json.Decoder) error { - return b.listBuilder.UnmarshalOne(dec) -} - -func (b *MapBuilder) Unmarshal(dec *json.Decoder) error { - return b.listBuilder.Unmarshal(dec) -} - -func (b *MapBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("map builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*Map)(nil) - _ Builder = (*MapBuilder)(nil) - _ ListLikeBuilder = (*MapBuilder)(nil) -) diff --git a/go/arrow/array/map_test.go b/go/arrow/array/map_test.go deleted file mode 100644 index e73508e6afe11..0000000000000 --- a/go/arrow/array/map_test.go +++ /dev/null @@ -1,254 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "strconv" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestMapArray(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - arr, equalArr, unequalArr *array.Map - - equalValid = []bool{true, true, true, true, true, true, true} - equalOffsets = []int32{0, 1, 2, 5, 6, 7, 8, 10} - equalKeys = []string{"a", "a", "a", "b", "c", "a", "a", "a", "a", "b"} - equalValues = []int32{1, 2, 3, 4, 5, 2, 2, 2, 5, 6} - unequalValid = []bool{true, true, true} - unequalOffsets = []int32{0, 1, 4, 7} - unequalKeys = []string{"a", "a", "b", "c", "a", "b", "c"} - unequalValues = []int32{1, 2, 2, 2, 3, 4, 5} - ) - - bldr := array.NewMapBuilder(pool, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false) - defer bldr.Release() - - kb := bldr.KeyBuilder().(*array.StringBuilder) - ib := bldr.ItemBuilder().(*array.Int32Builder) - - bldr.AppendValues(equalOffsets, equalValid) - for _, k := range equalKeys { - kb.Append(k) - } - ib.AppendValues(equalValues, nil) - - assert.Equal(t, len(equalValid), bldr.Len()) - assert.Zero(t, bldr.NullN()) - - arr = bldr.NewMapArray() - defer arr.Release() - - bldr.AppendValues(equalOffsets, equalValid) - for _, k := range equalKeys { - kb.Append(k) - } - ib.AppendValues(equalValues, nil) - - equalArr = bldr.NewMapArray() - defer equalArr.Release() - - bldr.AppendValues(unequalOffsets, 
unequalValid) - for _, k := range unequalKeys { - kb.Append(k) - } - ib.AppendValues(unequalValues, nil) - - unequalArr = bldr.NewMapArray() - defer unequalArr.Release() - - assert.True(t, array.Equal(arr, arr)) - assert.True(t, array.Equal(arr, equalArr)) - assert.True(t, array.Equal(equalArr, arr)) - assert.False(t, array.Equal(equalArr, unequalArr)) - assert.False(t, array.Equal(unequalArr, equalArr)) - - assert.True(t, array.SliceEqual(arr, 0, 1, unequalArr, 0, 1)) - assert.False(t, array.SliceEqual(arr, 0, 2, unequalArr, 0, 2)) - assert.False(t, array.SliceEqual(arr, 1, 2, unequalArr, 1, 2)) - assert.True(t, array.SliceEqual(arr, 2, 3, unequalArr, 2, 3)) - - t.Run("items non nullable", func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := arrow.MapOf(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Int16) - dt.KeysSorted = true - dt.SetItemNullable(false) - - bldr := array.NewBuilder(pool, dt).(*array.MapBuilder) - defer bldr.Release() - - kb := bldr.KeyBuilder().(*array.Int16Builder) - ib := bldr.ItemBuilder().(*array.Int16Builder) - - bldr.Append(true) - kb.Append(1) - ib.AppendNull() - - assert.Panics(t, func() { - _ = bldr.NewArray() - }) - }) -} - -func TestMapArrayBuildIntToInt(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - dtype = arrow.MapOf(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Int16) - keys = []int16{0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5} - items = []int16{1, 1, 2, 3, 5, 8, -1, -1, 0, 1, -1, 2} - validItems = []bool{true, true, true, true, true, true, false, false, true, true, false, true} - offsets = []int32{0, 6, 6, 12, 12} - validMaps = []bool{true, false, true, true} - ) - - bldr := array.NewBuilder(pool, dtype).(*array.MapBuilder) - defer bldr.Release() - - bldr.Reserve(len(validMaps)) - - kb := bldr.KeyBuilder().(*array.Int16Builder) - ib := bldr.ItemBuilder().(*array.Int16Builder) - - 
bldr.Append(true) - kb.AppendValues(keys[:6], nil) - ib.AppendValues(items[:6], nil) - - bldr.AppendNull() - bldr.Append(true) - kb.AppendValues(keys[6:], nil) - ib.AppendValues(items[6:], []bool{false, false, true, true, false, true}) - - bldr.Append(true) - arr := bldr.NewArray().(*array.Map) - defer arr.Release() - - assert.Equal(t, arrow.MAP, arr.DataType().ID()) - assert.EqualValues(t, len(validMaps), arr.Len()) - - for i, ex := range validMaps { - assert.Equal(t, ex, arr.IsValid(i)) - assert.Equal(t, !ex, arr.IsNull(i)) - } - - assert.Equal(t, offsets, arr.Offsets()) - assert.Equal(t, keys, arr.Keys().(*array.Int16).Int16Values()) - - itemArr := arr.Items().(*array.Int16) - for i, ex := range validItems { - if ex { - assert.True(t, itemArr.IsValid(i)) - assert.False(t, itemArr.IsNull(i)) - assert.Equal(t, items[i], itemArr.Value(i)) - } else { - assert.False(t, itemArr.IsValid(i)) - assert.True(t, itemArr.IsNull(i)) - } - } - - assert.Equal(t, "[{[0 1 2 3 4 5] [1 1 2 3 5 8]} (null) {[0 1 2 3 4 5] [(null) (null) 0 1 (null) 2]} {[] []}]", arr.String()) -} - -func TestMapStringRoundTrip(t *testing.T) { - // 1. create array - dt := arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32) - - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewMapBuilderWithType(mem, dt) - defer b.Release() - - kb := b.KeyBuilder().(*array.StringBuilder) - ib := b.ItemBuilder().(*array.Int32Builder) - - for n := 0; n < 10; n++ { - b.AppendNull() - b.Append(true) - - for r := 'a'; r <= 'z'; r++ { - kb.Append(string(r) + strconv.Itoa(n)) - if (n+int(r))%2 == 0 { - ib.AppendNull() - } else { - ib.Append(int32(n + int(r))) - } - } - } - - arr := b.NewArray().(*array.Map) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewMapBuilderWithType(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Map) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestMapBuilder_SetNull(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - arr *array.Map - equalValid = []bool{true, true, true, true, true, true, true} - equalOffsets = []int32{0, 1, 2, 5, 6, 7, 8, 10} - equalKeys = []string{"a", "a", "a", "b", "c", "a", "a", "a", "a", "b"} - equalValues = []int32{1, 2, 3, 4, 5, 2, 2, 2, 5, 6} - ) - - bldr := array.NewMapBuilder(pool, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false) - defer bldr.Release() - - kb := bldr.KeyBuilder().(*array.StringBuilder) - ib := bldr.ItemBuilder().(*array.Int32Builder) - - bldr.AppendValues(equalOffsets, equalValid) - for _, k := range equalKeys { - kb.Append(k) - } - ib.AppendValues(equalValues, nil) - - bldr.SetNull(0) - bldr.SetNull(3) - - arr = bldr.NewMapArray() - defer arr.Release() - - assert.True(t, arr.IsNull(0)) - assert.True(t, arr.IsValid(1)) - assert.True(t, arr.IsNull(3)) -} diff --git a/go/arrow/array/null.go b/go/arrow/array/null.go deleted file mode 100644 index 6dccd3af59f2a..0000000000000 --- a/go/arrow/array/null.go +++ /dev/null @@ -1,218 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "fmt" - "reflect" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// Null represents an immutable, degenerate array with no physical storage. -type Null struct { - array -} - -// NewNull returns a new Null array value of size n. -func NewNull(n int) *Null { - a := &Null{} - a.refCount = 1 - data := NewData( - arrow.Null, n, - []*memory.Buffer{nil}, - nil, - n, - 0, - ) - a.setData(data) - data.Release() - return a -} - -// NewNullData returns a new Null array value, from data. 
-func NewNullData(data arrow.ArrayData) *Null { - a := &Null{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *Null) ValueStr(int) string { return NullValueStr } - -func (a *Null) Value(int) interface{} { return nil } - -func (a *Null) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - o.WriteString(NullValueStr) - } - o.WriteString("]") - return o.String() -} - -func (a *Null) setData(data *Data) { - a.array.setData(data) - a.array.nullBitmapBytes = nil - a.array.data.nulls = a.array.data.length -} - -func (a *Null) GetOneForMarshal(i int) interface{} { - return nil -} - -func (a *Null) MarshalJSON() ([]byte, error) { - return json.Marshal(make([]interface{}, a.Len())) -} - -type NullBuilder struct { - builder -} - -// NewNullBuilder returns a builder, using the provided memory allocator. -func NewNullBuilder(mem memory.Allocator) *NullBuilder { - return &NullBuilder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *NullBuilder) Type() arrow.DataType { return arrow.Null } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *NullBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - } -} - -func (b *NullBuilder) AppendNull() { - b.builder.length++ - b.builder.nulls++ -} - -func (b *NullBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *NullBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - return fmt.Errorf("cannot convert %q to null", s) -} - -func (b *NullBuilder) AppendEmptyValue() { b.AppendNull() } - -func (b *NullBuilder) AppendEmptyValues(n int) { b.AppendNulls(n) } - -func (*NullBuilder) Reserve(size int) {} -func (*NullBuilder) Resize(size int) {} - -func (*NullBuilder) init(cap int) {} -func (*NullBuilder) resize(newBits int, init func(int)) {} - -// NewArray creates a Null array from the memory buffers used by the builder and resets the NullBuilder -// so it can be used to build a new array. -func (b *NullBuilder) NewArray() arrow.Array { - return b.NewNullArray() -} - -// NewNullArray creates a Null array from the memory buffers used by the builder and resets the NullBuilder -// so it can be used to build a new array. 
-func (b *NullBuilder) NewNullArray() (a *Null) { - data := b.newData() - a = NewNullData(data) - data.Release() - return -} - -func (b *NullBuilder) newData() (data *Data) { - data = NewData( - arrow.Null, b.length, - []*memory.Buffer{nil}, - nil, - b.nulls, - 0, - ) - b.reset() - - return -} - -func (b *NullBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch t.(type) { - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(nil), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *NullBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *NullBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("null builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*Null)(nil) - _ Builder = (*NullBuilder)(nil) -) diff --git a/go/arrow/array/null_test.go b/go/arrow/array/null_test.go deleted file mode 100644 index 61ccb472b1f7b..0000000000000 --- a/go/arrow/array/null_test.go +++ /dev/null @@ -1,110 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestNullArray(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - b := array.NewNullBuilder(pool) - defer b.Release() - - b.AppendNull() - b.AppendNulls(2) - b.AppendEmptyValue() - b.AppendEmptyValues(2) - - arr1 := b.NewArray().(*array.Null) - defer arr1.Release() - - if got, want := arr1.Len(), 6; got != want { - t.Fatalf("invalid null array length: got=%d, want=%d", got, want) - } - - if got, want := arr1.NullN(), 6; got != want { - t.Fatalf("invalid number of nulls: got=%d, want=%d", got, want) - } - - if got, want := arr1.DataType(), arrow.Null; got != want { - t.Fatalf("invalid null data type: got=%v, want=%v", got, want) - } - - arr1.Retain() - arr1.Release() - - if arr1.Data() == nil { - t.Fatalf("invalid null data") - } - - arr2 := b.NewNullArray() - defer arr2.Release() - - if got, want := arr2.Len(), 0; got != want { - t.Fatalf("invalid null array length: got=%d, want=%d", got, want) - } - - arr3 := array.NewNull(10) - defer arr3.Release() - - if got, want := arr3.Len(), 10; got != want { - t.Fatalf("invalid null array length: got=%d, want=%d", got, want) - } - - if got, want := arr3.NullN(), 10; got != want { - t.Fatalf("invalid number of nulls: got=%d, want=%d", got, want) - } - -} - -func TestNullStringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewNullBuilder(mem) - defer b.Release() - - b.AppendNull() - b.AppendNulls(2) - b.AppendEmptyValue() - b.AppendEmptyValues(2) - - arr := b.NewArray().(*array.Null) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewNullBuilder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Null) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} diff --git a/go/arrow/array/numeric.gen.go b/go/arrow/array/numeric.gen.go deleted file mode 100644 index 413a356c2a8ab..0000000000000 --- a/go/arrow/array/numeric.gen.go +++ /dev/null @@ -1,1452 +0,0 @@ -// Code generated by array/numeric.gen.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "fmt" - "math" - "strconv" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/internal/json" -) - -// A type which represents an immutable sequence of int64 values. 
-type Int64 struct { - array - values []int64 -} - -// NewInt64Data creates a new Int64. -func NewInt64Data(data arrow.ArrayData) *Int64 { - a := &Int64{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Int64) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Int64) Value(i int) int64 { return a.values[i] } - -// Values returns the values. -func (a *Int64) Int64Values() []int64 { return a.values } - -// String returns a string representation of the array. -func (a *Int64) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Int64) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Int64Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Int64) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatInt(int64(a.Value(i)), 10) -} - -func (a *Int64) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Int64) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualInt64(left, right *Int64) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of uint64 values. 
-type Uint64 struct { - array - values []uint64 -} - -// NewUint64Data creates a new Uint64. -func NewUint64Data(data arrow.ArrayData) *Uint64 { - a := &Uint64{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Uint64) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Uint64) Value(i int) uint64 { return a.values[i] } - -// Values returns the values. -func (a *Uint64) Uint64Values() []uint64 { return a.values } - -// String returns a string representation of the array. -func (a *Uint64) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Uint64) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Uint64Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Uint64) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatUint(uint64(a.Value(i)), 10) -} - -func (a *Uint64) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Uint64) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualUint64(left, right *Uint64) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of float64 values. 
-type Float64 struct { - array - values []float64 -} - -// NewFloat64Data creates a new Float64. -func NewFloat64Data(data arrow.ArrayData) *Float64 { - a := &Float64{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Float64) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Float64) Value(i int) float64 { return a.values[i] } - -// Values returns the values. -func (a *Float64) Float64Values() []float64 { return a.values } - -// String returns a string representation of the array. -func (a *Float64) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Float64) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Float64Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Float64) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 64) -} - -func (a *Float64) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Float64) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if !a.IsValid(i) { - vals[i] = nil - continue - } - - f := a.Value(i) - switch { - case math.IsNaN(f): - vals[i] = "NaN" - case math.IsInf(f, 1): - vals[i] = "+Inf" - case math.IsInf(f, -1): - vals[i] = "-Inf" - default: - vals[i] = f - } - - } - - return json.Marshal(vals) -} - -func arrayEqualFloat64(left, right *Float64) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if 
left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of int32 values. -type Int32 struct { - array - values []int32 -} - -// NewInt32Data creates a new Int32. -func NewInt32Data(data arrow.ArrayData) *Int32 { - a := &Int32{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Int32) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Int32) Value(i int) int32 { return a.values[i] } - -// Values returns the values. -func (a *Int32) Int32Values() []int32 { return a.values } - -// String returns a string representation of the array. -func (a *Int32) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Int32) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Int32Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Int32) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatInt(int64(a.Value(i)), 10) -} - -func (a *Int32) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Int32) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualInt32(left, right *Int32) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A 
type which represents an immutable sequence of uint32 values. -type Uint32 struct { - array - values []uint32 -} - -// NewUint32Data creates a new Uint32. -func NewUint32Data(data arrow.ArrayData) *Uint32 { - a := &Uint32{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Uint32) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Uint32) Value(i int) uint32 { return a.values[i] } - -// Values returns the values. -func (a *Uint32) Uint32Values() []uint32 { return a.values } - -// String returns a string representation of the array. -func (a *Uint32) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Uint32) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Uint32Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Uint32) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatUint(uint64(a.Value(i)), 10) -} - -func (a *Uint32) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Uint32) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualUint32(left, right *Uint32) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of float32 
values. -type Float32 struct { - array - values []float32 -} - -// NewFloat32Data creates a new Float32. -func NewFloat32Data(data arrow.ArrayData) *Float32 { - a := &Float32{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Float32) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Float32) Value(i int) float32 { return a.values[i] } - -// Values returns the values. -func (a *Float32) Float32Values() []float32 { return a.values } - -// String returns a string representation of the array. -func (a *Float32) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Float32) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Float32Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Float32) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 32) -} - -func (a *Float32) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Float32) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if !a.IsValid(i) { - vals[i] = nil - continue - } - - f := a.Value(i) - v := strconv.FormatFloat(float64(f), 'g', -1, 32) - - switch v { - case "NaN", "+Inf", "-Inf": - vals[i] = v - default: - vals[i] = f - } - } - - return json.Marshal(vals) -} - -func arrayEqualFloat32(left, right *Float32) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != 
right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of int16 values. -type Int16 struct { - array - values []int16 -} - -// NewInt16Data creates a new Int16. -func NewInt16Data(data arrow.ArrayData) *Int16 { - a := &Int16{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Int16) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Int16) Value(i int) int16 { return a.values[i] } - -// Values returns the values. -func (a *Int16) Int16Values() []int16 { return a.values } - -// String returns a string representation of the array. -func (a *Int16) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Int16) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Int16Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Int16) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatInt(int64(a.Value(i)), 10) -} - -func (a *Int16) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Int16) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualInt16(left, right *Int16) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which 
represents an immutable sequence of uint16 values. -type Uint16 struct { - array - values []uint16 -} - -// NewUint16Data creates a new Uint16. -func NewUint16Data(data arrow.ArrayData) *Uint16 { - a := &Uint16{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Uint16) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Uint16) Value(i int) uint16 { return a.values[i] } - -// Values returns the values. -func (a *Uint16) Uint16Values() []uint16 { return a.values } - -// String returns a string representation of the array. -func (a *Uint16) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Uint16) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Uint16Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Uint16) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatUint(uint64(a.Value(i)), 10) -} - -func (a *Uint16) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Uint16) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualUint16(left, right *Uint16) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of int8 values. 
-type Int8 struct { - array - values []int8 -} - -// NewInt8Data creates a new Int8. -func NewInt8Data(data arrow.ArrayData) *Int8 { - a := &Int8{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Int8) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Int8) Value(i int) int8 { return a.values[i] } - -// Values returns the values. -func (a *Int8) Int8Values() []int8 { return a.values } - -// String returns a string representation of the array. -func (a *Int8) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Int8) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Int8Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Int8) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatInt(int64(a.Value(i)), 10) -} - -func (a *Int8) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return float64(a.values[i]) // prevent uint8 from being seen as binary data -} - -func (a *Int8) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = float64(a.values[i]) // prevent uint8 from being seen as binary data - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualInt8(left, right *Int8) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of 
uint8 values. -type Uint8 struct { - array - values []uint8 -} - -// NewUint8Data creates a new Uint8. -func NewUint8Data(data arrow.ArrayData) *Uint8 { - a := &Uint8{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Uint8) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Uint8) Value(i int) uint8 { return a.values[i] } - -// Values returns the values. -func (a *Uint8) Uint8Values() []uint8 { return a.values } - -// String returns a string representation of the array. -func (a *Uint8) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Uint8) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Uint8Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Uint8) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatUint(uint64(a.Value(i)), 10) -} - -func (a *Uint8) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return float64(a.values[i]) // prevent uint8 from being seen as binary data -} - -func (a *Uint8) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = float64(a.values[i]) // prevent uint8 from being seen as binary data - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualUint8(left, right *Uint8) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which 
represents an immutable sequence of arrow.Time32 values. -type Time32 struct { - array - values []arrow.Time32 -} - -// NewTime32Data creates a new Time32. -func NewTime32Data(data arrow.ArrayData) *Time32 { - a := &Time32{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Time32) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Time32) Value(i int) arrow.Time32 { return a.values[i] } - -// Values returns the values. -func (a *Time32) Time32Values() []arrow.Time32 { return a.values } - -// String returns a string representation of the array. -func (a *Time32) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Time32) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Time32Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Time32) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.values[i].FormattedString(a.DataType().(*arrow.Time32Type).Unit) -} - -func (a *Time32) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.values[i].ToTime(a.DataType().(*arrow.Time32Type).Unit).Format("15:04:05.999999999") -} - -func (a *Time32) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } - - return json.Marshal(vals) -} - -func arrayEqualTime32(left, right *Time32) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - 
-// A type which represents an immutable sequence of arrow.Time64 values. -type Time64 struct { - array - values []arrow.Time64 -} - -// NewTime64Data creates a new Time64. -func NewTime64Data(data arrow.ArrayData) *Time64 { - a := &Time64{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Time64) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Time64) Value(i int) arrow.Time64 { return a.values[i] } - -// Values returns the values. -func (a *Time64) Time64Values() []arrow.Time64 { return a.values } - -// String returns a string representation of the array. -func (a *Time64) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Time64) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Time64Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Time64) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.values[i].FormattedString(a.DataType().(*arrow.Time64Type).Unit) -} - -func (a *Time64) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.values[i].ToTime(a.DataType().(*arrow.Time64Type).Unit).Format("15:04:05.999999999") -} - -func (a *Time64) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } - - return json.Marshal(vals) -} - -func arrayEqualTime64(left, right *Time64) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - 
return true -} - -// A type which represents an immutable sequence of arrow.Date32 values. -type Date32 struct { - array - values []arrow.Date32 -} - -// NewDate32Data creates a new Date32. -func NewDate32Data(data arrow.ArrayData) *Date32 { - a := &Date32{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Date32) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Date32) Value(i int) arrow.Date32 { return a.values[i] } - -// Values returns the values. -func (a *Date32) Date32Values() []arrow.Date32 { return a.values } - -// String returns a string representation of the array. -func (a *Date32) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Date32) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Date32Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Date32) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.values[i].FormattedString() -} - -func (a *Date32) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.values[i].ToTime().Format("2006-01-02") -} - -func (a *Date32) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } - - return json.Marshal(vals) -} - -func arrayEqualDate32(left, right *Date32) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable 
sequence of arrow.Date64 values. -type Date64 struct { - array - values []arrow.Date64 -} - -// NewDate64Data creates a new Date64. -func NewDate64Data(data arrow.ArrayData) *Date64 { - a := &Date64{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Date64) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Date64) Value(i int) arrow.Date64 { return a.values[i] } - -// Values returns the values. -func (a *Date64) Date64Values() []arrow.Date64 { return a.values } - -// String returns a string representation of the array. -func (a *Date64) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Date64) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Date64Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Date64) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.values[i].FormattedString() -} - -func (a *Date64) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.values[i].ToTime().Format("2006-01-02") -} - -func (a *Date64) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } - - return json.Marshal(vals) -} - -func arrayEqualDate64(left, right *Date64) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of arrow.Duration values. 
-type Duration struct { - array - values []arrow.Duration -} - -// NewDurationData creates a new Duration. -func NewDurationData(data arrow.ArrayData) *Duration { - a := &Duration{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Duration) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Duration) Value(i int) arrow.Duration { return a.values[i] } - -// Values returns the values. -func (a *Duration) DurationValues() []arrow.Duration { return a.values } - -// String returns a string representation of the array. -func (a *Duration) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Duration) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.DurationTraits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Duration) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - // return value and suffix as a string such as "12345ms" - return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*arrow.DurationType).Unit) -} - -func (a *Duration) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - // return value and suffix as a string such as "12345ms" - return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*arrow.DurationType).Unit.String()) -} - -func (a *Duration) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } - - return json.Marshal(vals) -} - -func arrayEqualDuration(left, right *Duration) bool { - for i := 0; i < left.Len(); i++ { - if 
left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} diff --git a/go/arrow/array/numeric.gen.go.tmpl b/go/arrow/array/numeric.gen.go.tmpl deleted file mode 100644 index 1f4b56609f464..0000000000000 --- a/go/arrow/array/numeric.gen.go.tmpl +++ /dev/null @@ -1,192 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "fmt" - "strings" - "time" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/internal/json" -) - -{{range .In}} - -// A type which represents an immutable sequence of {{or .QualifiedType .Type}} values. -type {{.Name}} struct { - array - values []{{or .QualifiedType .Type}} -} - -// New{{.Name}}Data creates a new {{.Name}}. -func New{{.Name}}Data(data arrow.ArrayData) *{{.Name}} { - a := &{{.Name}}{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *{{.Name}}) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *{{.Name}}) Value(i int) {{or .QualifiedType .Type}} { return a.values[i] } - -// Values returns the values. 
-func (a *{{.Name}}) {{.Name}}Values() []{{or .QualifiedType .Type}} { return a.values } - -// String returns a string representation of the array. -func (a *{{.Name}}) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *{{.Name}}) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.{{.Name}}Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *{{.Name}}) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } -{{if or (eq .Name "Date32") (eq .Name "Date64") -}} - return a.values[i].FormattedString() -{{else if or (eq .Name "Time32") (eq .Name "Time64") -}} - return a.values[i].FormattedString(a.DataType().(*{{.QualifiedType}}Type).Unit) -{{else if (eq .Name "Duration") -}} - // return value and suffix as a string such as "12345ms" - return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*{{.QualifiedType}}Type).Unit) -{{else if or (eq .Name "Int8") (eq .Name "Int16") (eq .Name "Int32") (eq .Name "Int64") -}} - return strconv.FormatInt(int64(a.Value(i)), 10) -{{else if or (eq .Name "Uint8") (eq .Name "Uint16") (eq .Name "Uint32") (eq .Name "Uint64") -}} - return strconv.FormatUint(uint64(a.Value(i)), 10) -{{else if or (eq .Name "Float32") -}} - return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 32) -{{else if or (eq .Name "Float64") -}} - return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 64) -{{else}} - return fmt.Sprintf("%v", a.values[i]) -{{end -}} -} - -func (a *{{.Name}}) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } -{{if or (eq .Name "Date32") (eq .Name "Date64") -}} - return 
a.values[i].ToTime().Format("2006-01-02") -{{else if or (eq .Name "Time32") (eq .Name "Time64") -}} - return a.values[i].ToTime(a.DataType().(*{{.QualifiedType}}Type).Unit).Format("15:04:05.999999999") -{{else if (eq .Name "Duration") -}} - // return value and suffix as a string such as "12345ms" - return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*{{.QualifiedType}}Type).Unit.String()) -{{else if (eq .Size "1")}} - return float64(a.values[i]) // prevent uint8 from being seen as binary data -{{else}} - return a.values[i] -{{end -}} -} - -func (a *{{.Name}}) MarshalJSON() ([]byte, error) { -{{if .QualifiedType -}} - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } -{{else -}} - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - {{if (eq .Name "Float32") -}} - if !a.IsValid(i) { - vals[i] = nil - continue - } - - f := a.Value(i) - v := strconv.FormatFloat(float64(f), 'g', -1, 32) - - switch v { - case "NaN", "+Inf", "-Inf": - vals[i] = v - default: - vals[i] = f - } - {{else if (eq .Name "Float64") -}} - if !a.IsValid(i) { - vals[i] = nil - continue - } - - f := a.Value(i) - switch { - case math.IsNaN(f): - vals[i] = "NaN" - case math.IsInf(f, 1): - vals[i] = "+Inf" - case math.IsInf(f, -1): - vals[i] = "-Inf" - default: - vals[i] = f - } - {{else}} - if a.IsValid(i) { - {{ if (eq .Size "1") }}vals[i] = float64(a.values[i]) // prevent uint8 from being seen as binary data{{ else }}vals[i] = a.values[i]{{ end }} - } else { - vals[i] = nil - } - {{end}} - } -{{end}} - return json.Marshal(vals) -} - -func arrayEqual{{.Name}}(left, right *{{.Name}}) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -{{end}} diff --git a/go/arrow/array/numeric_test.go b/go/arrow/array/numeric_test.go deleted file mode 100644 index bb8acc3f41519..0000000000000 --- a/go/arrow/array/numeric_test.go +++ 
/dev/null @@ -1,779 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "math" - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/float16" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" - "github.com/stretchr/testify/assert" -) - -func TestNewFloat64Data(t *testing.T) { - exp := []float64{1.0, 2.0, 4.0, 8.0, 16.0} - - ad := array.NewData( - arrow.PrimitiveTypes.Float64, len(exp), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(exp))}, - nil, 0, 0, - ) - fa := array.NewFloat64Data(ad) - - assert.Equal(t, len(exp), fa.Len(), "unexpected Len()") - assert.Equal(t, exp, fa.Float64Values(), "unexpected Float64Values()") -} - -func TestFloat64SliceData(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 4 - ) - - var ( - vs = []float64{1, 2, 3, 4, 5} - sub = vs[beg:end] - ) - - b := array.NewFloat64Builder(pool) - defer b.Release() - - for _, v := range vs { - b.Append(v) - } - - arr := b.NewArray().(*array.Float64) - defer 
arr.Release() - - if got, want := arr.Len(), len(vs); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Float64Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Float64) - defer slice.Release() - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Float64Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestFloat64SliceDataWithNull(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 5 - ) - - var ( - valids = []bool{true, true, true, false, true, true} - vs = []float64{1, 2, 3, 0, 4, 5} - sub = vs[beg:end] - ) - - b := array.NewFloat64Builder(pool) - defer b.Release() - - b.AppendValues(vs, valids) - - arr := b.NewArray().(*array.Float64) - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Float64Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Float64) - defer slice.Release() - - if got, want := slice.NullN(), 1; got != want { - t.Errorf("got=%d, want=%d", got, want) - } - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Float64Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestFloat16MarshalJSON(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - bldr := array.NewFloat16Builder(pool) - defer bldr.Release() - - jsonstr := `[0, 1, 2, 3, "NaN", "NaN", 4, 5, "+Inf", 
"-Inf"]` - - bldr.Append(float16.New(0)) - bldr.Append(float16.New(1)) - bldr.Append(float16.New(2)) - bldr.Append(float16.New(3)) - bldr.Append(float16.NaN()) - bldr.Append(float16.NaN()) - bldr.Append(float16.New(4)) - bldr.Append(float16.New(5)) - bldr.Append(float16.Inf()) - bldr.Append(float16.Inf().Negate()) - - expected := bldr.NewFloat16Array() - defer expected.Release() - expected_json, err := expected.MarshalJSON() - assert.NoError(t, err) - assert.JSONEq(t, jsonstr, string(expected_json)) -} - -func TestFloat32MarshalJSON(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - bldr := array.NewFloat32Builder(pool) - defer bldr.Release() - - jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` - - bldr.Append(0) - bldr.Append(1) - bldr.Append(float32(math.Inf(1))) - bldr.Append(2) - bldr.Append(3) - bldr.Append(float32(math.NaN())) - bldr.Append(float32(math.NaN())) - bldr.Append(4) - bldr.Append(5) - bldr.Append(float32(math.Inf(-1))) - - expected := bldr.NewFloat32Array() - defer expected.Release() - - expected_json, err := expected.MarshalJSON() - assert.NoError(t, err) - - assert.JSONEq(t, jsonstr, string(expected_json)) -} - -func TestFloat64MarshalJSON(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - bldr := array.NewFloat64Builder(pool) - defer bldr.Release() - - jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` - - bldr.Append(0) - bldr.Append(1) - bldr.Append(math.Inf(1)) - bldr.Append(2) - bldr.Append(3) - bldr.Append(math.NaN()) - bldr.Append(math.NaN()) - bldr.Append(4) - bldr.Append(5) - bldr.Append(math.Inf(-1)) - - expected := bldr.NewFloat64Array() - defer expected.Release() - - expected_json, err := expected.MarshalJSON() - assert.NoError(t, err) - - assert.JSONEq(t, jsonstr, string(expected_json)) - -} - -func TestUnmarshalSpecialFloat(t *testing.T) { - pool := 
memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - bldr := array.NewFloat32Builder(pool) - defer bldr.Release() - - assert.NoError(t, json.Unmarshal([]byte(`[3.4, "Inf", "-Inf"]`), bldr)) - arr := bldr.NewFloat32Array() - defer arr.Release() - - assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) - assert.True(t, math.IsInf(float64(arr.Value(1)), 1), arr.Value(1)) - assert.True(t, math.IsInf(float64(arr.Value(2)), -1), arr.Value(2)) -} - -func TestNewTime32Data(t *testing.T) { - data := []arrow.Time32{ - arrow.Time32(1), - arrow.Time32(2), - arrow.Time32(4), - arrow.Time32(8), - arrow.Time32(16), - } - - dtype := arrow.FixedWidthTypes.Time32s - ad := array.NewData(dtype, len(data), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Time32Traits.CastToBytes(data))}, - nil, 0, 0, - ) - t32a := array.NewTime32Data(ad) - - assert.Equal(t, len(data), t32a.Len(), "unexpected Len()") - assert.Equal(t, data, t32a.Time32Values(), "unexpected Float64Values()") -} - -func TestTime32SliceData(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 4 - ) - - var ( - vs = []arrow.Time32{ - arrow.Time32(1), - arrow.Time32(2), - arrow.Time32(4), - arrow.Time32(8), - arrow.Time32(16), - } - sub = vs[beg:end] - ) - - dtype := arrow.FixedWidthTypes.Time32s - b := array.NewTime32Builder(pool, dtype.(*arrow.Time32Type)) - defer b.Release() - - for _, v := range vs { - b.Append(v) - } - - arr := b.NewArray().(*array.Time32) - defer arr.Release() - - if got, want := arr.Len(), len(vs); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Time32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Time32) - defer slice.Release() - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, 
want := slice.Time32Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestTime32SliceDataWithNull(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 5 - ) - - var ( - valids = []bool{true, true, true, false, true, true} - vs = []arrow.Time32{ - arrow.Time32(1), - arrow.Time32(2), - arrow.Time32(3), - arrow.Time32(0), - arrow.Time32(4), - arrow.Time32(5), - } - sub = vs[beg:end] - ) - - dtype := arrow.FixedWidthTypes.Time32s - b := array.NewTime32Builder(pool, dtype.(*arrow.Time32Type)) - defer b.Release() - - b.AppendValues(vs, valids) - - arr := b.NewArray().(*array.Time32) - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Time32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Time32) - defer slice.Release() - - if got, want := slice.NullN(), 1; got != want { - t.Errorf("got=%d, want=%d", got, want) - } - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Time32Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestNewTime64Data(t *testing.T) { - data := []arrow.Time64{ - arrow.Time64(1), - arrow.Time64(2), - arrow.Time64(4), - arrow.Time64(8), - arrow.Time64(16), - } - - dtype := arrow.FixedWidthTypes.Time64us - ad := array.NewData(dtype, len(data), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Time64Traits.CastToBytes(data))}, - nil, 0, 0, - ) - t64a := array.NewTime64Data(ad) - - assert.Equal(t, len(data), t64a.Len(), "unexpected Len()") - assert.Equal(t, data, t64a.Time64Values(), "unexpected Float64Values()") 
-} - -func TestTime64SliceData(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 4 - ) - - var ( - vs = []arrow.Time64{ - arrow.Time64(1), - arrow.Time64(2), - arrow.Time64(4), - arrow.Time64(8), - arrow.Time64(16), - } - sub = vs[beg:end] - ) - - dtype := arrow.FixedWidthTypes.Time64us - b := array.NewTime64Builder(pool, dtype.(*arrow.Time64Type)) - defer b.Release() - - for _, v := range vs { - b.Append(v) - } - - arr := b.NewArray().(*array.Time64) - defer arr.Release() - - if got, want := arr.Len(), len(vs); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Time64Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Time64) - defer slice.Release() - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Time64Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestTime64SliceDataWithNull(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 5 - ) - - var ( - valids = []bool{true, true, true, false, true, true} - vs = []arrow.Time64{ - arrow.Time64(1), - arrow.Time64(2), - arrow.Time64(3), - arrow.Time64(0), - arrow.Time64(4), - arrow.Time64(5), - } - sub = vs[beg:end] - ) - - dtype := arrow.FixedWidthTypes.Time64us - b := array.NewTime64Builder(pool, dtype.(*arrow.Time64Type)) - defer b.Release() - - b.AppendValues(vs, valids) - - arr := b.NewArray().(*array.Time64) - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Time64Values(), vs; !reflect.DeepEqual(got, want) 
{ - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Time64) - defer slice.Release() - - if got, want := slice.NullN(), 1; got != want { - t.Errorf("got=%d, want=%d", got, want) - } - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Time64Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestNewDate32Data(t *testing.T) { - exp := []arrow.Date32{1, 2, 4, 8, 16} - - dtype := &arrow.Date32Type{} - ad := array.NewData( - dtype, len(exp), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Date32Traits.CastToBytes(exp))}, - nil, 0, 0, - ) - fa := array.NewDate32Data(ad) - - assert.Equal(t, len(exp), fa.Len(), "unexpected Len()") - assert.Equal(t, exp, fa.Date32Values(), "unexpected Date32Values()") -} - -func TestDate32SliceData(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 4 - ) - - var ( - vs = []arrow.Date32{1, 2, 3, 4, 5} - sub = vs[beg:end] - ) - - b := array.NewDate32Builder(pool) - defer b.Release() - - for _, v := range vs { - b.Append(v) - } - - arr := b.NewArray().(*array.Date32) - defer arr.Release() - - if got, want := arr.Len(), len(vs); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Date32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Date32) - defer slice.Release() - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Date32Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestDate32SliceDataWithNull(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 5 - 
) - - var ( - valids = []bool{true, true, true, false, true, true} - vs = []arrow.Date32{1, 2, 3, 0, 4, 5} - sub = vs[beg:end] - ) - - b := array.NewDate32Builder(pool) - defer b.Release() - - b.AppendValues(vs, valids) - - arr := b.NewArray().(*array.Date32) - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Date32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Date32) - defer slice.Release() - - if got, want := slice.NullN(), 1; got != want { - t.Errorf("got=%d, want=%d", got, want) - } - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Date32Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestNewDate64Data(t *testing.T) { - exp := []arrow.Date64{1, 2, 4, 8, 16} - - dtype := &arrow.Date64Type{} - ad := array.NewData( - dtype, len(exp), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Date64Traits.CastToBytes(exp))}, - nil, 0, 0, - ) - fa := array.NewDate64Data(ad) - - assert.Equal(t, len(exp), fa.Len(), "unexpected Len()") - assert.Equal(t, exp, fa.Date64Values(), "unexpected Date64Values()") -} - -func TestDate64SliceData(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 4 - ) - - var ( - vs = []arrow.Date64{1, 2, 3, 4, 5} - sub = vs[beg:end] - ) - - b := array.NewDate64Builder(pool) - defer b.Release() - - for _, v := range vs { - b.Append(v) - } - - arr := b.NewArray().(*array.Date64) - defer arr.Release() - - if got, want := arr.Len(), len(vs); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Date64Values(), vs; 
!reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Date64) - defer slice.Release() - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Date64Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestDate64SliceDataWithNull(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 5 - ) - - var ( - valids = []bool{true, true, true, false, true, true} - vs = []arrow.Date64{1, 2, 3, 0, 4, 5} - sub = vs[beg:end] - ) - - b := array.NewDate64Builder(pool) - defer b.Release() - - b.AppendValues(vs, valids) - - arr := b.NewArray().(*array.Date64) - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Date64Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Date64) - defer slice.Release() - - if got, want := slice.NullN(), 1; got != want { - t.Errorf("got=%d, want=%d", got, want) - } - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Date64Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestInt64MarshalJSON(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int64{-5474557666971701248} - ) - - b := array.NewInt64Builder(pool) - defer b.Release() - - for _, v := range vs { - b.Append(v) - } - - arr := b.NewArray().(*array.Int64) - defer arr.Release() - - jsonBytes, err := json.Marshal(arr) - if err != nil { - 
t.Fatal(err) - } - got := string(jsonBytes) - want := `[-5474557666971701248]` - if got != want { - t.Fatalf("got=%s, want=%s", got, want) - } -} - -func TestUInt64MarshalJSON(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []uint64{14697929703826477056} - ) - - b := array.NewUint64Builder(pool) - defer b.Release() - - for _, v := range vs { - b.Append(v) - } - - arr := b.NewArray().(*array.Uint64) - defer arr.Release() - - jsonBytes, err := json.Marshal(arr) - if err != nil { - t.Fatal(err) - } - got := string(jsonBytes) - want := `[14697929703826477056]` - if got != want { - t.Fatalf("got=%s, want=%s", got, want) - } -} diff --git a/go/arrow/array/numericbuilder.gen.go b/go/arrow/array/numericbuilder.gen.go deleted file mode 100644 index c80f0c7c9578e..0000000000000 --- a/go/arrow/array/numericbuilder.gen.go +++ /dev/null @@ -1,3664 +0,0 @@ -// Code generated by array/numericbuilder.gen.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "fmt" - "reflect" - "strconv" - "strings" - "sync/atomic" - "time" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type Int64Builder struct { - builder - - data *memory.Buffer - rawData []int64 -} - -func NewInt64Builder(mem memory.Allocator) *Int64Builder { - return &Int64Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Int64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int64 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Int64Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Int64Builder) Append(v int64) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Int64Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Int64Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Int64Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Int64Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Int64Builder) UnsafeAppend(v int64) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Int64Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). 
The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *Int64Builder) AppendValues(v []int64, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Int64Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Int64Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Int64Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Int64Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Int64Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Int64Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Int64Traits.BytesRequired(n)) - b.rawData = arrow.Int64Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Int64Builder) Value(i int) int64 { - return b.rawData[i] -} - -// NewArray creates a Int64 array from the memory buffers used by the builder and resets the Int64Builder -// so it can be used to build a new array. -func (b *Int64Builder) NewArray() arrow.Array { - return b.NewInt64Array() -} - -// NewInt64Array creates a Int64 array from the memory buffers used by the builder and resets the Int64Builder -// so it can be used to build a new array. 
-func (b *Int64Builder) NewInt64Array() (a *Int64) { - data := b.newData() - a = NewInt64Data(data) - data.Release() - return -} - -func (b *Int64Builder) newData() (data *Data) { - bytesRequired := arrow.Int64Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Int64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Int64Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseInt(s, 10, 8*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(int64(v)) - return nil -} - -func (b *Int64Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseInt(v, 10, 8*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(int64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int64(f)) - case float64: - b.Append(int64(v)) - case json.Number: - f, err := strconv.ParseInt(v.String(), 10, 8*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(int64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int64(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(int64(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Int64Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Int64Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, 
ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Uint64Builder struct { - builder - - data *memory.Buffer - rawData []uint64 -} - -func NewUint64Builder(mem memory.Allocator) *Uint64Builder { - return &Uint64Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Uint64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint64 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Uint64Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Uint64Builder) Append(v uint64) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Uint64Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Uint64Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Uint64Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Uint64Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Uint64Builder) UnsafeAppend(v uint64) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Uint64Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Uint64Builder) AppendValues(v []uint64, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Uint64Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Uint64Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Uint64Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Uint64Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Uint64Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Uint64Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Uint64Traits.BytesRequired(n)) - b.rawData = arrow.Uint64Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Uint64Builder) Value(i int) uint64 { - return b.rawData[i] -} - -// NewArray creates a Uint64 array from the memory buffers used by the builder and resets the Uint64Builder -// so it can be used to build a new array. -func (b *Uint64Builder) NewArray() arrow.Array { - return b.NewUint64Array() -} - -// NewUint64Array creates a Uint64 array from the memory buffers used by the builder and resets the Uint64Builder -// so it can be used to build a new array. 
-func (b *Uint64Builder) NewUint64Array() (a *Uint64) { - data := b.newData() - a = NewUint64Data(data) - data.Release() - return -} - -func (b *Uint64Builder) newData() (data *Data) { - bytesRequired := arrow.Uint64Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Uint64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Uint64Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseUint(s, 10, 8*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(uint64(v)) - return nil -} - -func (b *Uint64Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseUint(v, 10, 8*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(uint64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint64(f)) - case float64: - b.Append(uint64(v)) - case json.Number: - f, err := strconv.ParseUint(v.String(), 10, 8*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(uint64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint64(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(uint64(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Uint64Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Uint64Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return 
err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Float64Builder struct { - builder - - data *memory.Buffer - rawData []float64 -} - -func NewFloat64Builder(mem memory.Allocator) *Float64Builder { - return &Float64Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Float64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Float64 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Float64Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Float64Builder) Append(v float64) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Float64Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Float64Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Float64Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Float64Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Float64Builder) UnsafeAppend(v float64) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Float64Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Float64Builder) AppendValues(v []float64, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Float64Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Float64Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Float64Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Float64Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Float64Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Float64Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Float64Traits.BytesRequired(n)) - b.rawData = arrow.Float64Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Float64Builder) Value(i int) float64 { - return b.rawData[i] -} - -// NewArray creates a Float64 array from the memory buffers used by the builder and resets the Float64Builder -// so it can be used to build a new array. -func (b *Float64Builder) NewArray() arrow.Array { - return b.NewFloat64Array() -} - -// NewFloat64Array creates a Float64 array from the memory buffers used by the builder and resets the Float64Builder -// so it can be used to build a new array. 
-func (b *Float64Builder) NewFloat64Array() (a *Float64) { - data := b.newData() - a = NewFloat64Data(data) - data.Release() - return -} - -func (b *Float64Builder) newData() (data *Data) { - bytesRequired := arrow.Float64Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Float64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Float64Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseFloat(s, 8*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(float64(v)) - return nil -} - -func (b *Float64Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseFloat(v, 8*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(float64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(float64(f)) - case float64: - b.Append(float64(v)) - case json.Number: - f, err := strconv.ParseFloat(v.String(), 8*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(float64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(float64(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(float64(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Float64Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Float64Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { 
- return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Int32Builder struct { - builder - - data *memory.Buffer - rawData []int32 -} - -func NewInt32Builder(mem memory.Allocator) *Int32Builder { - return &Int32Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Int32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int32 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Int32Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Int32Builder) Append(v int32) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Int32Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Int32Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Int32Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Int32Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Int32Builder) UnsafeAppend(v int32) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Int32Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Int32Builder) AppendValues(v []int32, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Int32Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Int32Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Int32Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Int32Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Int32Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Int32Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Int32Traits.BytesRequired(n)) - b.rawData = arrow.Int32Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Int32Builder) Value(i int) int32 { - return b.rawData[i] -} - -// NewArray creates a Int32 array from the memory buffers used by the builder and resets the Int32Builder -// so it can be used to build a new array. -func (b *Int32Builder) NewArray() arrow.Array { - return b.NewInt32Array() -} - -// NewInt32Array creates a Int32 array from the memory buffers used by the builder and resets the Int32Builder -// so it can be used to build a new array. 
-func (b *Int32Builder) NewInt32Array() (a *Int32) { - data := b.newData() - a = NewInt32Data(data) - data.Release() - return -} - -func (b *Int32Builder) newData() (data *Data) { - bytesRequired := arrow.Int32Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Int32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Int32Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseInt(s, 10, 4*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(int32(v)) - return nil -} - -func (b *Int32Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseInt(v, 10, 4*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(int32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int32(f)) - case float64: - b.Append(int32(v)) - case json.Number: - f, err := strconv.ParseInt(v.String(), 10, 4*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(int32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int32(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(int32(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Int32Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Int32Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, 
ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Uint32Builder struct { - builder - - data *memory.Buffer - rawData []uint32 -} - -func NewUint32Builder(mem memory.Allocator) *Uint32Builder { - return &Uint32Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Uint32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint32 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Uint32Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Uint32Builder) Append(v uint32) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Uint32Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Uint32Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Uint32Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Uint32Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Uint32Builder) UnsafeAppend(v uint32) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Uint32Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Uint32Builder) AppendValues(v []uint32, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Uint32Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Uint32Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Uint32Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Uint32Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Uint32Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Uint32Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Uint32Traits.BytesRequired(n)) - b.rawData = arrow.Uint32Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Uint32Builder) Value(i int) uint32 { - return b.rawData[i] -} - -// NewArray creates a Uint32 array from the memory buffers used by the builder and resets the Uint32Builder -// so it can be used to build a new array. -func (b *Uint32Builder) NewArray() arrow.Array { - return b.NewUint32Array() -} - -// NewUint32Array creates a Uint32 array from the memory buffers used by the builder and resets the Uint32Builder -// so it can be used to build a new array. 
-func (b *Uint32Builder) NewUint32Array() (a *Uint32) { - data := b.newData() - a = NewUint32Data(data) - data.Release() - return -} - -func (b *Uint32Builder) newData() (data *Data) { - bytesRequired := arrow.Uint32Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Uint32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Uint32Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseUint(s, 10, 4*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(uint32(v)) - return nil -} - -func (b *Uint32Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseUint(v, 10, 4*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(uint32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint32(f)) - case float64: - b.Append(uint32(v)) - case json.Number: - f, err := strconv.ParseUint(v.String(), 10, 4*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(uint32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint32(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(uint32(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Uint32Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Uint32Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return 
err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Float32Builder struct { - builder - - data *memory.Buffer - rawData []float32 -} - -func NewFloat32Builder(mem memory.Allocator) *Float32Builder { - return &Float32Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Float32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Float32 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Float32Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Float32Builder) Append(v float32) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Float32Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Float32Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Float32Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Float32Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Float32Builder) UnsafeAppend(v float32) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Float32Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Float32Builder) AppendValues(v []float32, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Float32Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Float32Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Float32Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Float32Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Float32Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Float32Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Float32Traits.BytesRequired(n)) - b.rawData = arrow.Float32Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Float32Builder) Value(i int) float32 { - return b.rawData[i] -} - -// NewArray creates a Float32 array from the memory buffers used by the builder and resets the Float32Builder -// so it can be used to build a new array. -func (b *Float32Builder) NewArray() arrow.Array { - return b.NewFloat32Array() -} - -// NewFloat32Array creates a Float32 array from the memory buffers used by the builder and resets the Float32Builder -// so it can be used to build a new array. 
-func (b *Float32Builder) NewFloat32Array() (a *Float32) { - data := b.newData() - a = NewFloat32Data(data) - data.Release() - return -} - -func (b *Float32Builder) newData() (data *Data) { - bytesRequired := arrow.Float32Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Float32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Float32Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseFloat(s, 4*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(float32(v)) - return nil -} - -func (b *Float32Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseFloat(v, 4*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(float32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(float32(f)) - case float64: - b.Append(float32(v)) - case json.Number: - f, err := strconv.ParseFloat(v.String(), 4*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(float32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(float32(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(float32(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Float32Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Float32Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { 
- return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Int16Builder struct { - builder - - data *memory.Buffer - rawData []int16 -} - -func NewInt16Builder(mem memory.Allocator) *Int16Builder { - return &Int16Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Int16Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int16 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Int16Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Int16Builder) Append(v int16) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Int16Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Int16Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Int16Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Int16Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Int16Builder) UnsafeAppend(v int16) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Int16Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Int16Builder) AppendValues(v []int16, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Int16Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Int16Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Int16Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Int16Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Int16Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Int16Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Int16Traits.BytesRequired(n)) - b.rawData = arrow.Int16Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Int16Builder) Value(i int) int16 { - return b.rawData[i] -} - -// NewArray creates a Int16 array from the memory buffers used by the builder and resets the Int16Builder -// so it can be used to build a new array. -func (b *Int16Builder) NewArray() arrow.Array { - return b.NewInt16Array() -} - -// NewInt16Array creates a Int16 array from the memory buffers used by the builder and resets the Int16Builder -// so it can be used to build a new array. 
-func (b *Int16Builder) NewInt16Array() (a *Int16) { - data := b.newData() - a = NewInt16Data(data) - data.Release() - return -} - -func (b *Int16Builder) newData() (data *Data) { - bytesRequired := arrow.Int16Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Int16, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Int16Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseInt(s, 10, 2*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(int16(v)) - return nil -} - -func (b *Int16Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseInt(v, 10, 2*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(int16(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int16(f)) - case float64: - b.Append(int16(v)) - case json.Number: - f, err := strconv.ParseInt(v.String(), 10, 2*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(int16(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int16(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(int16(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Int16Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Int16Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, 
ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Uint16Builder struct { - builder - - data *memory.Buffer - rawData []uint16 -} - -func NewUint16Builder(mem memory.Allocator) *Uint16Builder { - return &Uint16Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Uint16Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint16 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Uint16Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Uint16Builder) Append(v uint16) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Uint16Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Uint16Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Uint16Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Uint16Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Uint16Builder) UnsafeAppend(v uint16) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Uint16Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Uint16Builder) AppendValues(v []uint16, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Uint16Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Uint16Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Uint16Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Uint16Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Uint16Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Uint16Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Uint16Traits.BytesRequired(n)) - b.rawData = arrow.Uint16Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Uint16Builder) Value(i int) uint16 { - return b.rawData[i] -} - -// NewArray creates a Uint16 array from the memory buffers used by the builder and resets the Uint16Builder -// so it can be used to build a new array. -func (b *Uint16Builder) NewArray() arrow.Array { - return b.NewUint16Array() -} - -// NewUint16Array creates a Uint16 array from the memory buffers used by the builder and resets the Uint16Builder -// so it can be used to build a new array. 
-func (b *Uint16Builder) NewUint16Array() (a *Uint16) { - data := b.newData() - a = NewUint16Data(data) - data.Release() - return -} - -func (b *Uint16Builder) newData() (data *Data) { - bytesRequired := arrow.Uint16Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Uint16, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Uint16Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseUint(s, 10, 2*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(uint16(v)) - return nil -} - -func (b *Uint16Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseUint(v, 10, 2*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(uint16(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint16(f)) - case float64: - b.Append(uint16(v)) - case json.Number: - f, err := strconv.ParseUint(v.String(), 10, 2*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(uint16(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint16(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(uint16(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Uint16Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Uint16Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return 
err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Int8Builder struct { - builder - - data *memory.Buffer - rawData []int8 -} - -func NewInt8Builder(mem memory.Allocator) *Int8Builder { - return &Int8Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Int8Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int8 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Int8Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Int8Builder) Append(v int8) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Int8Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Int8Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Int8Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Int8Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Int8Builder) UnsafeAppend(v int8) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Int8Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Int8Builder) AppendValues(v []int8, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Int8Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Int8Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Int8Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Int8Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Int8Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Int8Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Int8Traits.BytesRequired(n)) - b.rawData = arrow.Int8Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Int8Builder) Value(i int) int8 { - return b.rawData[i] -} - -// NewArray creates a Int8 array from the memory buffers used by the builder and resets the Int8Builder -// so it can be used to build a new array. -func (b *Int8Builder) NewArray() arrow.Array { - return b.NewInt8Array() -} - -// NewInt8Array creates a Int8 array from the memory buffers used by the builder and resets the Int8Builder -// so it can be used to build a new array. 
-func (b *Int8Builder) NewInt8Array() (a *Int8) { - data := b.newData() - a = NewInt8Data(data) - data.Release() - return -} - -func (b *Int8Builder) newData() (data *Data) { - bytesRequired := arrow.Int8Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Int8, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Int8Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseInt(s, 10, 1*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(int8(v)) - return nil -} - -func (b *Int8Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseInt(v, 10, 1*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(int8(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int8(f)) - case float64: - b.Append(int8(v)) - case json.Number: - f, err := strconv.ParseInt(v.String(), 10, 1*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(int8(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int8(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(int8(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Int8Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Int8Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := 
t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Uint8Builder struct { - builder - - data *memory.Buffer - rawData []uint8 -} - -func NewUint8Builder(mem memory.Allocator) *Uint8Builder { - return &Uint8Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Uint8Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint8 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Uint8Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Uint8Builder) Append(v uint8) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Uint8Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Uint8Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Uint8Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Uint8Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Uint8Builder) UnsafeAppend(v uint8) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Uint8Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Uint8Builder) AppendValues(v []uint8, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Uint8Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Uint8Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Uint8Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Uint8Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Uint8Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Uint8Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Uint8Traits.BytesRequired(n)) - b.rawData = arrow.Uint8Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Uint8Builder) Value(i int) uint8 { - return b.rawData[i] -} - -// NewArray creates a Uint8 array from the memory buffers used by the builder and resets the Uint8Builder -// so it can be used to build a new array. -func (b *Uint8Builder) NewArray() arrow.Array { - return b.NewUint8Array() -} - -// NewUint8Array creates a Uint8 array from the memory buffers used by the builder and resets the Uint8Builder -// so it can be used to build a new array. 
-func (b *Uint8Builder) NewUint8Array() (a *Uint8) { - data := b.newData() - a = NewUint8Data(data) - data.Release() - return -} - -func (b *Uint8Builder) newData() (data *Data) { - bytesRequired := arrow.Uint8Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Uint8, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Uint8Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseUint(s, 10, 1*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(uint8(v)) - return nil -} - -func (b *Uint8Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseUint(v, 10, 1*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(uint8(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint8(f)) - case float64: - b.Append(uint8(v)) - case json.Number: - f, err := strconv.ParseUint(v.String(), 10, 1*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(uint8(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint8(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(uint8(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Uint8Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Uint8Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if 
delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Time32Builder struct { - builder - - dtype *arrow.Time32Type - data *memory.Buffer - rawData []arrow.Time32 -} - -func NewTime32Builder(mem memory.Allocator, dtype *arrow.Time32Type) *Time32Builder { - return &Time32Builder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} -} - -func (b *Time32Builder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Time32Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Time32Builder) Append(v arrow.Time32) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Time32Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Time32Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Time32Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Time32Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Time32Builder) UnsafeAppend(v arrow.Time32) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Time32Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. 
If empty, -// all values in v are appended and considered valid. -func (b *Time32Builder) AppendValues(v []arrow.Time32, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Time32Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Time32Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Time32Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Time32Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Time32Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Time32Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Time32Traits.BytesRequired(n)) - b.rawData = arrow.Time32Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Time32Builder) Value(i int) arrow.Time32 { - return b.rawData[i] -} - -// NewArray creates a Time32 array from the memory buffers used by the builder and resets the Time32Builder -// so it can be used to build a new array. -func (b *Time32Builder) NewArray() arrow.Array { - return b.NewTime32Array() -} - -// NewTime32Array creates a Time32 array from the memory buffers used by the builder and resets the Time32Builder -// so it can be used to build a new array. 
-func (b *Time32Builder) NewTime32Array() (a *Time32) { - data := b.newData() - a = NewTime32Data(data) - data.Release() - return -} - -func (b *Time32Builder) newData() (data *Data) { - bytesRequired := arrow.Time32Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Time32Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - val, err := arrow.Time32FromString(s, b.dtype.Unit) - if err != nil { - b.AppendNull() - return err - } - b.Append(val) - return nil -} - -func (b *Time32Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - tm, err := arrow.Time32FromString(v, b.dtype.Unit) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(arrow.Time32(0)), - Offset: dec.InputOffset(), - } - } - - b.Append(tm) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(arrow.Time32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(arrow.Time32(n)) - case float64: - b.Append(arrow.Time32(v)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(arrow.Time32(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Time32Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Time32Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - 
- if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Time64Builder struct { - builder - - dtype *arrow.Time64Type - data *memory.Buffer - rawData []arrow.Time64 -} - -func NewTime64Builder(mem memory.Allocator, dtype *arrow.Time64Type) *Time64Builder { - return &Time64Builder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} -} - -func (b *Time64Builder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Time64Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Time64Builder) Append(v arrow.Time64) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Time64Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Time64Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Time64Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Time64Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Time64Builder) UnsafeAppend(v arrow.Time64) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Time64Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. 
If empty, -// all values in v are appended and considered valid. -func (b *Time64Builder) AppendValues(v []arrow.Time64, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Time64Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Time64Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Time64Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Time64Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Time64Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Time64Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Time64Traits.BytesRequired(n)) - b.rawData = arrow.Time64Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Time64Builder) Value(i int) arrow.Time64 { - return b.rawData[i] -} - -// NewArray creates a Time64 array from the memory buffers used by the builder and resets the Time64Builder -// so it can be used to build a new array. -func (b *Time64Builder) NewArray() arrow.Array { - return b.NewTime64Array() -} - -// NewTime64Array creates a Time64 array from the memory buffers used by the builder and resets the Time64Builder -// so it can be used to build a new array. 
-func (b *Time64Builder) NewTime64Array() (a *Time64) { - data := b.newData() - a = NewTime64Data(data) - data.Release() - return -} - -func (b *Time64Builder) newData() (data *Data) { - bytesRequired := arrow.Time64Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Time64Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - val, err := arrow.Time64FromString(s, b.dtype.Unit) - if err != nil { - b.AppendNull() - return err - } - b.Append(val) - return nil -} - -func (b *Time64Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - tm, err := arrow.Time64FromString(v, b.dtype.Unit) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(arrow.Time64(0)), - Offset: dec.InputOffset(), - } - } - - b.Append(tm) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(arrow.Time64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(arrow.Time64(n)) - case float64: - b.Append(arrow.Time64(v)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(arrow.Time64(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Time64Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Time64Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - 
- if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Date32Builder struct { - builder - - data *memory.Buffer - rawData []arrow.Date32 -} - -func NewDate32Builder(mem memory.Allocator) *Date32Builder { - return &Date32Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Date32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Date32 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Date32Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Date32Builder) Append(v arrow.Date32) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Date32Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Date32Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Date32Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Date32Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Date32Builder) UnsafeAppend(v arrow.Date32) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Date32Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Date32Builder) AppendValues(v []arrow.Date32, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Date32Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Date32Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Date32Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Date32Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Date32Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Date32Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Date32Traits.BytesRequired(n)) - b.rawData = arrow.Date32Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Date32Builder) Value(i int) arrow.Date32 { - return b.rawData[i] -} - -// NewArray creates a Date32 array from the memory buffers used by the builder and resets the Date32Builder -// so it can be used to build a new array. -func (b *Date32Builder) NewArray() arrow.Array { - return b.NewDate32Array() -} - -// NewDate32Array creates a Date32 array from the memory buffers used by the builder and resets the Date32Builder -// so it can be used to build a new array. 
-func (b *Date32Builder) NewDate32Array() (a *Date32) { - data := b.newData() - a = NewDate32Data(data) - data.Release() - return -} - -func (b *Date32Builder) newData() (data *Data) { - bytesRequired := arrow.Date32Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Date32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Date32Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - tm, err := time.Parse("2006-01-02", s) - if err != nil { - b.AppendNull() - return err - } - b.Append(arrow.Date32FromTime(tm)) - return nil -} - -func (b *Date32Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - tm, err := time.Parse("2006-01-02", v) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(arrow.Date32(0)), - Offset: dec.InputOffset(), - } - } - - b.Append(arrow.Date32FromTime(tm)) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(arrow.Date32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(arrow.Date32(n)) - case float64: - b.Append(arrow.Date32(v)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(arrow.Date32(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Date32Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Date32Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := 
dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Date64Builder struct { - builder - - data *memory.Buffer - rawData []arrow.Date64 -} - -func NewDate64Builder(mem memory.Allocator) *Date64Builder { - return &Date64Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Date64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Date64 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Date64Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Date64Builder) Append(v arrow.Date64) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Date64Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Date64Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Date64Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Date64Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Date64Builder) UnsafeAppend(v arrow.Date64) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Date64Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. 
If empty, -// all values in v are appended and considered valid. -func (b *Date64Builder) AppendValues(v []arrow.Date64, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Date64Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Date64Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Date64Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Date64Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Date64Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Date64Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Date64Traits.BytesRequired(n)) - b.rawData = arrow.Date64Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Date64Builder) Value(i int) arrow.Date64 { - return b.rawData[i] -} - -// NewArray creates a Date64 array from the memory buffers used by the builder and resets the Date64Builder -// so it can be used to build a new array. -func (b *Date64Builder) NewArray() arrow.Array { - return b.NewDate64Array() -} - -// NewDate64Array creates a Date64 array from the memory buffers used by the builder and resets the Date64Builder -// so it can be used to build a new array. 
-func (b *Date64Builder) NewDate64Array() (a *Date64) { - data := b.newData() - a = NewDate64Data(data) - data.Release() - return -} - -func (b *Date64Builder) newData() (data *Data) { - bytesRequired := arrow.Date64Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Date64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Date64Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - tm, err := time.Parse("2006-01-02", s) - if err != nil { - b.AppendNull() - return err - } - b.Append(arrow.Date64FromTime(tm)) - return nil -} - -func (b *Date64Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - tm, err := time.Parse("2006-01-02", v) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(arrow.Date64(0)), - Offset: dec.InputOffset(), - } - } - - b.Append(arrow.Date64FromTime(tm)) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(arrow.Date64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(arrow.Date64(n)) - case float64: - b.Append(arrow.Date64(v)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(arrow.Date64(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Date64Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Date64Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := 
dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type DurationBuilder struct { - builder - - dtype *arrow.DurationType - data *memory.Buffer - rawData []arrow.Duration -} - -func NewDurationBuilder(mem memory.Allocator, dtype *arrow.DurationType) *DurationBuilder { - return &DurationBuilder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} -} - -func (b *DurationBuilder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *DurationBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *DurationBuilder) Append(v arrow.Duration) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *DurationBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *DurationBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *DurationBuilder) AppendEmptyValue() { - b.Append(0) -} - -func (b *DurationBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *DurationBuilder) UnsafeAppend(v arrow.Duration) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *DurationBuilder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). 
The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *DurationBuilder) AppendValues(v []arrow.Duration, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.DurationTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *DurationBuilder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.DurationTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.DurationTraits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *DurationBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *DurationBuilder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.DurationTraits.BytesRequired(n)) - b.rawData = arrow.DurationTraits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *DurationBuilder) Value(i int) arrow.Duration { - return b.rawData[i] -} - -// NewArray creates a Duration array from the memory buffers used by the builder and resets the DurationBuilder -// so it can be used to build a new array. -func (b *DurationBuilder) NewArray() arrow.Array { - return b.NewDurationArray() -} - -// NewDurationArray creates a Duration array from the memory buffers used by the builder and resets the DurationBuilder -// so it can be used to build a new array. 
-func (b *DurationBuilder) NewDurationArray() (a *Duration) { - data := b.newData() - a = NewDurationData(data) - data.Release() - return -} - -func (b *DurationBuilder) newData() (data *Data) { - bytesRequired := arrow.DurationTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *DurationBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - dur, err := time.ParseDuration(s) - if err != nil { - return err - } - - b.Append(arrow.Duration(dur / b.dtype.Unit.Multiplier())) - return nil -} - -func (b *DurationBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(arrow.Duration(0)), - Offset: dec.InputOffset(), - } - } - b.Append(arrow.Duration(n)) - case float64: - b.Append(arrow.Duration(v)) - case string: - // be flexible for specifying durations by accepting forms like - // 3h2m0.5s regardless of the unit and converting it to the proper - // precision. - val, err := time.ParseDuration(v) - if err != nil { - // if we got an error, maybe it was because the attempt to create - // a time.Duration (int64) in nanoseconds would overflow. 
check if - // the string is just a large number followed by the unit suffix - if strings.HasSuffix(v, b.dtype.Unit.String()) { - value, err := strconv.ParseInt(v[:len(v)-len(b.dtype.Unit.String())], 10, 64) - if err == nil { - b.Append(arrow.Duration(value)) - break - } - } - - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(arrow.Duration(0)), - Offset: dec.InputOffset(), - } - } - - switch b.dtype.Unit { - case arrow.Nanosecond: - b.Append(arrow.Duration(val.Nanoseconds())) - case arrow.Microsecond: - b.Append(arrow.Duration(val.Microseconds())) - case arrow.Millisecond: - b.Append(arrow.Duration(val.Milliseconds())) - case arrow.Second: - b.Append(arrow.Duration(val.Seconds())) - } - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(arrow.Duration(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *DurationBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *DurationBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ Builder = (*Int64Builder)(nil) - _ Builder = (*Uint64Builder)(nil) - _ Builder = (*Float64Builder)(nil) - _ Builder = (*Int32Builder)(nil) - _ Builder = (*Uint32Builder)(nil) - _ Builder = (*Float32Builder)(nil) - _ Builder = (*Int16Builder)(nil) - _ Builder = (*Uint16Builder)(nil) - _ Builder = (*Int8Builder)(nil) - _ Builder = (*Uint8Builder)(nil) - _ Builder = (*Time32Builder)(nil) - _ Builder = (*Time64Builder)(nil) - _ Builder = (*Date32Builder)(nil) - _ Builder = (*Date64Builder)(nil) - _ Builder = (*DurationBuilder)(nil) -) diff --git 
a/go/arrow/array/numericbuilder.gen.go.tmpl b/go/arrow/array/numericbuilder.gen.go.tmpl deleted file mode 100644 index d8b92cf60cc39..0000000000000 --- a/go/arrow/array/numericbuilder.gen.go.tmpl +++ /dev/null @@ -1,447 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -{{range .In}} - -type {{.Name}}Builder struct { - builder - -{{if .Opt.Parametric -}} - dtype *arrow.{{.Name}}Type -{{end -}} - data *memory.Buffer - rawData []{{or .QualifiedType .Type}} -} - -{{if .Opt.Parametric}} -func New{{.Name}}Builder(mem memory.Allocator, dtype *arrow.{{.Name}}Type) *{{.Name}}Builder { - return &{{.Name}}Builder{builder: builder{refCount:1, mem: mem}, dtype: dtype} -} - -func (b *{{.Name}}Builder) Type() arrow.DataType { return b.dtype } - -{{else}} -func New{{.Name}}Builder(mem memory.Allocator) *{{.Name}}Builder { - return &{{.Name}}Builder{builder: builder{refCount:1, mem: mem}} -} - -func (b *{{.Name}}Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.{{.Name}} } -{{end}} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *{{.Name}}Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *{{.Name}}Builder) Append(v {{or .QualifiedType .Type}}) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *{{.Name}}Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *{{.Name}}Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *{{.Name}}Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *{{.Name}}Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i ++ { - b.AppendEmptyValue() - } -} - -func (b *{{.Name}}Builder) UnsafeAppend(v {{or .QualifiedType .Type}}) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *{{.Name}}Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *{{.Name}}Builder) AppendValues(v []{{or .QualifiedType .Type}}, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.{{.Name}}Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *{{.Name}}Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.{{.Name}}Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.{{.Name}}Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *{{.Name}}Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *{{.Name}}Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.{{.Name}}Traits.BytesRequired(n)) - b.rawData = arrow.{{.Name}}Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *{{.Name}}Builder) Value(i int) {{or .QualifiedType .Type}} { - return b.rawData[i] -} - -// NewArray creates a {{.Name}} array from the memory buffers used by the builder and resets the {{.Name}}Builder -// so it can be used to build a new array. -func (b *{{.Name}}Builder) NewArray() arrow.Array { - return b.New{{.Name}}Array() -} - -// New{{.Name}}Array creates a {{.Name}} array from the memory buffers used by the builder and resets the {{.Name}}Builder -// so it can be used to build a new array. 
-func (b *{{.Name}}Builder) New{{.Name}}Array() (a *{{.Name}}) { - data := b.newData() - a = New{{.Name}}Data(data) - data.Release() - return -} - -func (b *{{.Name}}Builder) newData() (data *Data) { - bytesRequired := arrow.{{.Name}}Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } -{{if .Opt.Parametric -}} - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) -{{else -}} - data = NewData(arrow.PrimitiveTypes.{{.Name}}, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) -{{end -}} - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *{{.Name}}Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - {{if or (eq .Name "Date32") -}} - tm, err := time.Parse("2006-01-02", s) - if err != nil { - b.AppendNull() - return err - } - b.Append(arrow.Date32FromTime(tm)) - {{else if or (eq .Name "Date64") -}} - tm, err := time.Parse("2006-01-02", s) - if err != nil { - b.AppendNull() - return err - } - b.Append(arrow.Date64FromTime(tm)) - {{else if or (eq .Name "Time32") -}} - val, err := arrow.Time32FromString(s, b.dtype.Unit) - if err != nil { - b.AppendNull() - return err - } - b.Append(val) - {{else if or (eq .Name "Time64") -}} - val, err := arrow.Time64FromString(s, b.dtype.Unit) - if err != nil { - b.AppendNull() - return err - } - b.Append(val) - {{else if (eq .Name "Duration") -}} - dur, err := time.ParseDuration(s) - if err != nil { - return err - } - - b.Append(arrow.Duration(dur / b.dtype.Unit.Multiplier())) - {{else if or (eq .Name "Int8") (eq .Name "Int16") (eq .Name "Int32") (eq .Name "Int64") -}} - v, err := strconv.ParseInt(s, 10, {{.Size}} * 8) - if err != nil { - b.AppendNull() - return err - } - b.Append({{.name}}(v)) - {{else if or (eq .Name "Uint8") (eq .Name "Uint16") (eq .Name "Uint32") (eq .Name 
"Uint64") -}} - v, err := strconv.ParseUint(s, 10, {{.Size}} * 8) - if err != nil { - b.AppendNull() - return err - } - b.Append({{.name}}(v)) - {{else if or (eq .Name "Float32") (eq .Name "Float64") -}} - v, err := strconv.ParseFloat(s, {{.Size}} * 8) - if err != nil { - b.AppendNull() - return err - } - b.Append({{.name}}(v)) - {{end -}} - return nil -} - -func (b *{{.Name}}Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() -{{if or (eq .Name "Date32") (eq .Name "Date64") -}} - case string: - tm, err := time.Parse("2006-01-02", v) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf({{.QualifiedType}}(0)), - Offset: dec.InputOffset(), - } - } - - b.Append({{.QualifiedType}}FromTime(tm)) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf({{.QualifiedType}}(0)), - Offset: dec.InputOffset(), - } - } - b.Append({{.QualifiedType}}(n)) - case float64: - b.Append({{.QualifiedType}}(v)) -{{else if or (eq .Name "Time32") (eq .Name "Time64") -}} - case string: - tm, err := {{.QualifiedType}}FromString(v, b.dtype.Unit) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf({{.QualifiedType}}(0)), - Offset: dec.InputOffset(), - } - } - - b.Append(tm) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf({{.QualifiedType}}(0)), - Offset: dec.InputOffset(), - } - } - b.Append({{.QualifiedType}}(n)) - case float64: - b.Append({{.QualifiedType}}(v)) -{{else if eq .Name "Duration" -}} - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf({{.QualifiedType}}(0)), - Offset: dec.InputOffset(), - } - } - b.Append({{.QualifiedType}}(n)) - case float64: - 
b.Append({{.QualifiedType}}(v)) - case string: - // be flexible for specifying durations by accepting forms like - // 3h2m0.5s regardless of the unit and converting it to the proper - // precision. - val, err := time.ParseDuration(v) - if err != nil { - // if we got an error, maybe it was because the attempt to create - // a time.Duration (int64) in nanoseconds would overflow. check if - // the string is just a large number followed by the unit suffix - if strings.HasSuffix(v, b.dtype.Unit.String()) { - value, err := strconv.ParseInt(v[:len(v)-len(b.dtype.Unit.String())], 10, 64) - if err == nil { - b.Append(arrow.Duration(value)) - break - } - } - - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf({{.QualifiedType}}(0)), - Offset: dec.InputOffset(), - } - } - - switch b.dtype.Unit { - case arrow.Nanosecond: - b.Append({{.QualifiedType}}(val.Nanoseconds())) - case arrow.Microsecond: - b.Append({{.QualifiedType}}(val.Microseconds())) - case arrow.Millisecond: - b.Append({{.QualifiedType}}(val.Milliseconds())) - case arrow.Second: - b.Append({{.QualifiedType}}(val.Seconds())) - } -{{else}} - case string: -{{if or (eq .Name "Float32") (eq .Name "Float64") -}} - f, err := strconv.ParseFloat(v, {{.Size}}*8) -{{else if eq (printf "%.1s" .Name) "U" -}} - f, err := strconv.ParseUint(v, 10, {{.Size}}*8) -{{else -}} - f, err := strconv.ParseInt(v, 10, {{.Size}}*8) -{{end -}} - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf({{.name}}(0)), - Offset: dec.InputOffset(), - } - } - b.Append({{.name}}(f)) - case float64: - b.Append({{.name}}(v)) - case json.Number: -{{if or (eq .Name "Float32") (eq .Name "Float64") -}} - f, err := strconv.ParseFloat(v.String(), {{.Size}}*8) -{{else if eq (printf "%.1s" .Name) "U" -}} - f, err := strconv.ParseUint(v.String(), 10, {{.Size}}*8) -{{else -}} - f, err := strconv.ParseInt(v.String(), 10, {{.Size}}*8) -{{end -}} - if err != nil { - return &json.UnmarshalTypeError{ - Value: 
v.String(), - Type: reflect.TypeOf({{.name}}(0)), - Offset: dec.InputOffset(), - } - } - b.Append({{.name}}(f)) -{{end}} - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf({{or .QualifiedType .Type}}(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *{{.Name}}Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *{{.Name}}Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} -{{end}} - -var ( -{{- range .In}} - _ Builder = (*{{.Name}}Builder)(nil) -{{- end}} -) diff --git a/go/arrow/array/numericbuilder.gen_test.go b/go/arrow/array/numericbuilder.gen_test.go deleted file mode 100644 index 8adf86853b7c7..0000000000000 --- a/go/arrow/array/numericbuilder.gen_test.go +++ /dev/null @@ -1,3125 +0,0 @@ -// Code generated by array/numericbuilder.gen_test.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "math" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestInt64StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewInt64Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Int64) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewInt64Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Int64) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewInt64Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt64Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewInt64Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewInt64Array() - - // check state of builder after NewInt64Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewInt64Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewInt64Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewInt64Array did not reset state") - - 
// check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []int64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Int64Values(), "unexpected Int64Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Int64Values(), 10, "unexpected length of Int64Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewInt64Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []int64{7, 8}, a.Int64Values()) - assert.Len(t, a.Int64Values(), 2) - - a.Release() - - var ( - want = []int64{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewInt64Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Int64); !ok { - t.Fatalf("could not type-assert to array.Int64") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Int64) - if !ok { - t.Fatalf("could not type-assert to array.Int64") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestInt64Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt64Builder(mem) - defer ab.Release() - - exp := []int64{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewInt64Array() - assert.Equal(t, exp, a.Int64Values()) - - a.Release() -} - -func TestInt64Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt64Builder(mem) - defer 
ab.Release() - - exp := []int64{0, 1, 2, 3} - - ab.AppendValues([]int64{}, nil) - a := ab.NewInt64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewInt64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]int64{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewInt64Array() - assert.Equal(t, exp, a.Int64Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]int64{}, nil) - a = ab.NewInt64Array() - assert.Equal(t, exp, a.Int64Values()) - a.Release() -} - -func TestInt64Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt64Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestUint64StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewUint64Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Uint64) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewUint64Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Uint64) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewUint64Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint64Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewUint64Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewUint64Array() - - // check state of builder after NewUint64Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewUint64Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewUint64Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewUint64Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []uint64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Uint64Values(), "unexpected Uint64Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Uint64Values(), 10, "unexpected length of Uint64Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewUint64Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []uint64{7, 8}, a.Uint64Values()) - assert.Len(t, a.Uint64Values(), 2) - - a.Release() - - var ( - want = []uint64{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewUint64Array() - - sub := array.MakeFromData(a.Data()) - defer 
sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Uint64); !ok { - t.Fatalf("could not type-assert to array.Uint64") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Uint64) - if !ok { - t.Fatalf("could not type-assert to array.Uint64") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestUint64Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint64Builder(mem) - defer ab.Release() - - exp := []uint64{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewUint64Array() - assert.Equal(t, exp, a.Uint64Values()) - - a.Release() -} - -func TestUint64Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint64Builder(mem) - defer ab.Release() - - exp := []uint64{0, 1, 2, 3} - - ab.AppendValues([]uint64{}, nil) - a := ab.NewUint64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewUint64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]uint64{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewUint64Array() - assert.Equal(t, exp, a.Uint64Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]uint64{}, nil) - a = ab.NewUint64Array() - assert.Equal(t, exp, a.Uint64Values()) - a.Release() -} - -func TestUint64Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint64Builder(mem) - defer ab.Release() - - 
assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestFloat64StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewFloat64Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Float64) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewFloat64Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Float64) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewFloat64Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat64Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewFloat64Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewFloat64Array() - - // check state of builder after NewFloat64Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewFloat64Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewFloat64Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), 
NewFloat64Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []float64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Float64Values(), "unexpected Float64Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Float64Values(), 10, "unexpected length of Float64Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewFloat64Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []float64{7, 8}, a.Float64Values()) - assert.Len(t, a.Float64Values(), 2) - - a.Release() - - var ( - want = []float64{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewFloat64Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Float64); !ok { - t.Fatalf("could not type-assert to array.Float64") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Float64) - if !ok { - t.Fatalf("could not type-assert to array.Float64") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestFloat64Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat64Builder(mem) - defer ab.Release() - - exp := []float64{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewFloat64Array() - assert.Equal(t, exp, a.Float64Values()) - - a.Release() -} - -func TestFloat64Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) 
- defer mem.AssertSize(t, 0) - - ab := array.NewFloat64Builder(mem) - defer ab.Release() - - exp := []float64{0, 1, 2, 3} - - ab.AppendValues([]float64{}, nil) - a := ab.NewFloat64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewFloat64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]float64{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewFloat64Array() - assert.Equal(t, exp, a.Float64Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]float64{}, nil) - a = ab.NewFloat64Array() - assert.Equal(t, exp, a.Float64Values()) - a.Release() -} - -func TestFloat64Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat64Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestFloat64BuilderUnmarshalJSON(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - bldr := array.NewFloat64Builder(mem) - defer bldr.Release() - - jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` - - err := bldr.UnmarshalJSON([]byte(jsonstr)) - assert.NoError(t, err) - - arr := bldr.NewFloat64Array() - defer arr.Release() - - assert.NotNil(t, arr) - - assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) - assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) - assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) -} - -func TestInt32StringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewInt32Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Int32) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewInt32Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Int32) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewInt32Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt32Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewInt32Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewInt32Array() - - // check state of builder after NewInt32Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewInt32Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewInt32Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewInt32Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []int32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Int32Values(), "unexpected Int32Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Int32Values(), 10, "unexpected length of Int32Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - 
a = ab.NewInt32Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []int32{7, 8}, a.Int32Values()) - assert.Len(t, a.Int32Values(), 2) - - a.Release() - - var ( - want = []int32{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewInt32Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Int32); !ok { - t.Fatalf("could not type-assert to array.Int32") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Int32) - if !ok { - t.Fatalf("could not type-assert to array.Int32") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestInt32Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt32Builder(mem) - defer ab.Release() - - exp := []int32{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewInt32Array() - assert.Equal(t, exp, a.Int32Values()) - - a.Release() -} - -func TestInt32Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt32Builder(mem) - defer ab.Release() - - exp := []int32{0, 1, 2, 3} - - ab.AppendValues([]int32{}, nil) - a := ab.NewInt32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewInt32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]int32{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewInt32Array() - assert.Equal(t, exp, a.Int32Values()) - a.Release() - - ab.AppendValues(exp, 
nil) - ab.AppendValues([]int32{}, nil) - a = ab.NewInt32Array() - assert.Equal(t, exp, a.Int32Values()) - a.Release() -} - -func TestInt32Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt32Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestUint32StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewUint32Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Uint32) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewUint32Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Uint32) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewUint32Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint32Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewUint32Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewUint32Array() - - // check state of builder after NewUint32Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewUint32Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewUint32Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewUint32Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []uint32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Uint32Values(), "unexpected Uint32Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Uint32Values(), 10, "unexpected length of Uint32Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewUint32Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []uint32{7, 8}, a.Uint32Values()) - assert.Len(t, a.Uint32Values(), 2) - - a.Release() - - var ( - want = []uint32{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewUint32Array() - - sub := array.MakeFromData(a.Data()) - defer 
sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Uint32); !ok { - t.Fatalf("could not type-assert to array.Uint32") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Uint32) - if !ok { - t.Fatalf("could not type-assert to array.Uint32") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestUint32Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint32Builder(mem) - defer ab.Release() - - exp := []uint32{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewUint32Array() - assert.Equal(t, exp, a.Uint32Values()) - - a.Release() -} - -func TestUint32Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint32Builder(mem) - defer ab.Release() - - exp := []uint32{0, 1, 2, 3} - - ab.AppendValues([]uint32{}, nil) - a := ab.NewUint32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewUint32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]uint32{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewUint32Array() - assert.Equal(t, exp, a.Uint32Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]uint32{}, nil) - a = ab.NewUint32Array() - assert.Equal(t, exp, a.Uint32Values()) - a.Release() -} - -func TestUint32Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint32Builder(mem) - defer ab.Release() - - 
assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestFloat32StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewFloat32Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Float32) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewFloat32Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Float32) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewFloat32Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat32Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewFloat32Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewFloat32Array() - - // check state of builder after NewFloat32Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewFloat32Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewFloat32Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), 
NewFloat32Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []float32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Float32Values(), "unexpected Float32Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Float32Values(), 10, "unexpected length of Float32Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewFloat32Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []float32{7, 8}, a.Float32Values()) - assert.Len(t, a.Float32Values(), 2) - - a.Release() - - var ( - want = []float32{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewFloat32Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Float32); !ok { - t.Fatalf("could not type-assert to array.Float32") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Float32) - if !ok { - t.Fatalf("could not type-assert to array.Float32") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestFloat32Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat32Builder(mem) - defer ab.Release() - - exp := []float32{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewFloat32Array() - assert.Equal(t, exp, a.Float32Values()) - - a.Release() -} - -func TestFloat32Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) 
- defer mem.AssertSize(t, 0) - - ab := array.NewFloat32Builder(mem) - defer ab.Release() - - exp := []float32{0, 1, 2, 3} - - ab.AppendValues([]float32{}, nil) - a := ab.NewFloat32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewFloat32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]float32{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewFloat32Array() - assert.Equal(t, exp, a.Float32Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]float32{}, nil) - a = ab.NewFloat32Array() - assert.Equal(t, exp, a.Float32Values()) - a.Release() -} - -func TestFloat32Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat32Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestFloat32BuilderUnmarshalJSON(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - bldr := array.NewFloat32Builder(mem) - defer bldr.Release() - - jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` - - err := bldr.UnmarshalJSON([]byte(jsonstr)) - assert.NoError(t, err) - - arr := bldr.NewFloat32Array() - defer arr.Release() - - assert.NotNil(t, arr) - - assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) - assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) - assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) -} - -func TestInt16StringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewInt16Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Int16) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewInt16Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Int16) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewInt16Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt16Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewInt16Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewInt16Array() - - // check state of builder after NewInt16Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewInt16Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewInt16Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewInt16Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []int16{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Int16Values(), "unexpected Int16Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Int16Values(), 10, "unexpected length of Int16Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - 
a = ab.NewInt16Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []int16{7, 8}, a.Int16Values()) - assert.Len(t, a.Int16Values(), 2) - - a.Release() - - var ( - want = []int16{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewInt16Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Int16); !ok { - t.Fatalf("could not type-assert to array.Int16") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Int16) - if !ok { - t.Fatalf("could not type-assert to array.Int16") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestInt16Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt16Builder(mem) - defer ab.Release() - - exp := []int16{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewInt16Array() - assert.Equal(t, exp, a.Int16Values()) - - a.Release() -} - -func TestInt16Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt16Builder(mem) - defer ab.Release() - - exp := []int16{0, 1, 2, 3} - - ab.AppendValues([]int16{}, nil) - a := ab.NewInt16Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewInt16Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]int16{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewInt16Array() - assert.Equal(t, exp, a.Int16Values()) - a.Release() - - ab.AppendValues(exp, 
nil) - ab.AppendValues([]int16{}, nil) - a = ab.NewInt16Array() - assert.Equal(t, exp, a.Int16Values()) - a.Release() -} - -func TestInt16Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt16Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestUint16StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewUint16Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Uint16) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewUint16Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Uint16) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewUint16Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint16Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewUint16Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewUint16Array() - - // check state of builder after NewUint16Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewUint16Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewUint16Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewUint16Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []uint16{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Uint16Values(), "unexpected Uint16Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Uint16Values(), 10, "unexpected length of Uint16Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewUint16Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []uint16{7, 8}, a.Uint16Values()) - assert.Len(t, a.Uint16Values(), 2) - - a.Release() - - var ( - want = []uint16{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewUint16Array() - - sub := array.MakeFromData(a.Data()) - defer 
sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Uint16); !ok { - t.Fatalf("could not type-assert to array.Uint16") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Uint16) - if !ok { - t.Fatalf("could not type-assert to array.Uint16") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestUint16Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint16Builder(mem) - defer ab.Release() - - exp := []uint16{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewUint16Array() - assert.Equal(t, exp, a.Uint16Values()) - - a.Release() -} - -func TestUint16Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint16Builder(mem) - defer ab.Release() - - exp := []uint16{0, 1, 2, 3} - - ab.AppendValues([]uint16{}, nil) - a := ab.NewUint16Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewUint16Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]uint16{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewUint16Array() - assert.Equal(t, exp, a.Uint16Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]uint16{}, nil) - a = ab.NewUint16Array() - assert.Equal(t, exp, a.Uint16Values()) - a.Release() -} - -func TestUint16Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint16Builder(mem) - defer ab.Release() - - 
assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestInt8StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewInt8Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Int8) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewInt8Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Int8) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewInt8Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt8Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewInt8Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewInt8Array() - - // check state of builder after NewInt8Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewInt8Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewInt8Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewInt8Array did not reset state") - - 
// check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []int8{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Int8Values(), "unexpected Int8Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Int8Values(), 10, "unexpected length of Int8Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewInt8Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []int8{7, 8}, a.Int8Values()) - assert.Len(t, a.Int8Values(), 2) - - a.Release() - - var ( - want = []int8{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewInt8Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Int8); !ok { - t.Fatalf("could not type-assert to array.Int8") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Int8) - if !ok { - t.Fatalf("could not type-assert to array.Int8") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestInt8Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt8Builder(mem) - defer ab.Release() - - exp := []int8{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewInt8Array() - assert.Equal(t, exp, a.Int8Values()) - - a.Release() -} - -func TestInt8Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt8Builder(mem) - defer ab.Release() - - exp := 
[]int8{0, 1, 2, 3} - - ab.AppendValues([]int8{}, nil) - a := ab.NewInt8Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewInt8Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]int8{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewInt8Array() - assert.Equal(t, exp, a.Int8Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]int8{}, nil) - a = ab.NewInt8Array() - assert.Equal(t, exp, a.Int8Values()) - a.Release() -} - -func TestInt8Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt8Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestUint8StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewUint8Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Uint8) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewUint8Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Uint8) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewUint8Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint8Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewUint8Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewUint8Array() - - // check state of builder after NewUint8Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewUint8Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewUint8Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewUint8Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []uint8{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Uint8Values(), "unexpected Uint8Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Uint8Values(), 10, "unexpected length of Uint8Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewUint8Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []uint8{7, 8}, a.Uint8Values()) - assert.Len(t, a.Uint8Values(), 2) - - a.Release() - - var ( - want = []uint8{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewUint8Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, 
want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Uint8); !ok { - t.Fatalf("could not type-assert to array.Uint8") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Uint8) - if !ok { - t.Fatalf("could not type-assert to array.Uint8") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestUint8Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint8Builder(mem) - defer ab.Release() - - exp := []uint8{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewUint8Array() - assert.Equal(t, exp, a.Uint8Values()) - - a.Release() -} - -func TestUint8Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint8Builder(mem) - defer ab.Release() - - exp := []uint8{0, 1, 2, 3} - - ab.AppendValues([]uint8{}, nil) - a := ab.NewUint8Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewUint8Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]uint8{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewUint8Array() - assert.Equal(t, exp, a.Uint8Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]uint8{}, nil) - a = ab.NewUint8Array() - assert.Equal(t, exp, a.Uint8Values()) - a.Release() -} - -func TestUint8Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint8Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, 
ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestTime32StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := &arrow.Time32Type{Unit: arrow.Second} - b := array.NewTime32Builder(mem, dt) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Time32) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewTime32Builder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Time32) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewTime32Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time32Type{Unit: arrow.Second} - ab := array.NewTime32Builder(mem, dtype) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewTime32Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewTime32Array() - - // check state of builder after NewTime32Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewTime32Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewTime32Array did not reset state") - assert.Zero(t, 
ab.NullN(), "unexpected ArrayBuilder.NullN(), NewTime32Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []arrow.Time32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Time32Values(), "unexpected Time32Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Time32Values(), 10, "unexpected length of Time32Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewTime32Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []arrow.Time32{7, 8}, a.Time32Values()) - assert.Len(t, a.Time32Values(), 2) - - a.Release() - - var ( - want = []arrow.Time32{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewTime32Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Time32); !ok { - t.Fatalf("could not type-assert to array.Time32") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Time32) - if !ok { - t.Fatalf("could not type-assert to array.Time32") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestTime32Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time32Type{Unit: arrow.Second} - ab := array.NewTime32Builder(mem, dtype) - defer ab.Release() - - exp := []arrow.Time32{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewTime32Array() - assert.Equal(t, exp, a.Time32Values()) - - a.Release() -} - 
-func TestTime32Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time32Type{Unit: arrow.Second} - ab := array.NewTime32Builder(mem, dtype) - defer ab.Release() - - exp := []arrow.Time32{0, 1, 2, 3} - - ab.AppendValues([]arrow.Time32{}, nil) - a := ab.NewTime32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewTime32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]arrow.Time32{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewTime32Array() - assert.Equal(t, exp, a.Time32Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]arrow.Time32{}, nil) - a = ab.NewTime32Array() - assert.Equal(t, exp, a.Time32Values()) - a.Release() -} - -func TestTime32Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time32Type{Unit: arrow.Second} - ab := array.NewTime32Builder(mem, dtype) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestTime64StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := &arrow.Time64Type{Unit: arrow.Microsecond} - b := array.NewTime64Builder(mem, dt) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Time64) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewTime64Builder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Time64) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewTime64Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time64Type{Unit: arrow.Second} - ab := array.NewTime64Builder(mem, dtype) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewTime64Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewTime64Array() - - // check state of builder after NewTime64Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewTime64Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewTime64Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewTime64Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []arrow.Time64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Time64Values(), "unexpected Time64Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Time64Values(), 10, "unexpected length of Time64Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewTime64Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []arrow.Time64{7, 8}, a.Time64Values()) - assert.Len(t, a.Time64Values(), 2) - - a.Release() - - var ( - want = []arrow.Time64{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - 
a = ab.NewTime64Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Time64); !ok { - t.Fatalf("could not type-assert to array.Time64") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Time64) - if !ok { - t.Fatalf("could not type-assert to array.Time64") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestTime64Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time64Type{Unit: arrow.Second} - ab := array.NewTime64Builder(mem, dtype) - defer ab.Release() - - exp := []arrow.Time64{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewTime64Array() - assert.Equal(t, exp, a.Time64Values()) - - a.Release() -} - -func TestTime64Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time64Type{Unit: arrow.Second} - ab := array.NewTime64Builder(mem, dtype) - defer ab.Release() - - exp := []arrow.Time64{0, 1, 2, 3} - - ab.AppendValues([]arrow.Time64{}, nil) - a := ab.NewTime64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewTime64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]arrow.Time64{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewTime64Array() - assert.Equal(t, exp, a.Time64Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]arrow.Time64{}, nil) - a = ab.NewTime64Array() - assert.Equal(t, exp, a.Time64Values()) - a.Release() 
-} - -func TestTime64Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time64Type{Unit: arrow.Second} - ab := array.NewTime64Builder(mem, dtype) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestDate32StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewDate32Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Date32) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewDate32Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Date32) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewDate32Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDate32Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewDate32Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewDate32Array() - - // check state of builder after NewDate32Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDate32Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDate32Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDate32Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []arrow.Date32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Date32Values(), "unexpected Date32Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Date32Values(), 10, "unexpected length of Date32Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewDate32Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []arrow.Date32{7, 8}, a.Date32Values()) - assert.Len(t, a.Date32Values(), 2) - - a.Release() - - var ( - want = []arrow.Date32{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewDate32Array() - - sub := 
array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Date32); !ok { - t.Fatalf("could not type-assert to array.Date32") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Date32) - if !ok { - t.Fatalf("could not type-assert to array.Date32") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestDate32Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDate32Builder(mem) - defer ab.Release() - - exp := []arrow.Date32{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewDate32Array() - assert.Equal(t, exp, a.Date32Values()) - - a.Release() -} - -func TestDate32Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDate32Builder(mem) - defer ab.Release() - - exp := []arrow.Date32{0, 1, 2, 3} - - ab.AppendValues([]arrow.Date32{}, nil) - a := ab.NewDate32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewDate32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]arrow.Date32{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewDate32Array() - assert.Equal(t, exp, a.Date32Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]arrow.Date32{}, nil) - a = ab.NewDate32Array() - assert.Equal(t, exp, a.Date32Values()) - a.Release() -} - -func TestDate32Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - 
ab := array.NewDate32Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestDate64StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewDate64Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Date64) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewDate64Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Date64) - defer arr1.Release() - - assert.Exactly(t, arr.Len(), arr1.Len()) - for i := 0; i < arr.Len(); i++ { - assert.Exactly(t, arr.IsValid(i), arr1.IsValid(i)) - assert.Exactly(t, arr.ValueStr(i), arr1.ValueStr(i)) - if arr.IsValid(i) { - assert.Exactly(t, arr.Value(i).ToTime(), arr1.Value(i).ToTime()) - } - } -} - -func TestNewDate64Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDate64Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewDate64Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewDate64Array() - - // check state of 
builder after NewDate64Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDate64Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDate64Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDate64Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []arrow.Date64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Date64Values(), "unexpected Date64Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Date64Values(), 10, "unexpected length of Date64Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewDate64Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []arrow.Date64{7, 8}, a.Date64Values()) - assert.Len(t, a.Date64Values(), 2) - - a.Release() - - var ( - want = []arrow.Date64{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewDate64Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Date64); !ok { - t.Fatalf("could not type-assert to array.Date64") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Date64) - if !ok { - t.Fatalf("could not type-assert to array.Date64") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestDate64Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := 
array.NewDate64Builder(mem) - defer ab.Release() - - exp := []arrow.Date64{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewDate64Array() - assert.Equal(t, exp, a.Date64Values()) - - a.Release() -} - -func TestDate64Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDate64Builder(mem) - defer ab.Release() - - exp := []arrow.Date64{0, 1, 2, 3} - - ab.AppendValues([]arrow.Date64{}, nil) - a := ab.NewDate64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewDate64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]arrow.Date64{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewDate64Array() - assert.Equal(t, exp, a.Date64Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]arrow.Date64{}, nil) - a = ab.NewDate64Array() - assert.Equal(t, exp, a.Date64Values()) - a.Release() -} - -func TestDate64Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDate64Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestDurationStringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := &arrow.DurationType{Unit: arrow.Second} - b := array.NewDurationBuilder(mem, dt) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Duration) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewDurationBuilder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Duration) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewDurationBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.DurationType{Unit: arrow.Second} - ab := array.NewDurationBuilder(mem, dtype) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewDurationArray - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewDurationArray() - - // check state of builder after NewDurationArray - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDurationArray did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDurationArray did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDurationArray did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []arrow.Duration{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.DurationValues(), "unexpected DurationValues") - assert.Equal(t, []byte{0xb7}, 
a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.DurationValues(), 10, "unexpected length of DurationValues") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewDurationArray() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []arrow.Duration{7, 8}, a.DurationValues()) - assert.Len(t, a.DurationValues(), 2) - - a.Release() - - var ( - want = []arrow.Duration{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewDurationArray() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Duration); !ok { - t.Fatalf("could not type-assert to array.Duration") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Duration) - if !ok { - t.Fatalf("could not type-assert to array.Duration") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestDurationBuilder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.DurationType{Unit: arrow.Second} - ab := array.NewDurationBuilder(mem, dtype) - defer ab.Release() - - exp := []arrow.Duration{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewDurationArray() - assert.Equal(t, exp, a.DurationValues()) - - a.Release() -} - -func TestDurationBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.DurationType{Unit: arrow.Second} - ab := array.NewDurationBuilder(mem, dtype) - defer ab.Release() - - exp := 
[]arrow.Duration{0, 1, 2, 3} - - ab.AppendValues([]arrow.Duration{}, nil) - a := ab.NewDurationArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewDurationArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]arrow.Duration{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewDurationArray() - assert.Equal(t, exp, a.DurationValues()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]arrow.Duration{}, nil) - a = ab.NewDurationArray() - assert.Equal(t, exp, a.DurationValues()) - a.Release() -} - -func TestDurationBuilder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.DurationType{Unit: arrow.Second} - ab := array.NewDurationBuilder(mem, dtype) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} diff --git a/go/arrow/array/numericbuilder.gen_test.go.tmpl b/go/arrow/array/numericbuilder.gen_test.go.tmpl deleted file mode 100644 index f3cd08a63745d..0000000000000 --- a/go/arrow/array/numericbuilder.gen_test.go.tmpl +++ /dev/null @@ -1,299 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -{{range .In}} -func Test{{.Name}}StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - -{{if .Opt.Parametric -}} -{{ if or (eq .Name "Time64") -}} - dt := &arrow.{{.Name}}Type{Unit: arrow.Microsecond} -{{else -}} - dt := &arrow.{{.Name}}Type{Unit: arrow.Second} -{{end -}} - b := array.New{{.Name}}Builder(mem, dt) -{{else -}} - b := array.New{{.Name}}Builder(mem) -{{end -}} - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.{{.Name}}) - defer arr.Release() - - // 2. 
create array via AppendValueFromString -{{if .Opt.Parametric -}} - b1 := array.New{{.Name}}Builder(mem, dt) -{{else -}} - b1 := array.New{{.Name}}Builder(mem) -{{end -}} - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.{{.Name}}) - defer arr1.Release() - -{{ if or (eq .Name "Date64") -}} - assert.Exactly(t, arr.Len(), arr1.Len()) - for i := 0; i < arr.Len(); i++ { - assert.Exactly(t, arr.IsValid(i), arr1.IsValid(i)) - assert.Exactly(t, arr.ValueStr(i), arr1.ValueStr(i)) - if arr.IsValid(i) { - assert.Exactly(t, arr.Value(i).ToTime(), arr1.Value(i).ToTime()) - } - } -{{else -}} - assert.True(t, array.Equal(arr, arr1)) -{{end -}} -} - -func TestNew{{.Name}}Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - -{{if .Opt.Parametric -}} - dtype := &arrow.{{.Name}}Type{Unit: arrow.Second} - ab := array.New{{.Name}}Builder(mem, dtype) -{{else}} - ab := array.New{{.Name}}Builder(mem) -{{end -}} - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before New{{.Name}}Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.New{{.Name}}Array() - - // check state of builder after New{{.Name}}Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), New{{.Name}}Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), New{{.Name}}Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), New{{.Name}}Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []{{or .QualifiedType .Type}}{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, 
a.{{.Name}}Values(), "unexpected {{.Name}}Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.{{.Name}}Values(), 10, "unexpected length of {{.Name}}Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.New{{.Name}}Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []{{or .QualifiedType .Type}}{7, 8}, a.{{.Name}}Values()) - assert.Len(t, a.{{.Name}}Values(), 2) - - a.Release() - - var ( - want = []{{or .QualifiedType .Type}}{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.New{{.Name}}Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.{{.Name}}); !ok { - t.Fatalf("could not type-assert to array.{{.Name}}") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.{{.Name}}) - if !ok { - t.Fatalf("could not type-assert to array.{{.Name}}") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func Test{{.Name}}Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - -{{if .Opt.Parametric -}} - dtype := &arrow.{{.Name}}Type{Unit: arrow.Second} - ab := array.New{{.Name}}Builder(mem, dtype) -{{else}} - ab := array.New{{.Name}}Builder(mem) -{{end -}} - defer ab.Release() - - exp := []{{or .QualifiedType .Type}}{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.New{{.Name}}Array() - assert.Equal(t, exp, a.{{.Name}}Values()) - - a.Release() -} - -func Test{{.Name}}Builder_Empty(t *testing.T) { - 
mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - -{{if .Opt.Parametric -}} - dtype := &arrow.{{.Name}}Type{Unit: arrow.Second} - ab := array.New{{.Name}}Builder(mem, dtype) -{{else}} - ab := array.New{{.Name}}Builder(mem) -{{end -}} - defer ab.Release() - - exp := []{{or .QualifiedType .Type}}{0, 1, 2, 3} - - ab.AppendValues([]{{or .QualifiedType .Type}}{}, nil) - a := ab.New{{.Name}}Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.New{{.Name}}Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]{{or .QualifiedType .Type}}{}, nil) - ab.AppendValues(exp, nil) - a = ab.New{{.Name}}Array() - assert.Equal(t, exp, a.{{.Name}}Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]{{or .QualifiedType .Type}}{}, nil) - a = ab.New{{.Name}}Array() - assert.Equal(t, exp, a.{{.Name}}Values()) - a.Release() -} - -func Test{{.Name}}Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - -{{if .Opt.Parametric -}} - dtype := &arrow.{{.Name}}Type{Unit: arrow.Second} - ab := array.New{{.Name}}Builder(mem, dtype) -{{else}} - ab := array.New{{.Name}}Builder(mem) -{{end -}} - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func Test{{.Name}}BuilderUnmarshalJSON(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - bldr := array.New{{.Name}}Builder(mem) - defer bldr.Release() - - jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` - - err := bldr.UnmarshalJSON([]byte(jsonstr)) - assert.NoError(t, err) - - arr := 
bldr.New{{.Name}}Array() - defer arr.Release() - - assert.NotNil(t, arr) - - assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) - assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) - assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) -} - -{{end}} - - diff --git a/go/arrow/array/record.go b/go/arrow/array/record.go deleted file mode 100644 index 2735f1baa9a30..0000000000000 --- a/go/arrow/array/record.go +++ /dev/null @@ -1,411 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "fmt" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// RecordReader reads a stream of records. -type RecordReader interface { - Retain() - Release() - - Schema() *arrow.Schema - - Next() bool - Record() arrow.Record - Err() error -} - -// simpleRecords is a simple iterator over a collection of records. 
-type simpleRecords struct { - refCount int64 - - schema *arrow.Schema - recs []arrow.Record - cur arrow.Record -} - -// NewRecordReader returns a simple iterator over the given slice of records. -func NewRecordReader(schema *arrow.Schema, recs []arrow.Record) (RecordReader, error) { - rs := &simpleRecords{ - refCount: 1, - schema: schema, - recs: recs, - cur: nil, - } - - for _, rec := range rs.recs { - rec.Retain() - } - - for _, rec := range recs { - if !rec.Schema().Equal(rs.schema) { - rs.Release() - return nil, fmt.Errorf("arrow/array: mismatch schema") - } - } - - return rs, nil -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (rs *simpleRecords) Retain() { - atomic.AddInt64(&rs.refCount, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (rs *simpleRecords) Release() { - debug.Assert(atomic.LoadInt64(&rs.refCount) > 0, "too many releases") - - if atomic.AddInt64(&rs.refCount, -1) == 0 { - if rs.cur != nil { - rs.cur.Release() - } - for _, rec := range rs.recs { - rec.Release() - } - rs.recs = nil - } -} - -func (rs *simpleRecords) Schema() *arrow.Schema { return rs.schema } -func (rs *simpleRecords) Record() arrow.Record { return rs.cur } -func (rs *simpleRecords) Next() bool { - if len(rs.recs) == 0 { - return false - } - if rs.cur != nil { - rs.cur.Release() - } - rs.cur = rs.recs[0] - rs.recs = rs.recs[1:] - return true -} -func (rs *simpleRecords) Err() error { return nil } - -// simpleRecord is a basic, non-lazy in-memory record batch. -type simpleRecord struct { - refCount int64 - - schema *arrow.Schema - - rows int64 - arrs []arrow.Array -} - -// NewRecord returns a basic, non-lazy in-memory record batch. -// -// NewRecord panics if the columns and schema are inconsistent. 
-// NewRecord panics if rows is larger than the height of the columns. -func NewRecord(schema *arrow.Schema, cols []arrow.Array, nrows int64) arrow.Record { - rec := &simpleRecord{ - refCount: 1, - schema: schema, - rows: nrows, - arrs: make([]arrow.Array, len(cols)), - } - copy(rec.arrs, cols) - for _, arr := range rec.arrs { - arr.Retain() - } - - if rec.rows < 0 { - switch len(rec.arrs) { - case 0: - rec.rows = 0 - default: - rec.rows = int64(rec.arrs[0].Len()) - } - } - - err := rec.validate() - if err != nil { - rec.Release() - panic(err) - } - - return rec -} - -func (rec *simpleRecord) SetColumn(i int, arr arrow.Array) (arrow.Record, error) { - if i < 0 || i >= len(rec.arrs) { - return nil, fmt.Errorf("arrow/array: column index out of range [0, %d): got=%d", len(rec.arrs), i) - } - - if arr.Len() != int(rec.rows) { - return nil, fmt.Errorf("arrow/array: mismatch number of rows in column %q: got=%d, want=%d", - rec.schema.Field(i).Name, - arr.Len(), rec.rows, - ) - } - - f := rec.schema.Field(i) - if !arrow.TypeEqual(f.Type, arr.DataType()) { - return nil, fmt.Errorf("arrow/array: column %q type mismatch: got=%v, want=%v", - f.Name, - arr.DataType(), f.Type, - ) - } - arrs := make([]arrow.Array, len(rec.arrs)) - copy(arrs, rec.arrs) - arrs[i] = arr - - return NewRecord(rec.schema, arrs, rec.rows), nil -} - -func (rec *simpleRecord) validate() error { - if rec.rows == 0 && len(rec.arrs) == 0 { - return nil - } - - if len(rec.arrs) != rec.schema.NumFields() { - return fmt.Errorf("arrow/array: number of columns/fields mismatch") - } - - for i, arr := range rec.arrs { - f := rec.schema.Field(i) - if int64(arr.Len()) < rec.rows { - return fmt.Errorf("arrow/array: mismatch number of rows in column %q: got=%d, want=%d", - f.Name, - arr.Len(), rec.rows, - ) - } - if !arrow.TypeEqual(f.Type, arr.DataType()) { - return fmt.Errorf("arrow/array: column %q type mismatch: got=%v, want=%v", - f.Name, - arr.DataType(), f.Type, - ) - } - } - return nil -} - -// Retain 
increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (rec *simpleRecord) Retain() { - atomic.AddInt64(&rec.refCount, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (rec *simpleRecord) Release() { - debug.Assert(atomic.LoadInt64(&rec.refCount) > 0, "too many releases") - - if atomic.AddInt64(&rec.refCount, -1) == 0 { - for _, arr := range rec.arrs { - arr.Release() - } - rec.arrs = nil - } -} - -func (rec *simpleRecord) Schema() *arrow.Schema { return rec.schema } -func (rec *simpleRecord) NumRows() int64 { return rec.rows } -func (rec *simpleRecord) NumCols() int64 { return int64(len(rec.arrs)) } -func (rec *simpleRecord) Columns() []arrow.Array { return rec.arrs } -func (rec *simpleRecord) Column(i int) arrow.Array { return rec.arrs[i] } -func (rec *simpleRecord) ColumnName(i int) string { return rec.schema.Field(i).Name } - -// NewSlice constructs a zero-copy slice of the record with the indicated -// indices i and j, corresponding to array[i:j]. -// The returned record must be Release()'d after use. -// -// NewSlice panics if the slice is outside the valid range of the record array. -// NewSlice panics if j < i. 
-func (rec *simpleRecord) NewSlice(i, j int64) arrow.Record { - arrs := make([]arrow.Array, len(rec.arrs)) - for ii, arr := range rec.arrs { - arrs[ii] = NewSlice(arr, i, j) - } - defer func() { - for _, arr := range arrs { - arr.Release() - } - }() - return NewRecord(rec.schema, arrs, j-i) -} - -func (rec *simpleRecord) String() string { - o := new(strings.Builder) - fmt.Fprintf(o, "record:\n %v\n", rec.schema) - fmt.Fprintf(o, " rows: %d\n", rec.rows) - for i, col := range rec.arrs { - fmt.Fprintf(o, " col[%d][%s]: %v\n", i, rec.schema.Field(i).Name, col) - } - - return o.String() -} - -func (rec *simpleRecord) MarshalJSON() ([]byte, error) { - arr := RecordToStructArray(rec) - defer arr.Release() - return arr.MarshalJSON() -} - -// RecordBuilder eases the process of building a Record, iteratively, from -// a known Schema. -type RecordBuilder struct { - refCount int64 - mem memory.Allocator - schema *arrow.Schema - fields []Builder -} - -// NewRecordBuilder returns a builder, using the provided memory allocator and a schema. -func NewRecordBuilder(mem memory.Allocator, schema *arrow.Schema) *RecordBuilder { - b := &RecordBuilder{ - refCount: 1, - mem: mem, - schema: schema, - fields: make([]Builder, schema.NumFields()), - } - - for i := 0; i < schema.NumFields(); i++ { - b.fields[i] = NewBuilder(b.mem, schema.Field(i).Type) - } - - return b -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (b *RecordBuilder) Retain() { - atomic.AddInt64(&b.refCount, 1) -} - -// Release decreases the reference count by 1. 
-func (b *RecordBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - for _, f := range b.fields { - f.Release() - } - b.fields = nil - } -} - -func (b *RecordBuilder) Schema() *arrow.Schema { return b.schema } -func (b *RecordBuilder) Fields() []Builder { return b.fields } -func (b *RecordBuilder) Field(i int) Builder { return b.fields[i] } - -func (b *RecordBuilder) Reserve(size int) { - for _, f := range b.fields { - f.Reserve(size) - } -} - -// NewRecord creates a new record from the memory buffers and resets the -// RecordBuilder so it can be used to build a new record. -// -// The returned Record must be Release()'d after use. -// -// NewRecord panics if the fields' builder do not have the same length. -func (b *RecordBuilder) NewRecord() arrow.Record { - cols := make([]arrow.Array, len(b.fields)) - rows := int64(0) - - defer func(cols []arrow.Array) { - for _, col := range cols { - if col == nil { - continue - } - col.Release() - } - }(cols) - - for i, f := range b.fields { - cols[i] = f.NewArray() - irow := int64(cols[i].Len()) - if i > 0 && irow != rows { - panic(fmt.Errorf("arrow/array: field %d has %d rows. want=%d", i, irow, rows)) - } - rows = irow - } - - return NewRecord(b.schema, cols, rows) -} - -// UnmarshalJSON for record builder will read in a single object and add the values -// to each field in the recordbuilder, missing fields will get a null and unexpected -// keys will be ignored. If reading in an array of records as a single batch, then use -// a structbuilder and use RecordFromStruct. 
-func (b *RecordBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - // should start with a '{' - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '{' { - return fmt.Errorf("record should start with '{', not %s", t) - } - - keylist := make(map[string]bool) - for dec.More() { - keyTok, err := dec.Token() - if err != nil { - return err - } - - key := keyTok.(string) - if keylist[key] { - return fmt.Errorf("key %s shows up twice in row to be decoded", key) - } - keylist[key] = true - - indices := b.schema.FieldIndices(key) - if len(indices) == 0 { - var extra interface{} - if err := dec.Decode(&extra); err != nil { - return err - } - continue - } - - if err := b.fields[indices[0]].UnmarshalOne(dec); err != nil { - return err - } - } - - for i := 0; i < b.schema.NumFields(); i++ { - if !keylist[b.schema.Field(i).Name] { - b.fields[i].AppendNull() - } - } - return nil -} - -var ( - _ arrow.Record = (*simpleRecord)(nil) - _ RecordReader = (*simpleRecords)(nil) -) diff --git a/go/arrow/array/record_test.go b/go/arrow/array/record_test.go deleted file mode 100644 index 8e6dc3b06d25e..0000000000000 --- a/go/arrow/array/record_test.go +++ /dev/null @@ -1,787 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "fmt" - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestRecord(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, - }, - nil, - ) - col1 := func() arrow.Array { - ib := array.NewInt32Builder(mem) - defer ib.Release() - - ib.AppendValues([]int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) - return ib.NewInt32Array() - }() - defer col1.Release() - - col2 := func() arrow.Array { - b := array.NewFloat64Builder(mem) - defer b.Release() - - b.AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) - return b.NewFloat64Array() - }() - defer col2.Release() - - col2_1 := func() arrow.Array { - b := array.NewFloat64Builder(mem) - defer b.Release() - - b.AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) - return b.NewFloat64Array() - }() - defer col2_1.Release() - - cols := []arrow.Array{col1, col2} - rec := array.NewRecord(schema, cols, -1) - defer rec.Release() - - rec.Retain() - rec.Release() - - if got, want := rec.Schema(), schema; !got.Equal(want) { - t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) - } - - if got, want := rec.NumRows(), int64(10); got != want { - t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) - } - if got, want := rec.NumCols(), int64(2); got != want { - t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) - } - if got, want := rec.Columns()[0], cols[0]; got != want { - t.Fatalf("invalid column: got=%q, want=%q", got, want) - } - if got, want := rec.Column(0), cols[0]; got != want { - 
t.Fatalf("invalid column: got=%q, want=%q", got, want) - } - if got, want := rec.ColumnName(0), schema.Field(0).Name; got != want { - t.Fatalf("invalid column name: got=%q, want=%q", got, want) - } - if _, err := rec.SetColumn(0, col2_1); err == nil { - t.Fatalf("expected an error") - } - newRec, err := rec.SetColumn(1, col2_1) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - defer newRec.Release() - if !reflect.DeepEqual(newRec.Column(1), col2_1) { - t.Fatalf("invalid column: got=%q, want=%q", rec.Column(1), col2_1) - } - - for _, tc := range []struct { - i, j int64 - err error - }{ - {i: 0, j: 10, err: nil}, - {i: 1, j: 10, err: nil}, - {i: 1, j: 9, err: nil}, - {i: 0, j: 0, err: nil}, - {i: 1, j: 1, err: nil}, - {i: 10, j: 10, err: nil}, - {i: 1, j: 0, err: fmt.Errorf("arrow/array: index out of range")}, - {i: 1, j: 11, err: fmt.Errorf("arrow/array: index out of range")}, - } { - t.Run(fmt.Sprintf("slice-%02d-%02d", tc.i, tc.j), func(t *testing.T) { - if tc.err != nil { - defer func() { - e := recover() - if e == nil { - t.Fatalf("expected an error %q", tc.err) - } - switch err := e.(type) { - case string: - if err != tc.err.Error() { - t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) - } - case error: - if err.Error() != tc.err.Error() { - t.Fatalf("invalid panic message. 
got=%q, want=%q", err, tc.err) - } - default: - t.Fatalf("invalid type for panic message: %T (err=%v)", err, err) - } - }() - } - sub := rec.NewSlice(tc.i, tc.j) - defer sub.Release() - - if got, want := sub.NumRows(), tc.j-tc.i; got != want { - t.Fatalf("invalid rec-slice number of rows: got=%d, want=%d", got, want) - } - }) - } - - for _, tc := range []struct { - schema *arrow.Schema - cols []arrow.Array - rows int64 - err error - }{ - { - schema: schema, - cols: nil, - rows: 0, - }, - { - schema: schema, - cols: cols[:1], - rows: 0, - err: fmt.Errorf("arrow/array: number of columns/fields mismatch"), - }, - { - schema: arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - }, - nil, - ), - cols: cols, - rows: 0, - err: fmt.Errorf("arrow/array: number of columns/fields mismatch"), - }, - { - schema: arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Int32}, - }, - nil, - ), - cols: cols, - rows: 0, - err: fmt.Errorf(`arrow/array: column "f2-f64" type mismatch: got=float64, want=int32`), - }, - { - schema: schema, - cols: cols, - rows: 11, - err: fmt.Errorf(`arrow/array: mismatch number of rows in column "f1-i32": got=10, want=11`), - }, - { - schema: schema, - cols: cols, - rows: 10, - err: nil, - }, - { - schema: schema, - cols: cols, - rows: 3, - err: nil, - }, - { - schema: schema, - cols: cols, - rows: 0, - err: nil, - }, - } { - t.Run("", func(t *testing.T) { - if tc.err != nil { - defer func() { - e := recover() - if e == nil { - t.Fatalf("expected an error %q", tc.err) - } - switch err := e.(type) { - case string: - if err != tc.err.Error() { - t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) - } - case error: - if err.Error() != tc.err.Error() { - t.Fatalf("invalid panic message. 
got=%q, want=%q", err, tc.err) - } - default: - t.Fatalf("invalid type for panic message: %T (err=%v)", err, err) - } - }() - } - rec := array.NewRecord(tc.schema, tc.cols, tc.rows) - defer rec.Release() - if got, want := rec.NumRows(), tc.rows; got != want { - t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) - } - }) - } -} - -func TestRecordReader(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, - }, - nil, - ) - rec1 := func() arrow.Record { - col1 := func() arrow.Array { - ib := array.NewInt32Builder(mem) - defer ib.Release() - - ib.AppendValues([]int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) - return ib.NewInt32Array() - }() - defer col1.Release() - - col2 := func() arrow.Array { - b := array.NewFloat64Builder(mem) - defer b.Release() - - b.AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) - return b.NewFloat64Array() - }() - defer col2.Release() - - cols := []arrow.Array{col1, col2} - return array.NewRecord(schema, cols, -1) - }() - defer rec1.Release() - - rec2 := func() arrow.Record { - col1 := func() arrow.Array { - ib := array.NewInt32Builder(mem) - defer ib.Release() - - ib.AppendValues([]int32{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) - return ib.NewInt32Array() - }() - defer col1.Release() - - col2 := func() arrow.Array { - b := array.NewFloat64Builder(mem) - defer b.Release() - - b.AppendValues([]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) - return b.NewFloat64Array() - }() - defer col2.Release() - - cols := []arrow.Array{col1, col2} - return array.NewRecord(schema, cols, -1) - }() - defer rec2.Release() - - recs := []arrow.Record{rec1, rec2} - itr, err := array.NewRecordReader(schema, recs) - if err != nil { - t.Fatal(err) - } - defer itr.Release() - - itr.Retain() - itr.Release() - - if got, want := 
itr.Schema(), schema; !got.Equal(want) { - t.Fatalf("invalid schema. got=%#v, want=%#v", got, want) - } - - n := 0 - for itr.Next() { - n++ - if got, want := itr.Record(), recs[n-1]; !reflect.DeepEqual(got, want) { - t.Fatalf("itr[%d], invalid record. got=%#v, want=%#v", n-1, got, want) - } - } - if err := itr.Err(); err != nil { - t.Fatalf("itr error: %#v", err) - } - - if n != len(recs) { - t.Fatalf("invalid number of iterations. got=%d, want=%d", n, len(recs)) - } - - for _, tc := range []struct { - name string - schema *arrow.Schema - err error - }{ - { - name: "mismatch-name", - schema: arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-XXX", Type: arrow.PrimitiveTypes.Float64}, - }, - nil, - ), - err: fmt.Errorf("arrow/array: mismatch schema"), - }, - { - name: "mismatch-type", - schema: arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Int64}, - }, - nil, - ), - err: fmt.Errorf("arrow/array: mismatch schema"), - }, - } { - t.Run(tc.name, func(t *testing.T) { - itr, err := array.NewRecordReader(tc.schema, recs) - if itr != nil { - itr.Release() - } - if err == nil { - t.Fatalf("expected an error: %v", tc.err) - } - if !assert.Equal(t, tc.err, err) { - t.Fatalf("invalid error: got=%v, want=%v", err, tc.err) - } - }) - } -} - -func TestRecordBuilderRespectsFixedSizeArrayNullability(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - cases := []struct { - assertion string - fields []arrow.Field - }{ - { - "nullable", - []arrow.Field{{Name: "data", Type: arrow.FixedSizeListOf(1, arrow.PrimitiveTypes.Int32)}}, - }, - { - "not nullable", - []arrow.Field{{Name: "data", Type: arrow.FixedSizeListOfNonNullable(1, arrow.PrimitiveTypes.Int32)}}, - }, - } - for _, c := range cases { - t.Run(c.assertion, func(t *testing.T) { - schema := arrow.NewSchema(c.fields, nil) - b := 
array.NewRecordBuilder(mem, schema) - defer b.Release() - - lb := b.Field(0).(*array.FixedSizeListBuilder) - lb.Append(true) - - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Append(10) - - rec := b.NewRecord() - defer rec.Release() - - if got, want := rec.Column(0).String(), "[[10]]"; got != want { - t.Fatalf("invalid record: got=%q, want=%q", got, want) - } - }) - } -} - -func TestRecordBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - mapDt := arrow.MapOf(arrow.BinaryTypes.String, arrow.BinaryTypes.String) - mapDt.KeysSorted = true - mapDt.SetItemNullable(false) - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, - {Name: "map", Type: mapDt}, - }, - nil, - ) - - b := array.NewRecordBuilder(mem, schema) - defer b.Release() - - b.Retain() - b.Release() - - b.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3}, nil) - b.Field(0).(*array.Int32Builder).AppendValues([]int32{4, 5}, nil) - b.Field(1).(*array.Float64Builder).AppendValues([]float64{1, 2, 3, 4, 5}, nil) - mb := b.Field(2).(*array.MapBuilder) - for i := 0; i < 5; i++ { - mb.Append(true) - - if i%3 == 0 { - mb.KeyBuilder().(*array.StringBuilder).AppendValues([]string{fmt.Sprint(i), "2", "3"}, nil) - mb.ItemBuilder().(*array.StringBuilder).AppendValues([]string{"a", "b", "c"}, nil) - } - } - - rec := b.NewRecord() - defer rec.Release() - - if got, want := rec.Schema(), schema; !got.Equal(want) { - t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) - } - - if got, want := rec.NumRows(), int64(5); got != want { - t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) - } - if got, want := rec.NumCols(), int64(3); got != want { - t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) - } - if got, want := rec.ColumnName(0), schema.Field(0).Name; got != want { - t.Fatalf("invalid column name: got=%q, 
want=%q", got, want) - } - if got, want := rec.Column(2).String(), `[{["0" "2" "3"] ["a" "b" "c"]} {[] []} {[] []} {["3" "2" "3"] ["a" "b" "c"]} {[] []}]`; got != want { - t.Fatalf("invalid column name: got=%q, want=%q", got, want) - } -} - -type testMessage struct { - Foo *testMessageFoo - Bars []*testMessageBar -} - -func (m *testMessage) Reset() { *m = testMessage{} } - -func (m *testMessage) GetFoo() *testMessageFoo { - if m != nil { - return m.Foo - } - return nil -} - -func (m *testMessage) GetBars() []*testMessageBar { - if m != nil { - return m.Bars - } - return nil -} - -type testMessageFoo struct { - A int32 - B []uint32 -} - -func (m *testMessageFoo) Reset() { *m = testMessageFoo{} } - -func (m *testMessageFoo) GetA() int32 { - if m != nil { - return m.A - } - return 0 -} - -func (m *testMessageFoo) GetB() []uint32 { - if m != nil { - return m.B - } - return nil -} - -type testMessageBar struct { - C int64 - D []uint64 -} - -func (m *testMessageBar) Reset() { *m = testMessageBar{} } - -func (m *testMessageBar) GetC() int64 { - if m != nil { - return m.C - } - return 0 -} - -func (m *testMessageBar) GetD() []uint64 { - if m != nil { - return m.D - } - return nil -} - -var testMessageSchema = arrow.NewSchema( - []arrow.Field{ - {Name: "foo", Type: arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int32}, - arrow.Field{Name: "b", Type: arrow.ListOf( - arrow.PrimitiveTypes.Uint32, - )}, - )}, - {Name: "bars", Type: arrow.ListOf( - arrow.StructOf( - arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int64}, - arrow.Field{Name: "d", Type: arrow.ListOf( - arrow.PrimitiveTypes.Uint64, - )}, - ), - )}, - }, - nil, -) - -func (m *testMessage) Fill(rec arrow.Record, row int) error { - m.Reset() - - // foo - if 0 < rec.NumCols() { - src0 := rec.Column(0).Data() - typedSrc0 := array.NewStructData(src0) - defer typedSrc0.Release() - if typedSrc0.IsValid(row) { - m0 := &testMessageFoo{} - { - - // a - if 0 < typedSrc0.NumField() { - src0_0 := 
typedSrc0.Field(0).Data() - typedSrc0_0 := array.NewInt32Data(src0_0) - defer typedSrc0_0.Release() - m0.A = typedSrc0_0.Value(row) - } - - // b - if 1 < typedSrc0.NumField() { - src0_1 := typedSrc0.Field(1).Data() - listSrc0_1 := array.NewListData(src0_1) - defer listSrc0_1.Release() - if listSrc0_1.IsValid(row) { - typedSrc0_1 := array.NewUint32Data(listSrc0_1.ListValues().Data()) - typedSrc0_1.Release() - start0_1 := int(listSrc0_1.Offsets()[row]) - end0_1 := int(listSrc0_1.Offsets()[row+1]) - for row := start0_1; row < end0_1; row++ { - m0.B = append(m0.B, typedSrc0_1.Value(row)) - } - } - } - } - m.Foo = m0 - } - } - - // bars - if 1 < rec.NumCols() { - src1 := rec.Column(1).Data() - listSrc1 := array.NewListData(src1) - defer listSrc1.Release() - if listSrc1.IsValid(row) { - typedSrc1 := array.NewStructData(listSrc1.ListValues().Data()) - defer typedSrc1.Release() - start1 := int(listSrc1.Offsets()[row]) - end1 := int(listSrc1.Offsets()[row+1]) - for row := start1; row < end1; row++ { - if typedSrc1.IsValid(row) { - m1 := &testMessageBar{} - { - - // c - if 0 < typedSrc1.NumField() { - src1_0 := typedSrc1.Field(0).Data() - typedSrc1_0 := array.NewInt64Data(src1_0) - defer typedSrc1_0.Release() - m1.C = typedSrc1_0.Value(row) - } - - // d - if 1 < typedSrc1.NumField() { - src1_1 := typedSrc1.Field(1).Data() - listSrc1_1 := array.NewListData(src1_1) - defer listSrc1_1.Release() - if listSrc1_1.IsValid(row) { - typedSrc1_1 := array.NewUint64Data(listSrc1_1.ListValues().Data()) - defer typedSrc1_1.Release() - start1_1 := int(listSrc1_1.Offsets()[row]) - end1_1 := int(listSrc1_1.Offsets()[row+1]) - for row := start1_1; row < end1_1; row++ { - m1.D = append(m1.D, typedSrc1_1.Value(row)) - } - } - } - } - m.Bars = append(m.Bars, m1) - } else { - m.Bars = append(m.Bars, nil) - } - } - } - } - return nil -} - -func newTestMessageArrowRecordBuilder(mem memory.Allocator) *testMessageArrowRecordBuilder { - return &testMessageArrowRecordBuilder{ - rb: 
array.NewRecordBuilder(mem, testMessageSchema), - } -} - -type testMessageArrowRecordBuilder struct { - rb *array.RecordBuilder -} - -func (b *testMessageArrowRecordBuilder) Build() arrow.Record { - return b.rb.NewRecord() -} - -func (b *testMessageArrowRecordBuilder) Release() { - b.rb.Release() -} - -func (b *testMessageArrowRecordBuilder) Append(m *testMessage) { - - // foo - { - builder0 := b.rb.Field(0) - v0 := m.GetFoo() - valueBuilder0 := builder0.(*array.StructBuilder) - if v0 == nil { - valueBuilder0.AppendNull() - } else { - valueBuilder0.Append(true) - - // a - { - v0_0 := v0.GetA() - builder0_0 := valueBuilder0.FieldBuilder(0) - valueBuilder0_0 := builder0_0.(*array.Int32Builder) - valueBuilder0_0.Append(v0_0) - } - - // b - { - v0_1 := v0.GetB() - builder0_1 := valueBuilder0.FieldBuilder(1) - listBuilder0_1 := builder0_1.(*array.ListBuilder) - if len(v0_1) == 0 { - listBuilder0_1.AppendNull() - } else { - listBuilder0_1.Append(true) - valueBuilder0_1 := listBuilder0_1.ValueBuilder().(*array.Uint32Builder) - for _, item := range v0_1 { - valueBuilder0_1.Append(item) - } - } - } - } - } - - // bars - { - builder1 := b.rb.Field(1) - v1 := m.GetBars() - listBuilder1 := builder1.(*array.ListBuilder) - if len(v1) == 0 { - listBuilder1.AppendNull() - } else { - listBuilder1.Append(true) - valueBuilder1 := listBuilder1.ValueBuilder().(*array.StructBuilder) - for _, item := range v1 { - if item == nil { - valueBuilder1.AppendNull() - } else { - valueBuilder1.Append(true) - - // c - { - v1_0 := item.GetC() - builder1_0 := valueBuilder1.FieldBuilder(0) - valueBuilder1_0 := builder1_0.(*array.Int64Builder) - valueBuilder1_0.Append(v1_0) - } - - // d - { - v1_1 := item.GetD() - builder1_1 := valueBuilder1.FieldBuilder(1) - listBuilder1_1 := builder1_1.(*array.ListBuilder) - if len(v1_1) == 0 { - listBuilder1_1.AppendNull() - } else { - listBuilder1_1.Append(true) - valueBuilder1_1 := listBuilder1_1.ValueBuilder().(*array.Uint64Builder) - for _, item := range v1_1 { 
- valueBuilder1_1.Append(item) - } - } - } - } - } - } - } -} - -func TestRecordBuilderMessages(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := newTestMessageArrowRecordBuilder(mem) - defer b.Release() - - var msgs []*testMessage - for i := 0; i < 1000; i++ { - msg := &testMessage{ - Foo: &testMessageFoo{ - A: int32(i), - B: []uint32{2, 3, 4, 5, 6, 7, 8, 9}, - }, - Bars: []*testMessageBar{ - { - C: 11, - D: []uint64{12, 13, 14}, - }, - { - C: 15, - D: []uint64{16, 17, 18, 19}, - }, - nil, - { - C: 20, - D: []uint64{21}, - }, - }, - } - msgs = append(msgs, msg) - b.Append(msg) - } - - rec := b.Build() - defer rec.Release() - - var got testMessage - for i := 0; i < 1000; i++ { - got.Fill(rec, i) - if !reflect.DeepEqual(&got, msgs[i]) { - t.Fatalf("row[%d], invalid record. got=%#v, want=%#v", i, &got, msgs[i]) - } - } -} diff --git a/go/arrow/array/string.go b/go/arrow/array/string.go deleted file mode 100644 index 88b4568ad5e84..0000000000000 --- a/go/arrow/array/string.go +++ /dev/null @@ -1,718 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "fmt" - "reflect" - "strings" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type StringLike interface { - arrow.Array - Value(int) string - ValueLen(int) int -} - -// String represents an immutable sequence of variable-length UTF-8 strings. -type String struct { - array - offsets []int32 - values string -} - -// NewStringData constructs a new String array from data. -func NewStringData(data arrow.ArrayData) *String { - a := &String{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the String with a different set of Data. -func (a *String) Reset(data arrow.ArrayData) { - a.setData(data.(*Data)) -} - -// Value returns the slice at index i. This value should not be mutated. -func (a *String) Value(i int) string { - i = i + a.array.data.offset - return a.values[a.offsets[i]:a.offsets[i+1]] -} - -func (a *String) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.Value(i) -} - -// ValueOffset returns the offset of the value at index i. 
-func (a *String) ValueOffset(i int) int { - if i < 0 || i > a.array.data.length { - panic("arrow/array: index out of range") - } - return int(a.offsets[i+a.array.data.offset]) -} - -func (a *String) ValueOffset64(i int) int64 { - return int64(a.ValueOffset(i)) -} - -func (a *String) ValueLen(i int) int { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - beg := a.array.data.offset + i - return int(a.offsets[beg+1] - a.offsets[beg]) -} - -func (a *String) ValueOffsets() []int32 { - beg := a.array.data.offset - end := beg + a.array.data.length + 1 - return a.offsets[beg:end] -} - -func (a *String) ValueBytes() []byte { - beg := a.array.data.offset - end := beg + a.array.data.length - if a.array.data.buffers[2] != nil { - return a.array.data.buffers[2].Bytes()[a.offsets[beg]:a.offsets[end]] - } - return nil -} - -func (a *String) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%q", a.Value(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *String) setData(data *Data) { - if len(data.buffers) != 3 { - panic("arrow/array: len(data.buffers) != 3") - } - - a.array.setData(data) - - if vdata := data.buffers[2]; vdata != nil { - b := vdata.Bytes() - a.values = *(*string)(unsafe.Pointer(&b)) - } - - if offsets := data.buffers[1]; offsets != nil { - a.offsets = arrow.Int32Traits.CastFromBytes(offsets.Bytes()) - } - - if a.array.data.length < 1 { - return - } - - expNumOffsets := a.array.data.offset + a.array.data.length + 1 - if len(a.offsets) < expNumOffsets { - panic(fmt.Errorf("arrow/array: string offset buffer must have at least %d values", expNumOffsets)) - } - - if int(a.offsets[expNumOffsets-1]) > len(a.values) { - panic("arrow/array: string offsets out of bounds of data buffer") - } -} - -func (a *String) GetOneForMarshal(i int) 
interface{} { - if a.IsValid(i) { - return a.Value(i) - } - return nil -} - -func (a *String) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.Value(i) - } else { - vals[i] = nil - } - } - return json.Marshal(vals) -} - -func arrayEqualString(left, right *String) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// String represents an immutable sequence of variable-length UTF-8 strings. -type LargeString struct { - array - offsets []int64 - values string -} - -// NewStringData constructs a new String array from data. -func NewLargeStringData(data arrow.ArrayData) *LargeString { - a := &LargeString{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the String with a different set of Data. -func (a *LargeString) Reset(data arrow.ArrayData) { - a.setData(data.(*Data)) -} - -// Value returns the slice at index i. This value should not be mutated. -func (a *LargeString) Value(i int) string { - i = i + a.array.data.offset - return a.values[a.offsets[i]:a.offsets[i+1]] -} - -func (a *LargeString) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.Value(i) -} - -// ValueOffset returns the offset of the value at index i. 
-func (a *LargeString) ValueOffset(i int) int64 { - if i < 0 || i > a.array.data.length { - panic("arrow/array: index out of range") - } - return a.offsets[i+a.array.data.offset] -} - -func (a *LargeString) ValueOffset64(i int) int64 { - return a.ValueOffset(i) -} - -func (a *LargeString) ValueLen(i int) int { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - beg := a.array.data.offset + i - return int(a.offsets[beg+1] - a.offsets[beg]) -} - -func (a *LargeString) ValueOffsets() []int64 { - beg := a.array.data.offset - end := beg + a.array.data.length + 1 - return a.offsets[beg:end] -} - -func (a *LargeString) ValueBytes() []byte { - beg := a.array.data.offset - end := beg + a.array.data.length - if a.array.data.buffers[2] != nil { - return a.array.data.buffers[2].Bytes()[a.offsets[beg]:a.offsets[end]] - } - return nil -} - -func (a *LargeString) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%q", a.Value(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *LargeString) setData(data *Data) { - if len(data.buffers) != 3 { - panic("arrow/array: len(data.buffers) != 3") - } - - a.array.setData(data) - - if vdata := data.buffers[2]; vdata != nil { - b := vdata.Bytes() - a.values = *(*string)(unsafe.Pointer(&b)) - } - - if offsets := data.buffers[1]; offsets != nil { - a.offsets = arrow.Int64Traits.CastFromBytes(offsets.Bytes()) - } - - if a.array.data.length < 1 { - return - } - - expNumOffsets := a.array.data.offset + a.array.data.length + 1 - if len(a.offsets) < expNumOffsets { - panic(fmt.Errorf("arrow/array: string offset buffer must have at least %d values", expNumOffsets)) - } - - if int(a.offsets[expNumOffsets-1]) > len(a.values) { - panic("arrow/array: string offsets out of bounds of data buffer") - } -} - -func (a *LargeString) 
GetOneForMarshal(i int) interface{} { - if a.IsValid(i) { - return a.Value(i) - } - return nil -} - -func (a *LargeString) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - vals[i] = a.GetOneForMarshal(i) - } - return json.Marshal(vals) -} - -func arrayEqualLargeString(left, right *LargeString) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -type StringView struct { - array - values []arrow.ViewHeader - dataBuffers []*memory.Buffer -} - -func NewStringViewData(data arrow.ArrayData) *StringView { - a := &StringView{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the String with a different set of Data. -func (a *StringView) Reset(data arrow.ArrayData) { - a.setData(data.(*Data)) -} - -func (a *StringView) setData(data *Data) { - if len(data.buffers) < 2 { - panic("len(data.buffers) < 2") - } - a.array.setData(data) - - if valueData := data.buffers[1]; valueData != nil { - a.values = arrow.ViewHeaderTraits.CastFromBytes(valueData.Bytes()) - } - - a.dataBuffers = data.buffers[2:] -} - -func (a *StringView) ValueHeader(i int) *arrow.ViewHeader { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - return &a.values[a.array.data.offset+i] -} - -func (a *StringView) Value(i int) string { - s := a.ValueHeader(i) - if s.IsInline() { - return s.InlineString() - } - start := s.BufferOffset() - buf := a.dataBuffers[s.BufferIndex()] - value := buf.Bytes()[start : start+int32(s.Len())] - return *(*string)(unsafe.Pointer(&value)) -} - -func (a *StringView) ValueLen(i int) int { - s := a.ValueHeader(i) - return s.Len() -} - -func (a *StringView) String() string { - var o strings.Builder - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: 
- fmt.Fprintf(&o, "%q", a.Value(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *StringView) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.Value(i) -} - -func (a *StringView) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.Value(i) -} - -func (a *StringView) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - vals[i] = a.GetOneForMarshal(i) - } - return json.Marshal(vals) -} - -func arrayEqualStringView(left, right *StringView) bool { - leftBufs, rightBufs := left.dataBuffers, right.dataBuffers - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !left.ValueHeader(i).Equals(leftBufs, right.ValueHeader(i), rightBufs) { - return false - } - } - return true -} - -// A StringBuilder is used to build a String array using the Append methods. -type StringBuilder struct { - *BinaryBuilder -} - -// NewStringBuilder creates a new StringBuilder. -func NewStringBuilder(mem memory.Allocator) *StringBuilder { - b := &StringBuilder{ - BinaryBuilder: NewBinaryBuilder(mem, arrow.BinaryTypes.String), - } - return b -} - -func (b *StringBuilder) Type() arrow.DataType { - return arrow.BinaryTypes.String -} - -// Append appends a string to the builder. -func (b *StringBuilder) Append(v string) { - b.BinaryBuilder.Append([]byte(v)) -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *StringBuilder) AppendValues(v []string, valid []bool) { - b.BinaryBuilder.AppendStringValues(v, valid) -} - -// Value returns the string at index i. 
-func (b *StringBuilder) Value(i int) string { - return string(b.BinaryBuilder.Value(i)) -} - -// NewArray creates a String array from the memory buffers used by the builder and resets the StringBuilder -// so it can be used to build a new array. -func (b *StringBuilder) NewArray() arrow.Array { - return b.NewStringArray() -} - -// NewStringArray creates a String array from the memory buffers used by the builder and resets the StringBuilder -// so it can be used to build a new array. -func (b *StringBuilder) NewStringArray() (a *String) { - data := b.newData() - a = NewStringData(data) - data.Release() - return -} - -func (b *StringBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - b.Append(v) - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(v), - Type: reflect.TypeOf(string("")), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *StringBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *StringBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("string builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -// A LargeStringBuilder is used to build a LargeString array using the Append methods. -// LargeString is for when you need the offset buffer to be 64-bit integers -// instead of 32-bit integers. -type LargeStringBuilder struct { - *BinaryBuilder -} - -// NewStringBuilder creates a new StringBuilder. 
-func NewLargeStringBuilder(mem memory.Allocator) *LargeStringBuilder { - b := &LargeStringBuilder{ - BinaryBuilder: NewBinaryBuilder(mem, arrow.BinaryTypes.LargeString), - } - return b -} - -func (b *LargeStringBuilder) Type() arrow.DataType { return arrow.BinaryTypes.LargeString } - -// Append appends a string to the builder. -func (b *LargeStringBuilder) Append(v string) { - b.BinaryBuilder.Append([]byte(v)) -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *LargeStringBuilder) AppendValues(v []string, valid []bool) { - b.BinaryBuilder.AppendStringValues(v, valid) -} - -// Value returns the string at index i. -func (b *LargeStringBuilder) Value(i int) string { - return string(b.BinaryBuilder.Value(i)) -} - -// NewArray creates a String array from the memory buffers used by the builder and resets the StringBuilder -// so it can be used to build a new array. -func (b *LargeStringBuilder) NewArray() arrow.Array { - return b.NewLargeStringArray() -} - -// NewStringArray creates a String array from the memory buffers used by the builder and resets the StringBuilder -// so it can be used to build a new array. 
-func (b *LargeStringBuilder) NewLargeStringArray() (a *LargeString) { - data := b.newData() - a = NewLargeStringData(data) - data.Release() - return -} - -func (b *LargeStringBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - b.Append(v) - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(v), - Type: reflect.TypeOf(string("")), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *LargeStringBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *LargeStringBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("string builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type StringViewBuilder struct { - *BinaryViewBuilder -} - -func NewStringViewBuilder(mem memory.Allocator) *StringViewBuilder { - bldr := &StringViewBuilder{ - BinaryViewBuilder: NewBinaryViewBuilder(mem), - } - bldr.dtype = arrow.BinaryTypes.StringView - return bldr -} - -func (b *StringViewBuilder) Append(v string) { - b.BinaryViewBuilder.AppendString(v) -} - -func (b *StringViewBuilder) AppendValues(v []string, valid []bool) { - b.BinaryViewBuilder.AppendStringValues(v, valid) -} - -func (b *StringViewBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case string: - b.Append(v) - case []byte: - b.BinaryViewBuilder.Append(v) - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf([]byte{}), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *StringViewBuilder) 
Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *StringViewBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary view builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -func (b *StringViewBuilder) NewArray() arrow.Array { - return b.NewStringViewArray() -} - -func (b *StringViewBuilder) NewStringViewArray() (a *StringView) { - data := b.newData() - a = NewStringViewData(data) - data.Release() - return -} - -type StringLikeBuilder interface { - Builder - Append(string) - AppendValues([]string, []bool) - UnsafeAppend([]byte) - ReserveData(int) -} - -var ( - _ arrow.Array = (*String)(nil) - _ arrow.Array = (*LargeString)(nil) - _ arrow.Array = (*StringView)(nil) - _ Builder = (*StringBuilder)(nil) - _ Builder = (*LargeStringBuilder)(nil) - _ Builder = (*StringViewBuilder)(nil) - _ StringLikeBuilder = (*StringBuilder)(nil) - _ StringLikeBuilder = (*LargeStringBuilder)(nil) - _ StringLikeBuilder = (*StringViewBuilder)(nil) - _ StringLike = (*String)(nil) - _ StringLike = (*LargeString)(nil) - _ StringLike = (*StringView)(nil) -) diff --git a/go/arrow/array/string_test.go b/go/arrow/array/string_test.go deleted file mode 100644 index efbe51edd1a03..0000000000000 --- a/go/arrow/array/string_test.go +++ /dev/null @@ -1,794 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "bytes" - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestStringArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - want = []string{"hello", "世界", "", "bye"} - valids = []bool{true, true, false, true} - offsets = []int32{0, 5, 11, 11, 14} - ) - - sb := array.NewStringBuilder(mem) - defer sb.Release() - - sb.Retain() - sb.Release() - - assert.NoError(t, sb.AppendValueFromString(want[0])) - sb.AppendValues(want[1:2], nil) - - sb.AppendNull() - sb.Append(want[3]) - - if got, want := sb.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := sb.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - arr := sb.NewStringArray() - defer arr.Release() - - arr.Retain() - arr.Release() - - assert.Equal(t, "hello", arr.ValueStr(0)) - - if got, want := arr.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - for i := range want { - if arr.IsNull(i) != !valids[i] { - t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) - } - switch { - case arr.IsNull(i): - default: - got := arr.Value(i) - if 
got != want[i] { - t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) - } - } - - if got, want := arr.ValueOffset(i), int(offsets[i]); got != want { - t.Fatalf("arr-offset-beg[%d]: got=%d, want=%d", i, got, want) - } - if got, want := arr.ValueOffset(i+1), int(offsets[i+1]); got != want { - t.Fatalf("arr-offset-end[%d]: got=%d, want=%d", i+1, got, want) - } - } - - if !reflect.DeepEqual(offsets, arr.ValueOffsets()) { - t.Fatalf("ValueOffsets got=%v, want=%v", arr.ValueOffsets(), offsets) - } - - sub := array.MakeFromData(arr.Data()) - defer sub.Release() - - if sub.DataType().ID() != arrow.STRING { - t.Fatalf("invalid type: got=%q, want=string", sub.DataType().Name()) - } - - if _, ok := sub.(*array.String); !ok { - t.Fatalf("could not type-assert to array.String") - } - - if got, want := arr.String(), `["hello" "世界" (null) "bye"]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if !bytes.Equal([]byte(`hello世界bye`), arr.ValueBytes()) { - t.Fatalf("got=%q, want=%q", string(arr.ValueBytes()), `hello世界bye`) - } - - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.String) - if !ok { - t.Fatalf("could not type-assert to array.String") - } - - if got, want := v.String(), `[(null) "bye"]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if !bytes.Equal(v.ValueBytes(), []byte("bye")) { - t.Fatalf("got=%q, want=%q", string(v.ValueBytes()), "bye") - } - - for i := 0; i < v.Len(); i++ { - if got, want := v.ValueOffset(0), int(offsets[i+slice.Offset()]); got != want { - t.Fatalf("val-offset-with-offset[%d]: got=%q, want=%q", i, got, want) - } - } - - if !reflect.DeepEqual(offsets[2:5], v.ValueOffsets()) { - t.Fatalf("ValueOffsets got=%v, want=%v", v.ValueOffsets(), offsets[2:5]) - } -} - -func TestStringBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - want := 
[]string{"hello", "世界", "", "bye"} - - ab := array.NewStringBuilder(mem) - defer ab.Release() - - stringValues := func(a *array.String) []string { - vs := make([]string, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - ab.AppendValues([]string{}, nil) - a := ab.NewStringArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewStringArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]string{}, nil) - ab.AppendValues(want, nil) - a = ab.NewStringArray() - assert.Equal(t, want, stringValues(a)) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([]string{}, nil) - a = ab.NewStringArray() - assert.Equal(t, want, stringValues(a)) - a.Release() -} - -// TestStringReset tests the Reset() method on the String type by creating two different Strings and then -// resetting the contents of string2 with the values from string1. -func TestStringReset(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - sb1 := array.NewStringBuilder(mem) - sb2 := array.NewStringBuilder(mem) - defer sb1.Release() - defer sb2.Release() - - sb1.Append("string1") - sb1.AppendNull() - - var ( - string1 = sb1.NewStringArray() - string2 = sb2.NewStringArray() - - string1Data = string1.Data() - ) - string2.Reset(string1Data) - - assert.Equal(t, "string1", string2.Value(0)) -} - -func TestStringInvalidOffsets(t *testing.T) { - const expectedPanic = "arrow/array: string offsets out of bounds of data buffer" - - makeBuffers := func(valids []bool, offsets []int32, data string) []*memory.Buffer { - offsetBuf := memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets)) - var nullBufBytes []byte - var nullBuf *memory.Buffer - if valids != nil { - nullBufBytes = make([]byte, bitutil.BytesForBits(int64(len(valids)))) - for i, v := range valids { - bitutil.SetBitTo(nullBufBytes, i, v) - } - nullBuf = memory.NewBufferBytes(nullBufBytes) - } - return []*memory.Buffer{nullBuf, offsetBuf, 
memory.NewBufferBytes([]byte(data))} - } - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{}, "") - array.NewStringData(array.NewData(arrow.BinaryTypes.String, 0, buffers, nil, 0, 0)) - }, "empty array with no offsets") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{0, 5}, "") - array.NewStringData(array.NewData(arrow.BinaryTypes.String, 0, buffers, nil, 0, 0)) - }, "empty array, offsets ignored") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{0, 3, 4, 9}, "oooabcdef") - array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, buffers, nil, 0, 2)) - }, "data has offset and value offsets are valid") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{0, 3, 6, 9, 9}, "012345678") - arr := array.NewStringData(array.NewData(arrow.BinaryTypes.String, 4, buffers, nil, 0, 0)) - if assert.Equal(t, 4, arr.Len()) && assert.Zero(t, arr.NullN()) { - assert.Equal(t, "012", arr.Value(0)) - assert.Equal(t, "345", arr.Value(1)) - assert.Equal(t, "678", arr.Value(2)) - assert.Equal(t, "", arr.Value(3), "trailing empty string value will have offset past end") - } - }, "simple valid case") - - assert.NotPanics(t, func() { - buffers := makeBuffers([]bool{true, false, true, false}, []int32{0, 3, 4, 9, 9}, "oooabcdef") - arr := array.NewStringData(array.NewData(arrow.BinaryTypes.String, 4, buffers, nil, 2, 0)) - if assert.Equal(t, 4, arr.Len()) && assert.Equal(t, 2, arr.NullN()) { - assert.Equal(t, "ooo", arr.Value(0)) - assert.True(t, arr.IsNull(1)) - assert.Equal(t, "bcdef", arr.Value(2)) - assert.True(t, arr.IsNull(3)) - } - }, "simple valid case with nulls") - - assert.PanicsWithValue(t, expectedPanic, func() { - buffers := makeBuffers(nil, []int32{0, 5}, "abc") - array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, buffers, nil, 0, 0)) - }, "last offset is overflowing") - - assert.PanicsWithError(t, "arrow/array: string offset buffer must have at least 2 values", func() { - 
buffers := makeBuffers(nil, []int32{0}, "abc") - array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, buffers, nil, 0, 0)) - }, "last offset is missing") - - assert.PanicsWithValue(t, expectedPanic, func() { - buffers := makeBuffers(nil, []int32{0, 3, 10, 15}, "oooabcdef") - array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, buffers, nil, 0, 2)) - }, "data has offset and value offset is overflowing") -} - -func TestStringStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - values = []string{"hello", "世界", "", "bye"} - valid = []bool{true, true, false, true} - ) - - b := array.NewStringBuilder(mem) - defer b.Release() - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.String) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewStringBuilder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.String) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestLargeStringArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - want = []string{"hello", "世界", "", "bye"} - valids = []bool{true, true, false, true} - offsets = []int64{0, 5, 11, 11, 14} - ) - - sb := array.NewLargeStringBuilder(mem) - defer sb.Release() - - sb.Retain() - sb.Release() - - sb.AppendValues(want[:2], nil) - - sb.AppendNull() - sb.Append(want[3]) - - if got, want := sb.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := sb.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - arr := sb.NewLargeStringArray() - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.Len(), len(want); got != want { - t.Fatalf("invalid 
len: got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - for i := range want { - if arr.IsNull(i) != !valids[i] { - t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) - } - switch { - case arr.IsNull(i): - default: - got := arr.Value(i) - if got != want[i] { - t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) - } - } - - if got, want := arr.ValueOffset(i), offsets[i]; got != want { - t.Fatalf("arr-offset-beg[%d]: got=%d, want=%d", i, got, want) - } - if got, want := arr.ValueOffset(i+1), offsets[i+1]; got != want { - t.Fatalf("arr-offset-end[%d]: got=%d, want=%d", i+1, got, want) - } - } - - if !reflect.DeepEqual(offsets, arr.ValueOffsets()) { - t.Fatalf("ValueOffsets got=%v, want=%v", arr.ValueOffsets(), offsets) - } - - sub := array.MakeFromData(arr.Data()) - defer sub.Release() - - if sub.DataType().ID() != arrow.LARGE_STRING { - t.Fatalf("invalid type: got=%q, want=large_string", sub.DataType().Name()) - } - - if _, ok := sub.(*array.LargeString); !ok { - t.Fatalf("could not type-assert to array.LargeString") - } - - if got, want := arr.String(), `["hello" "世界" (null) "bye"]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if !bytes.Equal([]byte(`hello世界bye`), arr.ValueBytes()) { - t.Fatalf("got=%q, want=%q", string(arr.ValueBytes()), `hello世界bye`) - } - - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.LargeString) - if !ok { - t.Fatalf("could not type-assert to array.LargeString") - } - - if got, want := v.String(), `[(null) "bye"]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if !bytes.Equal(v.ValueBytes(), []byte("bye")) { - t.Fatalf("got=%q, want=%q", string(v.ValueBytes()), "bye") - } - - for i := 0; i < v.Len(); i++ { - if got, want := v.ValueOffset(0), offsets[i+slice.Offset()]; got != want 
{ - t.Fatalf("val-offset-with-offset[%d]: got=%q, want=%q", i, got, want) - } - } - - if !reflect.DeepEqual(offsets[2:5], v.ValueOffsets()) { - t.Fatalf("ValueOffsets got=%v, want=%v", v.ValueOffsets(), offsets[2:5]) - } -} - -func TestLargeStringBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - want := []string{"hello", "世界", "", "bye"} - - ab := array.NewLargeStringBuilder(mem) - defer ab.Release() - - stringValues := func(a *array.LargeString) []string { - vs := make([]string, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - ab.AppendValues([]string{}, nil) - a := ab.NewLargeStringArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewLargeStringArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]string{}, nil) - ab.AppendValues(want, nil) - a = ab.NewLargeStringArray() - assert.Equal(t, want, stringValues(a)) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([]string{}, nil) - a = ab.NewLargeStringArray() - assert.Equal(t, want, stringValues(a)) - a.Release() -} - -// TestStringReset tests the Reset() method on the String type by creating two different Strings and then -// resetting the contents of string2 with the values from string1. 
-func TestLargeStringReset(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - sb1 := array.NewLargeStringBuilder(mem) - sb2 := array.NewLargeStringBuilder(mem) - defer sb1.Release() - defer sb2.Release() - - sb1.Append("string1") - sb1.AppendNull() - - var ( - string1 = sb1.NewLargeStringArray() - string2 = sb2.NewLargeStringArray() - - string1Data = string1.Data() - ) - string2.Reset(string1Data) - - assert.Equal(t, "string1", string2.Value(0)) -} - -func TestLargeStringInvalidOffsets(t *testing.T) { - const expectedPanic = "arrow/array: string offsets out of bounds of data buffer" - - makeBuffers := func(valids []bool, offsets []int64, data string) []*memory.Buffer { - offsetBuf := memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(offsets)) - var nullBufBytes []byte - var nullBuf *memory.Buffer - if valids != nil { - nullBufBytes = make([]byte, bitutil.BytesForBits(int64(len(valids)))) - for i, v := range valids { - bitutil.SetBitTo(nullBufBytes, i, v) - } - nullBuf = memory.NewBufferBytes(nullBufBytes) - } - return []*memory.Buffer{nullBuf, offsetBuf, memory.NewBufferBytes([]byte(data))} - } - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int64{}, "") - array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 0, buffers, nil, 0, 0)) - }, "empty array with no offsets") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int64{0, 5}, "") - array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 0, buffers, nil, 0, 0)) - }, "empty array, offsets ignored") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int64{0, 3, 4, 9}, "oooabcdef") - array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 2)) - }, "data has offset and value offsets are valid") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int64{0, 3, 6, 9, 9}, "012345678") - arr := array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 4, 
buffers, nil, 0, 0)) - if assert.Equal(t, 4, arr.Len()) && assert.Zero(t, arr.NullN()) { - assert.Equal(t, "012", arr.Value(0)) - assert.Equal(t, "345", arr.Value(1)) - assert.Equal(t, "678", arr.Value(2)) - assert.Equal(t, "", arr.Value(3), "trailing empty string value will have offset past end") - } - }, "simple valid case") - - assert.NotPanics(t, func() { - buffers := makeBuffers([]bool{true, false, true, false}, []int64{0, 3, 4, 9, 9}, "oooabcdef") - arr := array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 4, buffers, nil, 2, 0)) - if assert.Equal(t, 4, arr.Len()) && assert.Equal(t, 2, arr.NullN()) { - assert.Equal(t, "ooo", arr.Value(0)) - assert.True(t, arr.IsNull(1)) - assert.Equal(t, "bcdef", arr.Value(2)) - assert.True(t, arr.IsNull(3)) - } - }, "simple valid case with nulls") - - assert.PanicsWithValue(t, expectedPanic, func() { - buffers := makeBuffers(nil, []int64{0, 5}, "abc") - array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 0)) - }, "last offset is overflowing") - - assert.PanicsWithError(t, "arrow/array: string offset buffer must have at least 2 values", func() { - buffers := makeBuffers(nil, []int64{0}, "abc") - array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 0)) - }, "last offset is missing") - - assert.PanicsWithValue(t, expectedPanic, func() { - buffers := makeBuffers(nil, []int64{0, 3, 10, 15}, "oooabcdef") - array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 2)) - }, "data has offset and value offset is overflowing") -} - -func TestLargeStringStringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - values = []string{"hello", "世界", "", "bye"} - valid = []bool{true, true, false, true} - ) - - b := array.NewLargeStringBuilder(mem) - defer b.Release() - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.LargeString) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewLargeStringBuilder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.LargeString) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestStringValueLen(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := array.NewStringBuilder(mem) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*array.String) - defer arr.Release() - - slice := array.NewSlice(arr, 2, 9).(*array.String) - defer slice.Release() - - vs := values[2:9] - - for i, v := range vs { - assert.Equal(t, len(v), slice.ValueLen(i)) - } -} -func TestStringViewArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - // only the last string is long enough to not get inlined - want = []string{"hello", "世界", "", "say goodbye daffy"} - valids = []bool{true, true, false, true} - ) - - sb := array.NewStringViewBuilder(mem) - defer sb.Release() - - sb.Retain() - sb.Release() - - assert.NoError(t, sb.AppendValueFromString(want[0])) - sb.AppendValues(want[1:2], nil) - - sb.AppendNull() - sb.Append(want[3]) - - if got, want := sb.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := 
sb.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - arr := sb.NewStringViewArray() - defer arr.Release() - - arr.Retain() - arr.Release() - - assert.Equal(t, "hello", arr.ValueStr(0)) - - if got, want := arr.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - for i := range want { - if arr.IsNull(i) != !valids[i] { - t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) - } - switch { - case arr.IsNull(i): - default: - got := arr.Value(i) - if got != want[i] { - t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) - } - } - } - - sub := array.MakeFromData(arr.Data()) - defer sub.Release() - - if sub.DataType().ID() != arrow.STRING_VIEW { - t.Fatalf("invalid type: got=%q, want=string view", sub.DataType().Name()) - } - - if _, ok := sub.(*array.StringView); !ok { - t.Fatalf("could not type-assert to array.String") - } - - if got, want := arr.String(), `["hello" "世界" (null) "say goodbye daffy"]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - // only the last string gets stuck into a buffer the rest are inlined - // in the headers. 
- if !bytes.Equal([]byte(`say goodbye daffy`), arr.Data().Buffers()[2].Bytes()) { - t.Fatalf("got=%q, want=%q", string(arr.Data().Buffers()[2].Bytes()), `say goodbye daffy`) - } - - // check the prefix for the non-inlined value - if [4]byte{'s', 'a', 'y', ' '} != arr.ValueHeader(3).Prefix() { - t.Fatalf("got=%q, want=%q", arr.ValueHeader(3).Prefix(), `say `) - } - - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.StringView) - if !ok { - t.Fatalf("could not type-assert to array.StringView") - } - - if got, want := v.String(), `[(null) "say goodbye daffy"]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if !bytes.Equal([]byte(`say goodbye daffy`), v.Data().Buffers()[2].Bytes()) { - t.Fatalf("got=%q, want=%q", string(v.Data().Buffers()[2].Bytes()), `say goodbye daffy`) - } - - // check the prefix for the non-inlined value - if [4]byte{'s', 'a', 'y', ' '} != v.ValueHeader(1).Prefix() { - t.Fatalf("got=%q, want=%q", v.ValueHeader(1).Prefix(), `say `) - } -} - -func TestStringViewBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - want := []string{"hello", "世界", "", "say goodbye daffy"} - - ab := array.NewStringViewBuilder(mem) - defer ab.Release() - - stringValues := func(a *array.StringView) []string { - vs := make([]string, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - ab.AppendValues([]string{}, nil) - a := ab.NewStringViewArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewStringViewArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]string{}, nil) - ab.AppendValues(want, nil) - a = ab.NewStringViewArray() - assert.Equal(t, want, stringValues(a)) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([]string{}, nil) - a = ab.NewStringViewArray() - assert.Equal(t, want, 
stringValues(a)) - a.Release() -} - -// TestStringReset tests the Reset() method on the String type by creating two different Strings and then -// resetting the contents of string2 with the values from string1. -func TestStringViewReset(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - sb1 := array.NewStringViewBuilder(mem) - sb2 := array.NewStringViewBuilder(mem) - defer sb1.Release() - defer sb2.Release() - - sb1.Append("string1") - sb1.AppendNull() - - var ( - string1 = sb1.NewStringViewArray() - string2 = sb2.NewStringViewArray() - - string1Data = string1.Data() - ) - string2.Reset(string1Data) - - assert.Equal(t, "string1", string2.Value(0)) -} diff --git a/go/arrow/array/struct.go b/go/arrow/array/struct.go deleted file mode 100644 index 279ac1d87b25b..0000000000000 --- a/go/arrow/array/struct.go +++ /dev/null @@ -1,491 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "errors" - "fmt" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// Struct represents an ordered sequence of relative types. -type Struct struct { - array - fields []arrow.Array -} - -// NewStructArray constructs a new Struct Array out of the columns passed -// in and the field names. The length of all cols must be the same and -// there should be the same number of columns as names. -func NewStructArray(cols []arrow.Array, names []string) (*Struct, error) { - return NewStructArrayWithNulls(cols, names, nil, 0, 0) -} - -// NewStructArrayWithNulls is like NewStructArray as a convenience function, -// but also takes in a null bitmap, the number of nulls, and an optional offset -// to use for creating the Struct Array. -func NewStructArrayWithNulls(cols []arrow.Array, names []string, nullBitmap *memory.Buffer, nullCount int, offset int) (*Struct, error) { - if len(cols) != len(names) { - return nil, fmt.Errorf("%w: mismatching number of fields and child arrays", arrow.ErrInvalid) - } - if len(cols) == 0 { - return nil, fmt.Errorf("%w: can't infer struct array length with 0 child arrays", arrow.ErrInvalid) - } - length := cols[0].Len() - children := make([]arrow.ArrayData, len(cols)) - fields := make([]arrow.Field, len(cols)) - for i, c := range cols { - if length != c.Len() { - return nil, fmt.Errorf("%w: mismatching child array lengths", arrow.ErrInvalid) - } - children[i] = c.Data() - fields[i].Name = names[i] - fields[i].Type = c.DataType() - fields[i].Nullable = true - } - data := NewData(arrow.StructOf(fields...), length, []*memory.Buffer{nullBitmap}, children, nullCount, offset) - defer data.Release() - return NewStructData(data), nil -} - -// NewStructData returns a new Struct array value from 
data. -func NewStructData(data arrow.ArrayData) *Struct { - a := &Struct{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *Struct) NumField() int { return len(a.fields) } -func (a *Struct) Field(i int) arrow.Array { return a.fields[i] } - -// ValueStr returns the string representation (as json) of the value at index i. -func (a *Struct) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - - data, err := json.Marshal(a.GetOneForMarshal(i)) - if err != nil { - panic(err) - } - return string(data) -} - -func (a *Struct) String() string { - o := new(strings.Builder) - o.WriteString("{") - - structBitmap := a.NullBitmapBytes() - for i, v := range a.fields { - if i > 0 { - o.WriteString(" ") - } - if arrow.IsUnion(v.DataType().ID()) { - fmt.Fprintf(o, "%v", v) - continue - } else if !bytes.Equal(structBitmap, v.NullBitmapBytes()) { - masked := a.newStructFieldWithParentValidityMask(i) - fmt.Fprintf(o, "%v", masked) - masked.Release() - continue - } - fmt.Fprintf(o, "%v", v) - } - o.WriteString("}") - return o.String() -} - -// newStructFieldWithParentValidityMask returns the Interface at fieldIndex -// with a nullBitmapBytes adjusted according on the parent struct nullBitmapBytes. -// From the docs: -// -// "When reading the struct array the parent validity bitmap takes priority." 
-func (a *Struct) newStructFieldWithParentValidityMask(fieldIndex int) arrow.Array { - field := a.Field(fieldIndex) - nullBitmapBytes := field.NullBitmapBytes() - maskedNullBitmapBytes := make([]byte, len(nullBitmapBytes)) - copy(maskedNullBitmapBytes, nullBitmapBytes) - for i := 0; i < field.Len(); i++ { - if a.IsNull(i) { - bitutil.ClearBit(maskedNullBitmapBytes, i) - } - } - data := NewSliceData(field.Data(), 0, int64(field.Len())).(*Data) - defer data.Release() - bufs := make([]*memory.Buffer, len(data.Buffers())) - copy(bufs, data.buffers) - bufs[0].Release() - bufs[0] = memory.NewBufferBytes(maskedNullBitmapBytes) - data.buffers = bufs - maskedField := MakeFromData(data) - return maskedField -} - -func (a *Struct) setData(data *Data) { - a.array.setData(data) - a.fields = make([]arrow.Array, len(data.childData)) - for i, child := range data.childData { - if data.offset != 0 || child.Len() != data.length { - sub := NewSliceData(child, int64(data.offset), int64(data.offset+data.length)) - a.fields[i] = MakeFromData(sub) - sub.Release() - } else { - a.fields[i] = MakeFromData(child) - } - } -} - -func (a *Struct) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - tmp := make(map[string]interface{}) - fieldList := a.data.dtype.(*arrow.StructType).Fields() - for j, d := range a.fields { - tmp[fieldList[j].Name] = d.GetOneForMarshal(i) - } - return tmp -} - -func (a *Struct) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func arrayEqualStruct(left, right *Struct) bool { - for i, lf := range left.fields { - rf := right.fields[i] - if !Equal(lf, rf) { - return false - } - } - return true -} - -func (a *Struct) Retain() { - a.array.Retain() - for _, f := range a.fields { - 
f.Retain() - } -} - -func (a *Struct) Release() { - a.array.Release() - for _, f := range a.fields { - f.Release() - } -} - -type StructBuilder struct { - builder - - dtype arrow.DataType - fields []Builder -} - -// NewStructBuilder returns a builder, using the provided memory allocator. -func NewStructBuilder(mem memory.Allocator, dtype *arrow.StructType) *StructBuilder { - b := &StructBuilder{ - builder: builder{refCount: 1, mem: mem}, - dtype: dtype, - fields: make([]Builder, dtype.NumFields()), - } - for i, f := range dtype.Fields() { - b.fields[i] = NewBuilder(b.mem, f.Type) - } - return b -} - -func (b *StructBuilder) Type() arrow.DataType { - fields := make([]arrow.Field, len(b.fields)) - copy(fields, b.dtype.(*arrow.StructType).Fields()) - for i, b := range b.fields { - fields[i].Type = b.Type() - } - return arrow.StructOf(fields...) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *StructBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - - for _, f := range b.fields { - f.Release() - } - } -} - -func (b *StructBuilder) Append(v bool) { - // Intentionally not calling `Reserve` as it will recursively call - // `Reserve` on the child builders, which during profiling has shown to be - // very expensive due to iterating over children, dynamic dispatch and all - // other code that gets executed even if previously `Reserve` was called to - // preallocate. Not calling `Reserve` has no downsides as when appending to - // the underlying children they already ensure they have enough space - // reserved. The only thing we must do is ensure we have enough space in - // the validity bitmap of the struct builder itself. 
- b.builder.reserve(1, b.resizeHelper) - b.unsafeAppendBoolToBitmap(v) - if !v { - for _, f := range b.fields { - f.AppendNull() - } - } -} - -func (b *StructBuilder) AppendValues(valids []bool) { - b.Reserve(len(valids)) - b.builder.unsafeAppendBoolsToBitmap(valids, len(valids)) -} - -func (b *StructBuilder) AppendNull() { b.Append(false) } - -func (b *StructBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *StructBuilder) AppendEmptyValue() { - b.Append(true) - for _, f := range b.fields { - f.AppendEmptyValue() - } -} - -func (b *StructBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *StructBuilder) unsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -func (b *StructBuilder) init(capacity int) { - b.builder.init(capacity) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *StructBuilder) Reserve(n int) { - b.builder.reserve(n, b.resizeHelper) - for _, f := range b.fields { - f.Reserve(n) - } -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *StructBuilder) Resize(n int) { - b.resizeHelper(n) - for _, f := range b.fields { - f.Resize(n) - } -} - -func (b *StructBuilder) resizeHelper(n int) { - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(n, b.builder.init) - } -} - -func (b *StructBuilder) NumField() int { return len(b.fields) } -func (b *StructBuilder) FieldBuilder(i int) Builder { return b.fields[i] } - -// NewArray creates a Struct array from the memory buffers used by the builder and resets the StructBuilder -// so it can be used to build a new array. 
-func (b *StructBuilder) NewArray() arrow.Array { - return b.NewStructArray() -} - -// NewStructArray creates a Struct array from the memory buffers used by the builder and resets the StructBuilder -// so it can be used to build a new array. -func (b *StructBuilder) NewStructArray() (a *Struct) { - data := b.newData() - a = NewStructData(data) - data.Release() - return -} - -func (b *StructBuilder) newData() (data *Data) { - fields := make([]arrow.ArrayData, len(b.fields)) - for i, f := range b.fields { - arr := f.NewArray() - defer arr.Release() - fields[i] = arr.Data() - } - - data = NewData( - b.Type(), b.length, - []*memory.Buffer{ - b.nullBitmap, - }, - fields, - b.nulls, - 0, - ) - b.reset() - - return -} - -func (b *StructBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - - if !strings.HasPrefix(s, "{") && !strings.HasSuffix(s, "}") { - return fmt.Errorf("%w: invalid string for struct should be be of form: {*}", arrow.ErrInvalid) - } - dec := json.NewDecoder(strings.NewReader(s)) - return b.UnmarshalOne(dec) -} - -func (b *StructBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch t { - case json.Delim('{'): - b.Append(true) - keylist := make(map[string]bool) - for dec.More() { - keyTok, err := dec.Token() - if err != nil { - return err - } - - key, ok := keyTok.(string) - if !ok { - return errors.New("missing key") - } - - if keylist[key] { - return fmt.Errorf("key %s is specified twice", key) - } - - keylist[key] = true - - idx, ok := b.dtype.(*arrow.StructType).FieldIdx(key) - if !ok { - var extra interface{} - dec.Decode(&extra) - continue - } - - if err := b.fields[idx].UnmarshalOne(dec); err != nil { - return err - } - } - - // Append null values to all optional fields that were not presented in the json input - for _, field := range b.dtype.(*arrow.StructType).Fields() { - if !field.Nullable { - continue - } - idx, _ := 
b.dtype.(*arrow.StructType).FieldIdx(field.Name) - if _, hasKey := keylist[field.Name]; !hasKey { - b.fields[idx].AppendNull() - } - } - - // consume '}' - _, err := dec.Token() - return err - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Offset: dec.InputOffset(), - Struct: fmt.Sprint(b.dtype), - } - } - return nil -} - -func (b *StructBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *StructBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("struct builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*Struct)(nil) - _ Builder = (*StructBuilder)(nil) -) diff --git a/go/arrow/array/struct_test.go b/go/arrow/array/struct_test.go deleted file mode 100644 index 4338bbd0b136e..0000000000000 --- a/go/arrow/array/struct_test.go +++ /dev/null @@ -1,532 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestStructArray(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - f1s = []byte{'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'} - f2s = []int32{1, 2, 3, 4} - - f1Lengths = []int{3, 0, 3, 4} - f1Offsets = []int32{0, 3, 3, 6, 10} - f1Valids = []bool{true, false, true, true} - - isValid = []bool{true, true, true, true} - - fields = []arrow.Field{ - {Name: "f1", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint8)}, - {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, - } - dtype = arrow.StructOf(fields...) - ) - - sb := array.NewStructBuilder(pool, dtype) - defer sb.Release() - - for i := 0; i < 10; i++ { - f1b := sb.FieldBuilder(0).(*array.ListBuilder) - f1vb := f1b.ValueBuilder().(*array.Uint8Builder) - f2b := sb.FieldBuilder(1).(*array.Int32Builder) - - if got, want := sb.NumField(), 2; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - sb.Resize(len(f1Lengths)) - f1vb.Resize(len(f1s)) - f2b.Resize(len(f2s)) - - pos := 0 - for i, length := range f1Lengths { - f1b.Append(f1Valids[i]) - for j := 0; j < length; j++ { - f1vb.Append(f1s[pos]) - pos++ - } - f2b.Append(f2s[i]) - } - - for _, valid := range isValid { - sb.Append(valid) - } - - arr := sb.NewArray().(*array.Struct) - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.DataType().ID(), arrow.STRUCT; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - for i, valid := range isValid { - if got, want := arr.IsValid(i), valid; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - { - f1arr := arr.Field(0).(*array.List) - if got, want := 
f1arr.Len(), len(f1Lengths); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range f1Lengths { - if got, want := f1arr.IsValid(i), f1Valids[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := f1arr.IsNull(i), f1Lengths[i] == 0; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - - } - - if got, want := f1arr.Offsets(), f1Offsets; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - varr := f1arr.ListValues().(*array.Uint8) - if got, want := varr.Uint8Values(), f1s; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - } - - { - f2arr := arr.Field(1).(*array.Int32) - if got, want := f2arr.Len(), len(f2s); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := f2arr.Int32Values(), f2s; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%d, want=%d", got, want) - } - } - } -} - -func TestStructStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := arrow.StructOf( - arrow.Field{Name: "nullable_bool", Type: new(arrow.BooleanType), Nullable: true}, - arrow.Field{Name: "non_nullable_bool", Type: new(arrow.BooleanType)}, - ) - - builder := array.NewStructBuilder(memory.DefaultAllocator, dt) - nullableBld := builder.FieldBuilder(0).(*array.BooleanBuilder) - nonNullableBld := builder.FieldBuilder(1).(*array.BooleanBuilder) - - builder.Append(true) - nullableBld.Append(true) - nonNullableBld.Append(true) - - builder.Append(true) - nullableBld.AppendNull() - nonNullableBld.Append(true) - - builder.AppendNull() - - arr := builder.NewArray().(*array.Struct) - - // 2. 
create array via AppendValueFromString - b1 := array.NewStructBuilder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Struct) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestStructArrayEmpty(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - sb := array.NewStructBuilder(pool, arrow.StructOf()) - defer sb.Release() - - if got, want := sb.NumField(), 0; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - arr := sb.NewArray().(*array.Struct) - - if got, want := arr.Len(), 0; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NumField(), 0; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } -} - -func TestStructArrayBulkAppend(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - f1s = []byte{'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'} - f2s = []int32{1, 2, 3, 4} - - f1Lengths = []int{3, 0, 3, 4} - f1Offsets = []int32{0, 3, 3, 6, 10} - f1Valids = []bool{true, false, true, true} - - isValid = []bool{true, true, true, true} - - fields = []arrow.Field{ - {Name: "f1", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint8)}, - {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, - } - dtype = arrow.StructOf(fields...) 
- ) - - sb := array.NewStructBuilder(pool, dtype) - defer sb.Release() - - for i := 0; i < 10; i++ { - f1b := sb.FieldBuilder(0).(*array.ListBuilder) - f1vb := f1b.ValueBuilder().(*array.Uint8Builder) - f2b := sb.FieldBuilder(1).(*array.Int32Builder) - - if got, want := sb.NumField(), 2; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - sb.Resize(len(f1Lengths)) - f1vb.Resize(len(f1s)) - f2b.Resize(len(f2s)) - - sb.AppendValues(isValid) - f1b.AppendValues(f1Offsets, f1Valids) - f1vb.AppendValues(f1s, nil) - f2b.AppendValues(f2s, nil) - - arr := sb.NewArray().(*array.Struct) - defer arr.Release() - - if got, want := arr.DataType().ID(), arrow.STRUCT; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - for i, valid := range isValid { - if got, want := arr.IsValid(i), valid; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - { - f1arr := arr.Field(0).(*array.List) - if got, want := f1arr.Len(), len(f1Lengths); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range f1Lengths { - if got, want := f1arr.IsValid(i), f1Valids[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := f1arr.IsNull(i), f1Lengths[i] == 0; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - - } - - if got, want := f1arr.Offsets(), f1Offsets; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - varr := f1arr.ListValues().(*array.Uint8) - if got, want := varr.Uint8Values(), f1s; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - } - - { - f2arr := arr.Field(1).(*array.Int32) - if got, want := f2arr.Len(), len(f2s); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := f2arr.Int32Values(), f2s; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%d, want=%d", got, want) - } - } - } -} - 
-func TestStructArrayStringer(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - f1s = []float64{1.1, 1.2, 1.3, 1.4} - f2s = []int32{1, 2, 3, 4} - - fields = []arrow.Field{ - {Name: "f1", Type: arrow.PrimitiveTypes.Float64}, - {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, - } - dtype = arrow.StructOf(fields...) - ) - - sb := array.NewStructBuilder(pool, dtype) - defer sb.Release() - - f1b := sb.FieldBuilder(0).(*array.Float64Builder) - f2b := sb.FieldBuilder(1).(*array.Int32Builder) - - if got, want := sb.NumField(), 2; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range f1s { - sb.Append(true) - switch i { - case 1: - f1b.AppendNull() - f2b.Append(f2s[i]) - case 2: - f1b.Append(f1s[i]) - f2b.AppendNull() - default: - f1b.Append(f1s[i]) - f2b.Append(f2s[i]) - } - } - assert.NoError(t, sb.AppendValueFromString(`{"f1": 1.1, "f2": 1}`)) - arr := sb.NewArray().(*array.Struct) - defer arr.Release() - - assert.Equal(t, `{"f1":1.1,"f2":1}`, arr.ValueStr(4)) - want := "{[1.1 (null) 1.3 1.4 1.1] [1 2 (null) 4 1]}" - got := arr.String() - if got != want { - t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, want) - } -} - -func TestStructArraySlice(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - f1s = []float64{1.1, 1.2, 1.3, 1.4} - f2s = []int32{1, 2, 3, 4} - valids = []bool{true, true, true, true} - - fields = []arrow.Field{ - {Name: "f1", Type: arrow.PrimitiveTypes.Float64}, - {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, - } - dtype = arrow.StructOf(fields...) 
- ) - - sb := array.NewStructBuilder(pool, dtype) - defer sb.Release() - - f1b := sb.FieldBuilder(0).(*array.Float64Builder) - - f2b := sb.FieldBuilder(1).(*array.Int32Builder) - - if got, want := sb.NumField(), 2; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range f1s { - sb.Append(valids[i]) - switch i { - case 1: - f1b.AppendNull() - f2b.Append(f2s[i]) - case 2: - f1b.Append(f1s[i]) - f2b.AppendNull() - default: - f1b.Append(f1s[i]) - f2b.Append(f2s[i]) - } - } - - arr := sb.NewArray().(*array.Struct) - defer arr.Release() - - // Slice - arrSlice := array.NewSlice(arr, 2, 4).(*array.Struct) - defer arrSlice.Release() - - want := "{[1.3 1.4] [(null) 4]}" - got := arrSlice.String() - if got != want { - t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, want) - } -} - -func TestStructArrayNullBitmap(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - f1s = []float64{1.1, 1.2, 1.3, 1.4} - f2s = []int32{1, 2, 3, 4} - valids = []bool{true, true, true, false} - - fields = []arrow.Field{ - {Name: "f1", Type: arrow.PrimitiveTypes.Float64}, - {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, - } - dtype = arrow.StructOf(fields...) 
- ) - - sb := array.NewStructBuilder(pool, dtype) - defer sb.Release() - - f1b := sb.FieldBuilder(0).(*array.Float64Builder) - - f2b := sb.FieldBuilder(1).(*array.Int32Builder) - - if got, want := sb.NumField(), 2; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - sb.AppendValues(valids) - for i := range f1s { - f1b.Append(f1s[i]) - switch i { - case 1: - f2b.AppendNull() - default: - f2b.Append(f2s[i]) - } - } - - arr := sb.NewArray().(*array.Struct) - defer arr.Release() - - want := "{[1.1 1.2 1.3 (null)] [1 (null) 3 (null)]}" - got := arr.String() - if got != want { - t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, want) - } -} - -func TestStructArrayUnmarshalJSONMissingFields(t *testing.T) { - pool := memory.NewGoAllocator() - - var ( - fields = []arrow.Field{ - {Name: "f1", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, - { - Name: "f3", Type: arrow.StructOf( - []arrow.Field{ - {Name: "f3_1", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "f3_2", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "f3_3", Type: arrow.BinaryTypes.String, Nullable: false}, - }..., - ), - }, - } - dtype = arrow.StructOf(fields...) 
- ) - - tests := []struct { - name string - jsonInput string - want string - panic bool - }{ - { - name: "missing required field", - jsonInput: `[{"f2": 3, "f3": {"f3_1": "test"}}]`, - panic: true, - want: "", - }, - { - name: "missing optional fields", - jsonInput: `[{"f2": 3, "f3": {"f3_3": "test"}}]`, - panic: false, - want: `{[(null)] [3] {[(null)] [(null)] ["test"]}}`, - }, - } - - for _, tc := range tests { - t.Run( - tc.name, func(t *testing.T) { - - var val bool - - sb := array.NewStructBuilder(pool, dtype) - defer sb.Release() - - if tc.panic { - defer func() { - e := recover() - if e == nil { - t.Fatalf("this should have panicked, but did not; slice value %v", val) - } - if got, want := e.(string), "arrow/array: index out of range"; got != want { - t.Fatalf("invalid error. got=%q, want=%q", got, want) - } - }() - } else { - defer func() { - if e := recover(); e != nil { - t.Fatalf("unexpected panic: %v", e) - } - }() - } - - err := sb.UnmarshalJSON([]byte(tc.jsonInput)) - if err != nil { - t.Fatal(err) - } - - arr := sb.NewArray().(*array.Struct) - defer arr.Release() - - got := arr.String() - if got != tc.want { - t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, tc.want) - } - }, - ) - } -} diff --git a/go/arrow/array/table.go b/go/arrow/array/table.go deleted file mode 100644 index 3b742ae78803d..0000000000000 --- a/go/arrow/array/table.go +++ /dev/null @@ -1,421 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "errors" - "fmt" - "math" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/internal/debug" -) - -// NewColumnSlice returns a new zero-copy slice of the column with the indicated -// indices i and j, corresponding to the column's array[i:j]. -// The returned column must be Release()'d after use. -// -// NewColSlice panics if the slice is outside the valid range of the column's array. -// NewColSlice panics if j < i. -func NewColumnSlice(col *arrow.Column, i, j int64) *arrow.Column { - slice := NewChunkedSlice(col.Data(), i, j) - defer slice.Release() - return arrow.NewColumn(col.Field(), slice) -} - -// NewChunkedSlice constructs a zero-copy slice of the chunked array with the indicated -// indices i and j, corresponding to array[i:j]. -// The returned chunked array must be Release()'d after use. -// -// NewSlice panics if the slice is outside the valid range of the input array. -// NewSlice panics if j < i. 
-func NewChunkedSlice(a *arrow.Chunked, i, j int64) *arrow.Chunked { - if j > int64(a.Len()) || i > j || i > int64(a.Len()) { - panic("arrow/array: index out of range") - } - - var ( - cur = 0 - beg = i - sz = j - i - chunks = make([]arrow.Array, 0, len(a.Chunks())) - ) - - for cur < len(a.Chunks()) && beg >= int64(a.Chunks()[cur].Len()) { - beg -= int64(a.Chunks()[cur].Len()) - cur++ - } - - for cur < len(a.Chunks()) && sz > 0 { - arr := a.Chunks()[cur] - end := beg + sz - if end > int64(arr.Len()) { - end = int64(arr.Len()) - } - chunks = append(chunks, NewSlice(arr, beg, end)) - sz -= int64(arr.Len()) - beg - beg = 0 - cur++ - } - chunks = chunks[:len(chunks):len(chunks)] - defer func() { - for _, chunk := range chunks { - chunk.Release() - } - }() - - return arrow.NewChunked(a.DataType(), chunks) -} - -// simpleTable is a basic, non-lazy in-memory table. -type simpleTable struct { - refCount int64 - - rows int64 - cols []arrow.Column - - schema *arrow.Schema -} - -// NewTable returns a new basic, non-lazy in-memory table. -// If rows is negative, the number of rows will be inferred from the height -// of the columns. -// -// NewTable panics if the columns and schema are inconsistent. -// NewTable panics if rows is larger than the height of the columns. -func NewTable(schema *arrow.Schema, cols []arrow.Column, rows int64) arrow.Table { - tbl := simpleTable{ - refCount: 1, - rows: rows, - cols: cols, - schema: schema, - } - - if tbl.rows < 0 { - switch len(tbl.cols) { - case 0: - tbl.rows = 0 - default: - tbl.rows = int64(tbl.cols[0].Len()) - } - } - - // validate the table and its constituents. - // note we retain the columns after having validated the table - // in case the validation fails and panics (and would otherwise leak - // a ref-count on the columns.) - tbl.validate() - - for i := range tbl.cols { - tbl.cols[i].Retain() - } - - return &tbl -} - -// NewTableFromSlice is a convenience function to create a table from a slice -// of slices of arrow.Array. 
-// -// Like other NewTable functions this can panic if: -// - len(schema.Fields) != len(data) -// - the total length of each column's array slice (ie: number of rows -// in the column) aren't the same for all columns. -func NewTableFromSlice(schema *arrow.Schema, data [][]arrow.Array) arrow.Table { - if len(data) != schema.NumFields() { - panic("array/table: mismatch in number of columns and data for creating a table") - } - - cols := make([]arrow.Column, schema.NumFields()) - for i, arrs := range data { - field := schema.Field(i) - chunked := arrow.NewChunked(field.Type, arrs) - cols[i] = *arrow.NewColumn(field, chunked) - chunked.Release() - } - - tbl := simpleTable{ - refCount: 1, - schema: schema, - cols: cols, - rows: int64(cols[0].Len()), - } - - defer func() { - if r := recover(); r != nil { - // if validate panics, let's release the columns - // so that we don't leak them, then propagate the panic - for _, c := range cols { - c.Release() - } - panic(r) - } - }() - // validate the table and its constituents. - tbl.validate() - - return &tbl -} - -// NewTableFromRecords returns a new basic, non-lazy in-memory table. -// -// NewTableFromRecords panics if the records and schema are inconsistent. 
-func NewTableFromRecords(schema *arrow.Schema, recs []arrow.Record) arrow.Table { - arrs := make([]arrow.Array, len(recs)) - cols := make([]arrow.Column, schema.NumFields()) - - defer func(cols []arrow.Column) { - for i := range cols { - cols[i].Release() - } - }(cols) - - for i := range cols { - field := schema.Field(i) - for j, rec := range recs { - arrs[j] = rec.Column(i) - } - chunk := arrow.NewChunked(field.Type, arrs) - cols[i] = *arrow.NewColumn(field, chunk) - chunk.Release() - } - - return NewTable(schema, cols, -1) -} - -func (tbl *simpleTable) Schema() *arrow.Schema { return tbl.schema } - -func (tbl *simpleTable) AddColumn(i int, field arrow.Field, column arrow.Column) (arrow.Table, error) { - if int64(column.Len()) != tbl.rows { - return nil, fmt.Errorf("arrow/array: column length mismatch: %d != %d", column.Len(), tbl.rows) - } - if field.Type != column.DataType() { - return nil, fmt.Errorf("arrow/array: column type mismatch: %v != %v", field.Type, column.DataType()) - } - newSchema, err := tbl.schema.AddField(i, field) - if err != nil { - return nil, err - } - cols := make([]arrow.Column, len(tbl.cols)+1) - copy(cols[:i], tbl.cols[:i]) - cols[i] = column - copy(cols[i+1:], tbl.cols[i:]) - newTable := NewTable(newSchema, cols, tbl.rows) - return newTable, nil -} - -func (tbl *simpleTable) NumRows() int64 { return tbl.rows } -func (tbl *simpleTable) NumCols() int64 { return int64(len(tbl.cols)) } -func (tbl *simpleTable) Column(i int) *arrow.Column { return &tbl.cols[i] } - -func (tbl *simpleTable) validate() { - if len(tbl.cols) != tbl.schema.NumFields() { - panic(errors.New("arrow/array: table schema mismatch")) - } - for i, col := range tbl.cols { - if !col.Field().Equal(tbl.schema.Field(i)) { - panic(fmt.Errorf("arrow/array: column field %q is inconsistent with schema", col.Name())) - } - - if int64(col.Len()) < tbl.rows { - panic(fmt.Errorf("arrow/array: column %q expected length >= %d but got length %d", col.Name(), tbl.rows, col.Len())) - } - } 
-} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (tbl *simpleTable) Retain() { - atomic.AddInt64(&tbl.refCount, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (tbl *simpleTable) Release() { - debug.Assert(atomic.LoadInt64(&tbl.refCount) > 0, "too many releases") - - if atomic.AddInt64(&tbl.refCount, -1) == 0 { - for i := range tbl.cols { - tbl.cols[i].Release() - } - tbl.cols = nil - } -} - -func (tbl *simpleTable) String() string { - o := new(strings.Builder) - o.WriteString(tbl.Schema().String()) - o.WriteString("\n") - - for i := 0; i < int(tbl.NumCols()); i++ { - col := tbl.Column(i) - o.WriteString(col.Field().Name + ": [") - for j, chunk := range col.Data().Chunks() { - if j != 0 { - o.WriteString(", ") - } - o.WriteString(chunk.String()) - } - o.WriteString("]\n") - } - return o.String() -} - -// TableReader is a Record iterator over a (possibly chunked) Table -type TableReader struct { - refCount int64 - - tbl arrow.Table - cur int64 // current row - max int64 // total number of rows - rec arrow.Record // current Record - chksz int64 // chunk size - - chunks []*arrow.Chunked - slots []int // chunk indices - offsets []int64 // chunk offsets -} - -// NewTableReader returns a new TableReader to iterate over the (possibly chunked) Table. -// if chunkSize is <= 0, the biggest possible chunk will be selected. 
-func NewTableReader(tbl arrow.Table, chunkSize int64) *TableReader { - ncols := tbl.NumCols() - tr := &TableReader{ - refCount: 1, - tbl: tbl, - cur: 0, - max: int64(tbl.NumRows()), - chksz: chunkSize, - chunks: make([]*arrow.Chunked, ncols), - slots: make([]int, ncols), - offsets: make([]int64, ncols), - } - tr.tbl.Retain() - - if tr.chksz <= 0 { - tr.chksz = math.MaxInt64 - } - - for i := range tr.chunks { - col := tr.tbl.Column(i) - tr.chunks[i] = col.Data() - tr.chunks[i].Retain() - } - return tr -} - -func (tr *TableReader) Schema() *arrow.Schema { return tr.tbl.Schema() } -func (tr *TableReader) Record() arrow.Record { return tr.rec } - -func (tr *TableReader) Next() bool { - if tr.cur >= tr.max { - return false - } - - if tr.rec != nil { - tr.rec.Release() - } - - // determine the minimum contiguous slice across all columns - chunksz := imin64(tr.max, tr.chksz) - chunks := make([]arrow.Array, len(tr.chunks)) - for i := range chunks { - j := tr.slots[i] - chunk := tr.chunks[i].Chunk(j) - remain := int64(chunk.Len()) - tr.offsets[i] - if remain < chunksz { - chunksz = remain - } - - chunks[i] = chunk - } - - // slice the chunks, advance each chunk slot as appropriate. - batch := make([]arrow.Array, len(tr.chunks)) - for i, chunk := range chunks { - var slice arrow.Array - offset := tr.offsets[i] - switch int64(chunk.Len()) - offset { - case chunksz: - tr.slots[i]++ - tr.offsets[i] = 0 - if offset > 0 { - // need to slice - slice = NewSlice(chunk, offset, offset+chunksz) - } else { - // no need to slice - slice = chunk - slice.Retain() - } - default: - tr.offsets[i] += chunksz - slice = NewSlice(chunk, offset, offset+chunksz) - } - batch[i] = slice - } - - tr.cur += chunksz - tr.rec = NewRecord(tr.tbl.Schema(), batch, chunksz) - - for _, arr := range batch { - arr.Release() - } - - return true -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. 
-func (tr *TableReader) Retain() { - atomic.AddInt64(&tr.refCount, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (tr *TableReader) Release() { - debug.Assert(atomic.LoadInt64(&tr.refCount) > 0, "too many releases") - - if atomic.AddInt64(&tr.refCount, -1) == 0 { - tr.tbl.Release() - for _, chk := range tr.chunks { - chk.Release() - } - if tr.rec != nil { - tr.rec.Release() - } - tr.tbl = nil - tr.chunks = nil - tr.slots = nil - tr.offsets = nil - } -} -func (tr *TableReader) Err() error { return nil } - -func imin64(a, b int64) int64 { - if a < b { - return a - } - return b -} - -var ( - _ arrow.Table = (*simpleTable)(nil) - _ RecordReader = (*TableReader)(nil) -) diff --git a/go/arrow/array/table_test.go b/go/arrow/array/table_test.go deleted file mode 100644 index e8357ac3dfb69..0000000000000 --- a/go/arrow/array/table_test.go +++ /dev/null @@ -1,833 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "errors" - "fmt" - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -func TestChunked(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - c1 := arrow.NewChunked(arrow.PrimitiveTypes.Int32, nil) - c1.Retain() - c1.Release() - if got, want := c1.Len(), 0; got != want { - t.Fatalf("len differ. got=%d, want=%d", got, want) - } - if got, want := c1.NullN(), 0; got != want { - t.Fatalf("nulls: got=%d, want=%d", got, want) - } - if got, want := c1.DataType(), arrow.PrimitiveTypes.Int32; got != want { - t.Fatalf("dtype: got=%v, want=%v", got, want) - } - c1.Release() - - fb := array.NewFloat64Builder(mem) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - fb.AppendValues([]float64{6, 7}, nil) - f2 := fb.NewFloat64Array() - defer f2.Release() - - fb.AppendValues([]float64{8, 9, 10}, nil) - f3 := fb.NewFloat64Array() - defer f3.Release() - - c2 := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f1, f2, f3}, - ) - defer c2.Release() - - if got, want := c2.Len(), 10; got != want { - t.Fatalf("len: got=%d, want=%d", got, want) - } - if got, want := c2.NullN(), 0; got != want { - t.Fatalf("nulls: got=%d, want=%d", got, want) - } - if got, want := c2.DataType(), arrow.PrimitiveTypes.Float64; got != want { - t.Fatalf("dtype: got=%v, want=%v", got, want) - } - if got, want := c2.Chunk(0), c2.Chunks()[0]; !reflect.DeepEqual(got, want) { - t.Fatalf("chunk: got=%v, want=%v", got, want) - } - - for _, tc := range []struct { - i, j int64 - len int - nulls int - chunks int - }{ - {i: 0, j: 10, len: 10, nulls: 0, chunks: 3}, - {i: 2, j: 3, len: 1, nulls: 0, chunks: 1}, - {i: 9, j: 10, len: 1, nulls: 0, chunks: 1}, - {i: 0, j: 5, len: 5, nulls: 0, chunks: 1}, - {i: 5, j: 7, len: 2, nulls: 0, 
chunks: 1}, - {i: 7, j: 10, len: 3, nulls: 0, chunks: 1}, - {i: 10, j: 10, len: 0, nulls: 0, chunks: 0}, - } { - t.Run("", func(t *testing.T) { - sub := array.NewChunkedSlice(c2, tc.i, tc.j) - defer sub.Release() - - if got, want := sub.Len(), tc.len; got != want { - t.Fatalf("len: got=%d, want=%d", got, want) - } - if got, want := sub.NullN(), tc.nulls; got != want { - t.Fatalf("nulls: got=%d, want=%d", got, want) - } - if got, want := sub.DataType(), arrow.PrimitiveTypes.Float64; got != want { - t.Fatalf("dtype: got=%v, want=%v", got, want) - } - if got, want := len(sub.Chunks()), tc.chunks; got != want { - t.Fatalf("chunks: got=%d, want=%d", got, want) - } - }) - } -} - -func TestChunkedEqualDataType(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - lb1 := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) - defer lb1.Release() - - v1 := lb1.NewArray() - defer v1.Release() - - lb2 := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) - defer lb2.Release() - - v2 := lb2.NewArray() - defer v2.Release() - - c1 := arrow.NewChunked(arrow.ListOf(arrow.PrimitiveTypes.Int32), []arrow.Array{ - v1, v2, - }) - defer c1.Release() -} - -func TestChunkedInvalid(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - fb := array.NewFloat64Builder(mem) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - ib := array.NewInt32Builder(mem) - defer ib.Release() - - ib.AppendValues([]int32{6, 7}, nil) - f2 := ib.NewInt32Array() - defer f2.Release() - - defer func() { - e := recover() - if e == nil { - t.Fatalf("expected a panic") - } - - err, ok := e.(error) - if !ok { - t.Fatalf("expected an error") - } - - if !errors.Is(err, arrow.ErrInvalid) { - t.Fatalf("should be an ErrInvalid") - } - - if got, want := err.Error(), fmt.Sprintf("%s: arrow/array: mismatch data type float64 vs int32", 
arrow.ErrInvalid); got != want { - t.Fatalf("invalid error. got=%q, want=%q", got, want) - } - }() - - c1 := arrow.NewChunked(arrow.PrimitiveTypes.Int32, []arrow.Array{ - f1, f2, - }) - defer c1.Release() -} - -func TestChunkedSliceInvalid(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - fb := array.NewFloat64Builder(mem) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - fb.AppendValues([]float64{6, 7}, nil) - f2 := fb.NewFloat64Array() - defer f2.Release() - - fb.AppendValues([]float64{8, 9, 10}, nil) - f3 := fb.NewFloat64Array() - defer f3.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f1, f2, f3}, - ) - defer c.Release() - - for _, tc := range []struct { - i, j int64 - }{ - {i: 2, j: 1}, - {i: 10, j: 11}, - {i: 11, j: 11}, - } { - t.Run("", func(t *testing.T) { - defer func() { - e := recover() - if e == nil { - t.Fatalf("expected a panic") - } - if got, want := e.(string), "arrow/array: index out of range"; got != want { - t.Fatalf("invalid error. 
got=%q, want=%q", got, want) - } - }() - sub := array.NewChunkedSlice(c, tc.i, tc.j) - defer sub.Release() - }) - } -} - -func TestColumn(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - type slice struct { - i, j int64 - len int - nulls int - chunks int - } - - for _, tc := range []struct { - chunk *arrow.Chunked - field arrow.Field - err error - slices []slice - }{ - { - chunk: func() *arrow.Chunked { - ib := array.NewInt32Builder(mem) - defer ib.Release() - - ib.AppendValues([]int32{1, 2, 3}, nil) - i1 := ib.NewInt32Array() - defer i1.Release() - - ib.AppendValues([]int32{4, 5, 6, 7, 8, 9, 10}, nil) - i2 := ib.NewInt32Array() - defer i2.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Int32, - []arrow.Array{i1, i2}, - ) - return c - }(), - field: arrow.Field{Name: "i32", Type: arrow.PrimitiveTypes.Int32}, - slices: []slice{ - {i: 0, j: 10, len: 10, nulls: 0, chunks: 2}, - {i: 2, j: 3, len: 1, nulls: 0, chunks: 1}, - {i: 9, j: 10, len: 1, nulls: 0, chunks: 1}, - {i: 0, j: 5, len: 5, nulls: 0, chunks: 2}, - {i: 5, j: 7, len: 2, nulls: 0, chunks: 1}, - {i: 7, j: 10, len: 3, nulls: 0, chunks: 1}, - {i: 10, j: 10, len: 0, nulls: 0, chunks: 0}, - }, - }, - { - chunk: func() *arrow.Chunked { - fb := array.NewFloat64Builder(mem) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - fb.AppendValues([]float64{6, 7}, nil) - f2 := fb.NewFloat64Array() - defer f2.Release() - - fb.AppendValues([]float64{8, 9, 10}, nil) - f3 := fb.NewFloat64Array() - defer f3.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f1, f2, f3}, - ) - return c - }(), - field: arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64}, - slices: []slice{ - {i: 0, j: 10, len: 10, nulls: 0, chunks: 3}, - {i: 2, j: 3, len: 1, nulls: 0, chunks: 1}, - {i: 9, j: 10, len: 1, nulls: 0, chunks: 1}, - {i: 0, j: 5, len: 5, nulls: 0, 
chunks: 1}, - {i: 5, j: 7, len: 2, nulls: 0, chunks: 1}, - {i: 7, j: 10, len: 3, nulls: 0, chunks: 1}, - {i: 10, j: 10, len: 0, nulls: 0, chunks: 0}, - }, - }, - { - chunk: func() *arrow.Chunked { - fb := array.NewFloat64Builder(mem) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f1}, - ) - return c - }(), - field: arrow.Field{Name: "f32", Type: arrow.PrimitiveTypes.Float32}, - err: fmt.Errorf("%w: arrow/array: inconsistent data type float64 vs float32", arrow.ErrInvalid), - }, - } { - t.Run("", func(t *testing.T) { - defer tc.chunk.Release() - - if tc.err != nil { - defer func() { - e := recover() - if e == nil { - t.Fatalf("expected an error %q", tc.err) - } - switch err := e.(type) { - case string: - if err != tc.err.Error() { - t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) - } - case error: - if err.Error() != tc.err.Error() { - t.Fatalf("invalid panic message. 
got=%q, want=%q", err, tc.err) - } - default: - t.Fatalf("invalid type for panic message: %T (err=%v)", err, err) - } - }() - } - - col := arrow.NewColumn(tc.field, tc.chunk) - defer col.Release() - - if got, want := col.Len(), tc.chunk.Len(); got != want { - t.Fatalf("invalid length: got=%d, want=%d", got, want) - } - if got, want := col.NullN(), tc.chunk.NullN(); got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - if got, want := col.Data(), tc.chunk; got != want { - t.Fatalf("invalid chunked: got=%#v, want=%#v", got, want) - } - if got, want := col.Field(), tc.field; !got.Equal(want) { - t.Fatalf("invalid field: got=%#v, want=%#v", got, want) - } - if got, want := col.Name(), tc.field.Name; got != want { - t.Fatalf("invalid name: got=%q, want=%q", got, want) - } - if got, want := col.DataType(), tc.field.Type; !reflect.DeepEqual(got, want) { - t.Fatalf("invalid data type: got=%#v, want=%#v", got, want) - } - - col.Retain() - col.Release() - - for _, slice := range tc.slices { - t.Run("", func(t *testing.T) { - sub := array.NewColumnSlice(col, slice.i, slice.j) - defer sub.Release() - - if got, want := sub.Len(), slice.len; got != want { - t.Fatalf("len: got=%d, want=%d", got, want) - } - if got, want := sub.NullN(), slice.nulls; got != want { - t.Fatalf("nulls: got=%d, want=%d", got, want) - } - if got, want := sub.DataType(), col.DataType(); got != want { - t.Fatalf("dtype: got=%v, want=%v", got, want) - } - if got, want := len(sub.Data().Chunks()), slice.chunks; got != want { - t.Fatalf("chunks: got=%d, want=%d", got, want) - } - }) - } - }) - } - -} - -func TestTable(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - preSchema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - }, - nil, - ) - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: 
arrow.PrimitiveTypes.Float64}, - }, - nil, - ) - col1 := func() *arrow.Column { - chunk := func() *arrow.Chunked { - ib := array.NewInt32Builder(mem) - defer ib.Release() - - ib.AppendValues([]int32{1, 2, 3}, nil) - i1 := ib.NewInt32Array() - defer i1.Release() - - ib.AppendValues([]int32{4, 5, 6, 7, 8, 9, 10}, nil) - i2 := ib.NewInt32Array() - defer i2.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Int32, - []arrow.Array{i1, i2}, - ) - return c - }() - defer chunk.Release() - - return arrow.NewColumn(schema.Field(0), chunk) - }() - defer col1.Release() - - col2 := func() *arrow.Column { - chunk := func() *arrow.Chunked { - fb := array.NewFloat64Builder(mem) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - fb.AppendValues([]float64{6, 7}, nil) - f2 := fb.NewFloat64Array() - defer f2.Release() - - fb.AppendValues([]float64{8, 9, 10}, nil) - f3 := fb.NewFloat64Array() - defer f3.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f1, f2, f3}, - ) - return c - }() - defer chunk.Release() - - return arrow.NewColumn(schema.Field(1), chunk) - }() - defer col2.Release() - - cols := []arrow.Column{*col1, *col2} - - slices := [][]arrow.Array{col1.Data().Chunks(), col2.Data().Chunks()} - - preTbl := array.NewTable(preSchema, []arrow.Column{*col1}, -1) - defer preTbl.Release() - tbl, err := preTbl.AddColumn( - 1, - arrow.Field{Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, - *col2, - ) - defer tbl.Release() - if err != nil { - t.Fatalf("could not add column: %+v", err) - } - - tbl2 := array.NewTableFromSlice(schema, slices) - defer tbl2.Release() - - tbl.Retain() - tbl.Release() - - if got, want := tbl.Schema(), schema; !got.Equal(want) { - t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) - } - - if got, want := tbl.NumRows(), int64(10); got != want { - t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) - } - if got, want := 
tbl.NumCols(), int64(2); got != want { - t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) - } - if got, want := tbl.Column(0).Name(), col1.Name(); got != want { - t.Fatalf("invalid column: got=%q, want=%q", got, want) - } - - if got, want := tbl2.NumRows(), int64(10); got != want { - t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) - } - if got, want := tbl2.NumCols(), int64(2); got != want { - t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) - } - if got, want := tbl2.Column(0).Name(), col1.Name(); got != want { - t.Fatalf("invalid column: got=%q, want=%q", got, want) - } - - for _, tc := range []struct { - schema *arrow.Schema - cols []arrow.Column - rows int64 - err error - }{ - { - schema: schema, - cols: nil, - rows: -1, - err: fmt.Errorf("arrow/array: table schema mismatch"), - }, - { - schema: schema, - cols: cols[:1], - rows: 0, - err: fmt.Errorf("arrow/array: table schema mismatch"), - }, - { - schema: arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - }, - nil, - ), - cols: cols, - rows: 0, - err: fmt.Errorf("arrow/array: table schema mismatch"), - }, - { - schema: arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Int32}, - }, - nil, - ), - cols: cols, - rows: 0, - err: fmt.Errorf(`arrow/array: column field "f2-f64" is inconsistent with schema`), - }, - { - schema: arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f32", Type: arrow.PrimitiveTypes.Float64}, - }, - nil, - ), - cols: cols, - rows: 0, - err: fmt.Errorf(`arrow/array: column field "f2-f64" is inconsistent with schema`), - }, - { - schema: schema, - cols: cols, - rows: 11, - err: fmt.Errorf(`arrow/array: column "f1-i32" expected length >= 11 but got length 10`), - }, - { - schema: schema, - cols: cols, - rows: 3, - err: nil, - }, - } { - t.Run("", func(t *testing.T) { - if 
tc.err != nil { - defer func() { - e := recover() - if e == nil { - t.Fatalf("expected an error %q", tc.err) - } - switch err := e.(type) { - case string: - if err != tc.err.Error() { - t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) - } - case error: - if err.Error() != tc.err.Error() { - t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) - } - default: - t.Fatalf("invalid type for panic message: %T (err=%v)", err, err) - } - }() - } - tbl := array.NewTable(tc.schema, tc.cols, tc.rows) - defer tbl.Release() - if got, want := tbl.NumRows(), tc.rows; got != want { - t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) - } - }) - } -} - -func TestTableFromRecords(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, - }, - nil, - ) - - b := array.NewRecordBuilder(mem, schema) - defer b.Release() - - b.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3, 4, 5, 6}, nil) - b.Field(0).(*array.Int32Builder).AppendValues([]int32{7, 8, 9, 10}, []bool{true, true, false, true}) - b.Field(1).(*array.Float64Builder).AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) - - rec1 := b.NewRecord() - defer rec1.Release() - - b.Field(0).(*array.Int32Builder).AppendValues([]int32{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) - b.Field(1).(*array.Float64Builder).AppendValues([]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) - - rec2 := b.NewRecord() - defer rec2.Release() - - tbl := array.NewTableFromRecords(schema, []arrow.Record{rec1, rec2}) - defer tbl.Release() - - if got, want := tbl.Schema(), schema; !got.Equal(want) { - t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) - } - - if got, want := tbl.NumRows(), int64(20); got != want { - t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) - } - if 
got, want := tbl.NumCols(), int64(2); got != want { - t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) - } - if got, want := tbl.Column(0).Name(), schema.Field(0).Name; got != want { - t.Fatalf("invalid column: got=%q, want=%q", got, want) - } -} - -func TestTableReader(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, - }, - nil, - ) - col1 := func() *arrow.Column { - chunk := func() *arrow.Chunked { - ib := array.NewInt32Builder(mem) - defer ib.Release() - - ib.AppendValues([]int32{1, 2, 3}, nil) - i1 := ib.NewInt32Array() - defer i1.Release() - - ib.AppendValues([]int32{4, 5, 6, 7, 8, 9, 10}, nil) - i2 := ib.NewInt32Array() - defer i2.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Int32, - []arrow.Array{i1, i2}, - ) - return c - }() - defer chunk.Release() - - return arrow.NewColumn(schema.Field(0), chunk) - }() - defer col1.Release() - - col2 := func() *arrow.Column { - chunk := func() *arrow.Chunked { - fb := array.NewFloat64Builder(mem) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - fb.AppendValues([]float64{6, 7}, nil) - f2 := fb.NewFloat64Array() - defer f2.Release() - - fb.AppendValues([]float64{8, 9, 10}, nil) - f3 := fb.NewFloat64Array() - defer f3.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f1, f2, f3}, - ) - return c - }() - defer chunk.Release() - - return arrow.NewColumn(schema.Field(1), chunk) - }() - defer col2.Release() - - cols := []arrow.Column{*col1, *col2} - tbl := array.NewTable(schema, cols, -1) - defer tbl.Release() - - tr := array.NewTableReader(tbl, 1) - defer tr.Release() - - tr.Retain() - tr.Release() - - for tr.Next() { - } - if err := tr.Err(); err != nil { - t.Fatalf("tr err: 
%#v", err) - } - - for _, tc := range []struct { - sz int64 - n int64 - rows []int64 - }{ - {sz: -1, n: 4, rows: []int64{3, 2, 2, 3}}, - {sz: +0, n: 4, rows: []int64{3, 2, 2, 3}}, - {sz: +1, n: 10, rows: []int64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, - {sz: +2, n: 6, rows: []int64{2, 1, 2, 2, 2, 1}}, - } { - t.Run(fmt.Sprintf("chunksz=%d", tc.sz), func(t *testing.T) { - tr := array.NewTableReader(tbl, tc.sz) - defer tr.Release() - - if got, want := tr.Schema(), tbl.Schema(); !got.Equal(want) { - t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) - } - - var ( - n int64 - sum int64 - ) - for tr.Next() { - rec := tr.Record() - if got, want := rec.Schema(), tbl.Schema(); !got.Equal(want) { - t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) - } - if got, want := rec.NumRows(), tc.rows[n]; got != want { - t.Fatalf("invalid number of rows[%d]: got=%d, want=%d", n, got, want) - } - n++ - sum += rec.NumRows() - } - if err := tr.Err(); err != nil { - t.Fatalf("tr err: %#v", err) - } - - if got, want := n, tc.n; got != want { - t.Fatalf("invalid number of iterations: got=%d, want=%d", got, want) - } - if sum != tbl.NumRows() { - t.Fatalf("invalid number of rows iterated over: got=%d, want=%d", sum, tbl.NumRows()) - } - }) - } -} - -func TestTableToString(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, - }, - nil, - ) - - b := array.NewRecordBuilder(mem, schema) - defer b.Release() - - b.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3, 4, 5, 6}, nil) - b.Field(0).(*array.Int32Builder).AppendValues([]int32{7, 8, 9, 10}, []bool{true, true, false, true}) - b.Field(1).(*array.Float64Builder).AppendValues([]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) - - rec1 := b.NewRecord() - defer rec1.Release() - - 
b.Field(0).(*array.Int32Builder).AppendValues([]int32{111, 112, 113, 114, 115, 116, 117, 118, 119, 120}, nil) - b.Field(1).(*array.Float64Builder).AppendValues([]float64{211, 212, 213, 214, 215, 216, 217, 218, 219, 220}, nil) - - rec2 := b.NewRecord() - defer rec2.Release() - - tbl := array.NewTableFromRecords(schema, []arrow.Record{rec1, rec2}) - defer tbl.Release() - - table_str := tbl.String() - expected_str := - `schema: - fields: 2 - - f1-i32: type=int32 - - f2-f64: type=float64 -f1-i32: [[1 2 3 4 5 6 7 8 (null) 10], [111 112 113 114 115 116 117 118 119 120]] -f2-f64: [[11 12 13 14 15 16 17 18 19 20], [211 212 213 214 215 216 217 218 219 220]] -` - if got, want := table_str, expected_str; table_str != expected_str { - t.Fatalf("invalid String: got=%#v, want=%#v", got, want) - } -} diff --git a/go/arrow/array/timestamp.go b/go/arrow/array/timestamp.go deleted file mode 100644 index 679d9a5a8a4cc..0000000000000 --- a/go/arrow/array/timestamp.go +++ /dev/null @@ -1,380 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "fmt" - "reflect" - "strings" - "sync/atomic" - "time" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// Timestamp represents an immutable sequence of arrow.Timestamp values. -type Timestamp struct { - array - values []arrow.Timestamp -} - -// NewTimestampData creates a new Timestamp from Data. -func NewTimestampData(data arrow.ArrayData) *Timestamp { - a := &Timestamp{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Timestamp) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Timestamp) Value(i int) arrow.Timestamp { return a.values[i] } - -// TimestampValues returns the values. -func (a *Timestamp) TimestampValues() []arrow.Timestamp { return a.values } - -// String returns a string representation of the array. 
-func (a *Timestamp) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Timestamp) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.TimestampTraits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Timestamp) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - - toTime, _ := a.DataType().(*arrow.TimestampType).GetToTimeFunc() - return toTime(a.values[i]).Format("2006-01-02 15:04:05.999999999Z0700") -} - -func (a *Timestamp) GetOneForMarshal(i int) interface{} { - if val := a.ValueStr(i); val != NullValueStr { - return val - } - return nil -} - -func (a *Timestamp) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } - - return json.Marshal(vals) -} - -func arrayEqualTimestamp(left, right *Timestamp) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -type TimestampBuilder struct { - builder - - dtype *arrow.TimestampType - data *memory.Buffer - rawData []arrow.Timestamp -} - -func NewTimestampBuilder(mem memory.Allocator, dtype *arrow.TimestampType) *TimestampBuilder { - return &TimestampBuilder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} -} - -func (b *TimestampBuilder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *TimestampBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *TimestampBuilder) AppendTime(t time.Time) { - ts, err := arrow.TimestampFromTime(t, b.dtype.Unit) - if err != nil { - panic(err) - } - b.Append(ts) -} - -func (b *TimestampBuilder) Append(v arrow.Timestamp) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *TimestampBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *TimestampBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *TimestampBuilder) AppendEmptyValue() { - b.Append(0) -} - -func (b *TimestampBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *TimestampBuilder) UnsafeAppend(v arrow.Timestamp) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *TimestampBuilder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *TimestampBuilder) AppendValues(v []arrow.Timestamp, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.TimestampTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *TimestampBuilder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.TimestampTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.TimestampTraits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *TimestampBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *TimestampBuilder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.TimestampTraits.BytesRequired(n)) - b.rawData = arrow.TimestampTraits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a Timestamp array from the memory buffers used by the builder and resets the TimestampBuilder -// so it can be used to build a new array. -func (b *TimestampBuilder) NewArray() arrow.Array { - return b.NewTimestampArray() -} - -// NewTimestampArray creates a Timestamp array from the memory buffers used by the builder and resets the TimestampBuilder -// so it can be used to build a new array. 
-func (b *TimestampBuilder) NewTimestampArray() (a *Timestamp) { - data := b.newData() - a = NewTimestampData(data) - data.Release() - return -} - -func (b *TimestampBuilder) newData() (data *Data) { - bytesRequired := arrow.TimestampTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *TimestampBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - - loc, err := b.dtype.GetZone() - if err != nil { - return err - } - - v, _, err := arrow.TimestampFromStringInLocation(s, b.dtype.Unit, loc) - if err != nil { - b.AppendNull() - return err - } - b.Append(v) - return nil -} - -func (b *TimestampBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - loc, _ := b.dtype.GetZone() - tm, _, err := arrow.TimestampFromStringInLocation(v, b.dtype.Unit, loc) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(arrow.Timestamp(0)), - Offset: dec.InputOffset(), - } - } - - b.Append(tm) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(arrow.Timestamp(0)), - Offset: dec.InputOffset(), - } - } - b.Append(arrow.Timestamp(n)) - case float64: - b.Append(arrow.Timestamp(v)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(arrow.Timestamp(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *TimestampBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - 
return nil -} - -func (b *TimestampBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*Timestamp)(nil) - _ Builder = (*TimestampBuilder)(nil) -) diff --git a/go/arrow/array/timestamp_test.go b/go/arrow/array/timestamp_test.go deleted file mode 100644 index cb9f957d3f255..0000000000000 --- a/go/arrow/array/timestamp_test.go +++ /dev/null @@ -1,300 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "testing" - "time" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestTimestampStringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := &arrow.TimestampType{Unit: arrow.Second} - b := array.NewTimestampBuilder(mem, dt) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Timestamp) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewTimestampBuilder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Timestamp) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewTimestampBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - timestamp := time.Now() - dtype := &arrow.TimestampType{Unit: arrow.Second} - ab := array.NewTimestampBuilder(mem, dtype) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - ab.AppendTime(timestamp) - - // check state of builder before NewTimestampArray - assert.Equal(t, 11, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewTimestampArray() - - // check state of builder after NewTimestampArray - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewTimestampArray did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewTimestampArray did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewTimestampArray did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []arrow.Timestamp{1, 2, 3, 0, 5, 6, 0, 8, 9, 10, arrow.Timestamp(timestamp.Unix())}, 
a.TimestampValues(), "unexpected TimestampValues") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.TimestampValues(), 11, "unexpected length of TimestampValues") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewTimestampArray() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []arrow.Timestamp{7, 8}, a.TimestampValues()) - assert.Len(t, a.TimestampValues(), 2) - - a.Release() - - var ( - want = []arrow.Timestamp{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewTimestampArray() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Timestamp); !ok { - t.Fatalf("could not type-assert to array.Timestamp") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Timestamp) - if !ok { - t.Fatalf("could not type-assert to array.Timestamp") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestTimestampBuilder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.TimestampType{Unit: arrow.Second} - ab := array.NewTimestampBuilder(mem, dtype) - defer ab.Release() - - exp := []arrow.Timestamp{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewTimestampArray() - assert.Equal(t, exp, a.TimestampValues()) - - a.Release() -} - -func TestTimestampBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := 
&arrow.TimestampType{Unit: arrow.Second} - ab := array.NewTimestampBuilder(mem, dtype) - defer ab.Release() - - exp := []arrow.Timestamp{0, 1, 2, 3} - - ab.AppendValues([]arrow.Timestamp{}, nil) - a := ab.NewTimestampArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewTimestampArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]arrow.Timestamp{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewTimestampArray() - assert.Equal(t, exp, a.TimestampValues()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]arrow.Timestamp{}, nil) - a = ab.NewTimestampArray() - assert.Equal(t, exp, a.TimestampValues()) - a.Release() -} - -func TestTimestampBuilder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.TimestampType{Unit: arrow.Second} - ab := array.NewTimestampBuilder(mem, dtype) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestTimestampValueStr(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := &arrow.TimestampType{Unit: arrow.Second, TimeZone: "America/Phoenix"} - b := array.NewTimestampBuilder(mem, dt) - defer b.Release() - - b.Append(-34226955) - b.Append(1456767743) - - arr := b.NewArray() - defer arr.Release() - - assert.Equal(t, "1968-11-30 13:30:45-0700", arr.ValueStr(0)) - assert.Equal(t, "2016-02-29 10:42:23-0700", arr.ValueStr(1)) -} - -func TestTimestampEquality(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - tsDatatypes := []*arrow.TimestampType{ - 
{Unit: arrow.Second}, - {Unit: arrow.Second, TimeZone: "UTC"}, - {Unit: arrow.Second, TimeZone: "America/Phoenix"}, - } - - arrs := make([]*array.Timestamp, 0, len(tsDatatypes)) - for _, dt := range tsDatatypes { - bldr := array.NewTimestampBuilder(mem, dt) - defer bldr.Release() - - bldr.Append(-34226955) - bldr.Append(1456767743) - - arr := bldr.NewTimestampArray() - defer arr.Release() - - arrs = append(arrs, arr) - } - - // No timezone, "wall clock" semantics - // These timestamps have no actual timezone, but we still represent as UTC per Go conventions - assert.Equal(t, "1968-11-30 20:30:45Z", arrs[0].ValueStr(0)) - assert.Equal(t, "2016-02-29 17:42:23Z", arrs[0].ValueStr(1)) - - // UTC timezone, "instant" semantics - assert.Equal(t, "1968-11-30 20:30:45Z", arrs[1].ValueStr(0)) - assert.Equal(t, "2016-02-29 17:42:23Z", arrs[1].ValueStr(1)) - - // America/Phoenix timezone, "instant" semantics - assert.Equal(t, "1968-11-30 13:30:45-0700", arrs[2].ValueStr(0)) - assert.Equal(t, "2016-02-29 10:42:23-0700", arrs[2].ValueStr(1)) - - // Despite timezone and semantics, the physical values are equivalent - assert.Equal(t, arrs[0].Value(0), arrs[1].Value(0)) - assert.Equal(t, arrs[0].Value(0), arrs[2].Value(0)) - assert.Equal(t, arrs[1].Value(0), arrs[2].Value(0)) - - assert.Equal(t, arrs[0].Value(1), arrs[1].Value(1)) - assert.Equal(t, arrs[0].Value(1), arrs[2].Value(1)) - assert.Equal(t, arrs[1].Value(1), arrs[2].Value(1)) -} diff --git a/go/arrow/array/union.go b/go/arrow/array/union.go deleted file mode 100644 index 5d2a8b8ecb2f0..0000000000000 --- a/go/arrow/array/union.go +++ /dev/null @@ -1,1370 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "errors" - "fmt" - "math" - "reflect" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/bitutils" - "github.com/apache/arrow/go/v18/internal/json" -) - -// Union is a convenience interface to encompass both Sparse and Dense -// union array types. -type Union interface { - arrow.Array - // NumFields returns the number of child fields in this union. - // Equivalent to len(UnionType().Fields()) - NumFields() int - // Validate returns an error if there are any issues with the lengths - // or types of the children arrays mismatching with the Type of the - // Union Array. nil is returned if there are no problems. - Validate() error - // ValidateFull runs the same checks that Validate() does, but additionally - // checks that all childIDs are valid (>= 0 || ==InvalidID) and for - // dense unions validates that all offsets are within the bounds of their - // respective child. - ValidateFull() error - // TypeCodes returns the type id buffer for the union Array, equivalent to - // Data().Buffers()[1]. Note: This will not account for any slice offset. 
- TypeCodes() *memory.Buffer - // RawTypeCodes returns a slice of UnionTypeCodes properly accounting for - // any slice offset. - RawTypeCodes() []arrow.UnionTypeCode - // TypeCode returns the logical type code of the value at the requested index - TypeCode(i int) arrow.UnionTypeCode - // ChildID returns the index of the physical child containing the value - // at the requested index. Equivalent to: - // - // arr.UnionType().ChildIDs()[arr.RawTypeCodes()[i+arr.Data().Offset()]] - ChildID(i int) int - // UnionType is a convenience function to retrieve the properly typed UnionType - // instead of having to call DataType() and manually assert the type. - UnionType() arrow.UnionType - // Mode returns the union mode of the underlying Array, either arrow.SparseMode - // or arrow.DenseMode. - Mode() arrow.UnionMode - // Field returns the requested child array for this union. Returns nil if a - // nonexistent position is passed in. - // - // The appropriate child for an index can be retrieved with Field(ChildID(index)) - Field(pos int) arrow.Array -} - -const kMaxElems = math.MaxInt32 - -type union struct { - array - - unionType arrow.UnionType - typecodes []arrow.UnionTypeCode - - children []arrow.Array -} - -func (a *union) Retain() { - a.array.Retain() - for _, c := range a.children { - c.Retain() - } -} - -func (a *union) Release() { - a.array.Release() - for _, c := range a.children { - c.Release() - } -} - -func (a *union) NumFields() int { return len(a.unionType.Fields()) } - -func (a *union) Mode() arrow.UnionMode { return a.unionType.Mode() } - -func (a *union) UnionType() arrow.UnionType { return a.unionType } - -func (a *union) TypeCodes() *memory.Buffer { - return a.data.buffers[1] -} - -func (a *union) RawTypeCodes() []arrow.UnionTypeCode { - if a.data.length > 0 { - return a.typecodes[a.data.offset:] - } - return []arrow.UnionTypeCode{} -} - -func (a *union) TypeCode(i int) arrow.UnionTypeCode { - return a.typecodes[i+a.data.offset] -} - -func (a *union) 
ChildID(i int) int { - return a.unionType.ChildIDs()[a.typecodes[i+a.data.offset]] -} - -func (a *union) setData(data *Data) { - a.unionType = data.dtype.(arrow.UnionType) - debug.Assert(len(data.buffers) >= 2, "arrow/array: invalid number of union array buffers") - - if data.length > 0 { - a.typecodes = arrow.Int8Traits.CastFromBytes(data.buffers[1].Bytes()) - } else { - a.typecodes = []int8{} - } - a.children = make([]arrow.Array, len(data.childData)) - for i, child := range data.childData { - if a.unionType.Mode() == arrow.SparseMode && (data.offset != 0 || child.Len() != data.length) { - child = NewSliceData(child, int64(data.offset), int64(data.offset+data.length)) - defer child.Release() - } - a.children[i] = MakeFromData(child) - } - a.array.setData(data) -} - -func (a *union) Field(pos int) (result arrow.Array) { - if pos < 0 || pos >= len(a.children) { - return nil - } - - return a.children[pos] -} - -func (a *union) Validate() error { - fields := a.unionType.Fields() - for i, f := range fields { - fieldData := a.data.childData[i] - if a.unionType.Mode() == arrow.SparseMode && fieldData.Len() < a.data.length+a.data.offset { - return fmt.Errorf("arrow/array: sparse union child array #%d has length smaller than expected for union array (%d < %d)", - i, fieldData.Len(), a.data.length+a.data.offset) - } - - if !arrow.TypeEqual(f.Type, fieldData.DataType()) { - return fmt.Errorf("arrow/array: union child array #%d does not match type field %s vs %s", - i, fieldData.DataType(), f.Type) - } - } - return nil -} - -func (a *union) ValidateFull() error { - if err := a.Validate(); err != nil { - return err - } - - childIDs := a.unionType.ChildIDs() - codesMap := a.unionType.TypeCodes() - codes := a.RawTypeCodes() - - for i := 0; i < a.data.length; i++ { - code := codes[i] - if code < 0 || childIDs[code] == arrow.InvalidUnionChildID { - return fmt.Errorf("arrow/array: union value at position %d has invalid type id %d", i, code) - } - } - - if a.unionType.Mode() == 
arrow.DenseMode { - // validate offsets - - // map logical typeid to child length - var childLengths [256]int64 - for i := range a.unionType.Fields() { - childLengths[codesMap[i]] = int64(a.data.childData[i].Len()) - } - - // check offsets are in bounds - var lastOffsets [256]int64 - offsets := arrow.Int32Traits.CastFromBytes(a.data.buffers[2].Bytes())[a.data.offset:] - for i := int64(0); i < int64(a.data.length); i++ { - code := codes[i] - offset := offsets[i] - switch { - case offset < 0: - return fmt.Errorf("arrow/array: union value at position %d has negative offset %d", i, offset) - case offset >= int32(childLengths[code]): - return fmt.Errorf("arrow/array: union value at position %d has offset larger than child length (%d >= %d)", - i, offset, childLengths[code]) - case offset < int32(lastOffsets[code]): - return fmt.Errorf("arrow/array: union value at position %d has non-monotonic offset %d", i, offset) - } - lastOffsets[code] = int64(offset) - } - } - - return nil -} - -// SparseUnion represents an array where each logical value is taken from -// a single child. A buffer of 8-bit type ids indicates which child a given -// logical value is to be taken from. This is represented as the ChildID, -// which is the index into the list of children. -// -// In a sparse union, each child array will have the same length as the -// union array itself, regardless of how many values in the union actually -// refer to it. -// -// Unlike most other arrays, unions do not have a top-level validity bitmap. -type SparseUnion struct { - union -} - -// NewSparseUnion constructs a union array using the given type, length, list of -// children and buffer of typeIDs with the given offset. 
-func NewSparseUnion(dt *arrow.SparseUnionType, length int, children []arrow.Array, typeIDs *memory.Buffer, offset int) *SparseUnion { - childData := make([]arrow.ArrayData, len(children)) - for i, c := range children { - childData[i] = c.Data() - } - data := NewData(dt, length, []*memory.Buffer{nil, typeIDs}, childData, 0, offset) - defer data.Release() - return NewSparseUnionData(data) -} - -// NewSparseUnionData constructs a SparseUnion array from the given ArrayData object. -func NewSparseUnionData(data arrow.ArrayData) *SparseUnion { - a := &SparseUnion{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// NewSparseUnionFromArrays constructs a new SparseUnion array with the provided -// values. -// -// typeIDs *must* be an INT8 array with no nulls -// len(codes) *must* be either 0 or equal to len(children). If len(codes) is 0, -// the type codes used will be sequentially numeric starting at 0. -func NewSparseUnionFromArrays(typeIDs arrow.Array, children []arrow.Array, codes ...arrow.UnionTypeCode) (*SparseUnion, error) { - return NewSparseUnionFromArraysWithFieldCodes(typeIDs, children, []string{}, codes) -} - -// NewSparseUnionFromArrayWithFields constructs a new SparseUnion array like -// NewSparseUnionFromArrays, but allows specifying the field names. Type codes -// will be auto-generated sequentially starting at 0. -// -// typeIDs *must* be an INT8 array with no nulls. -// len(fields) *must* either be 0 or equal to len(children). If len(fields) is 0, -// then the fields will be named sequentially starting at "0". -func NewSparseUnionFromArraysWithFields(typeIDs arrow.Array, children []arrow.Array, fields []string) (*SparseUnion, error) { - return NewSparseUnionFromArraysWithFieldCodes(typeIDs, children, fields, []arrow.UnionTypeCode{}) -} - -// NewSparseUnionFromArraysWithFieldCodes combines the other constructors -// for constructing a new SparseUnion array with the provided field names -// and type codes, along with children and type ids. 
-// -// All the requirements mentioned in NewSparseUnionFromArrays and -// NewSparseUnionFromArraysWithFields apply. -func NewSparseUnionFromArraysWithFieldCodes(typeIDs arrow.Array, children []arrow.Array, fields []string, codes []arrow.UnionTypeCode) (*SparseUnion, error) { - switch { - case typeIDs.DataType().ID() != arrow.INT8: - return nil, errors.New("arrow/array: union array type ids must be signed int8") - case typeIDs.NullN() != 0: - return nil, errors.New("arrow/array: union type ids may not have nulls") - case len(fields) > 0 && len(fields) != len(children): - return nil, errors.New("arrow/array: field names must have the same length as children") - case len(codes) > 0 && len(codes) != len(children): - return nil, errors.New("arrow/array: type codes must have same length as children") - } - - buffers := []*memory.Buffer{nil, typeIDs.Data().Buffers()[1]} - ty := arrow.SparseUnionFromArrays(children, fields, codes) - - childData := make([]arrow.ArrayData, len(children)) - for i, c := range children { - childData[i] = c.Data() - if c.Len() != typeIDs.Len() { - return nil, errors.New("arrow/array: sparse union array must have len(child) == len(typeids) for all children") - } - } - - data := NewData(ty, typeIDs.Len(), buffers, childData, 0, typeIDs.Data().Offset()) - defer data.Release() - return NewSparseUnionData(data), nil -} - -func (a *SparseUnion) setData(data *Data) { - a.union.setData(data) - debug.Assert(a.data.dtype.ID() == arrow.SPARSE_UNION, "arrow/array: invalid data type for SparseUnion") - debug.Assert(len(a.data.buffers) == 2, "arrow/array: sparse unions should have exactly 2 buffers") - debug.Assert(a.data.buffers[0] == nil, "arrow/array: validity bitmap for sparse unions should be nil") -} - -func (a *SparseUnion) GetOneForMarshal(i int) interface{} { - typeID := a.RawTypeCodes()[i] - - childID := a.ChildID(i) - data := a.Field(childID) - - if data.IsNull(i) { - return nil - } - - return []interface{}{typeID, data.GetOneForMarshal(i)} -} - 
-func (a *SparseUnion) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func (a *SparseUnion) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - - val := a.GetOneForMarshal(i) - if val == nil { - // child is nil - return NullValueStr - } - - data, err := json.Marshal(val) - if err != nil { - panic(err) - } - return string(data) -} - -func (a *SparseUnion) String() string { - var b strings.Builder - b.WriteByte('[') - - fieldList := a.unionType.Fields() - for i := 0; i < a.Len(); i++ { - if i > 0 { - b.WriteString(" ") - } - - field := fieldList[a.ChildID(i)] - f := a.Field(a.ChildID(i)) - fmt.Fprintf(&b, "{%s=%v}", field.Name, f.GetOneForMarshal(i)) - } - b.WriteByte(']') - return b.String() -} - -// GetFlattenedField returns a child array, adjusting its validity bitmap -// where the union array type codes don't match. -// -// ie: the returned array will have a null in every index that it is -// not referenced by union. 
-func (a *SparseUnion) GetFlattenedField(mem memory.Allocator, index int) (arrow.Array, error) { - if index < 0 || index >= a.NumFields() { - return nil, fmt.Errorf("arrow/array: index out of range: %d", index) - } - - childData := a.data.childData[index] - if a.data.offset != 0 || a.data.length != childData.Len() { - childData = NewSliceData(childData, int64(a.data.offset), int64(a.data.offset+a.data.length)) - // NewSliceData doesn't break the slice reference for buffers - // since we're going to replace the null bitmap buffer we need to break the - // slice reference so that we don't affect a.children's references - newBufs := make([]*memory.Buffer, len(childData.Buffers())) - copy(newBufs, childData.(*Data).buffers) - childData.(*Data).buffers = newBufs - } else { - childData = childData.(*Data).Copy() - } - defer childData.Release() - - // synthesize a null bitmap based on the union discriminant - // make sure the bitmap has extra bits corresponding to the child's offset - flattenedNullBitmap := memory.NewResizableBuffer(mem) - flattenedNullBitmap.Resize(childData.Len() + childData.Offset()) - - var ( - childNullBitmap = childData.Buffers()[0] - childOffset = childData.Offset() - typeCode = a.unionType.TypeCodes()[index] - codes = a.RawTypeCodes() - offset int64 = 0 - ) - bitutils.GenerateBitsUnrolled(flattenedNullBitmap.Bytes(), int64(childOffset), int64(a.data.length), - func() bool { - b := codes[offset] == typeCode - offset++ - return b - }) - - if childNullBitmap != nil { - defer childNullBitmap.Release() - bitutil.BitmapAnd(flattenedNullBitmap.Bytes(), childNullBitmap.Bytes(), - int64(childOffset), int64(childOffset), flattenedNullBitmap.Bytes(), - int64(childOffset), int64(childData.Len())) - } - childData.(*Data).buffers[0] = flattenedNullBitmap - childData.(*Data).nulls = childData.Len() - bitutil.CountSetBits(flattenedNullBitmap.Bytes(), childOffset, childData.Len()) - return MakeFromData(childData), nil -} - -func arraySparseUnionEqual(l, r 
*SparseUnion) bool { - childIDs := l.unionType.ChildIDs() - leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes() - - for i := 0; i < l.data.length; i++ { - typeID := leftCodes[i] - if typeID != rightCodes[i] { - return false - } - - childNum := childIDs[typeID] - eq := SliceEqual(l.children[childNum], int64(i), int64(i+1), - r.children[childNum], int64(i), int64(i+1)) - if !eq { - return false - } - } - return true -} - -func arraySparseUnionApproxEqual(l, r *SparseUnion, opt equalOption) bool { - childIDs := l.unionType.ChildIDs() - leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes() - - for i := 0; i < l.data.length; i++ { - typeID := leftCodes[i] - if typeID != rightCodes[i] { - return false - } - - childNum := childIDs[typeID] - eq := sliceApproxEqual(l.children[childNum], int64(i+l.data.offset), int64(i+l.data.offset+1), - r.children[childNum], int64(i+r.data.offset), int64(i+r.data.offset+1), opt) - if !eq { - return false - } - } - return true -} - -// DenseUnion represents an array where each logical value is taken from -// a single child, at a specific offset. A buffer of 8-bit type ids -// indicates which child a given logical value is to be taken from and -// a buffer of 32-bit offsets indicating which physical position in the -// given child array has the logical value for that index. -// -// Unlike a sparse union, a dense union allows encoding only the child values -// which are actually referred to by the union array. This is counterbalanced -// by the additional footprint of the offsets buffer, and the additional -// indirection cost when looking up values. -// -// Unlike most other arrays, unions do not have a top-level validity bitmap. -type DenseUnion struct { - union - offsets []int32 -} - -// NewDenseUnion constructs a union array using the given type, length, list of -// children and buffers of typeIDs and offsets, with the given array offset. 
-func NewDenseUnion(dt *arrow.DenseUnionType, length int, children []arrow.Array, typeIDs, valueOffsets *memory.Buffer, offset int) *DenseUnion { - childData := make([]arrow.ArrayData, len(children)) - for i, c := range children { - childData[i] = c.Data() - } - - data := NewData(dt, length, []*memory.Buffer{nil, typeIDs, valueOffsets}, childData, 0, offset) - defer data.Release() - return NewDenseUnionData(data) -} - -// NewDenseUnionData constructs a DenseUnion array from the given ArrayData object. -func NewDenseUnionData(data arrow.ArrayData) *DenseUnion { - a := &DenseUnion{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// NewDenseUnionFromArrays constructs a new DenseUnion array with the provided -// values. -// -// typeIDs *must* be an INT8 array with no nulls -// offsets *must* be an INT32 array with no nulls -// len(codes) *must* be either 0 or equal to len(children). If len(codes) is 0, -// the type codes used will be sequentially numeric starting at 0. -func NewDenseUnionFromArrays(typeIDs, offsets arrow.Array, children []arrow.Array, codes ...arrow.UnionTypeCode) (*DenseUnion, error) { - return NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets, children, []string{}, codes) -} - -// NewDenseUnionFromArrayWithFields constructs a new DenseUnion array like -// NewDenseUnionFromArrays, but allows specifying the field names. Type codes -// will be auto-generated sequentially starting at 0. -// -// typeIDs *must* be an INT8 array with no nulls. -// offsets *must* be an INT32 array with no nulls. -// len(fields) *must* either be 0 or equal to len(children). If len(fields) is 0, -// then the fields will be named sequentially starting at "0". 
-func NewDenseUnionFromArraysWithFields(typeIDs, offsets arrow.Array, children []arrow.Array, fields []string) (*DenseUnion, error) { - return NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets, children, fields, []arrow.UnionTypeCode{}) -} - -// NewDenseUnionFromArraysWithFieldCodes combines the other constructors -// for constructing a new DenseUnion array with the provided field names -// and type codes, along with children and type ids. -// -// All the requirements mentioned in NewDenseUnionFromArrays and -// NewDenseUnionFromArraysWithFields apply. -func NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets arrow.Array, children []arrow.Array, fields []string, codes []arrow.UnionTypeCode) (*DenseUnion, error) { - switch { - case offsets.DataType().ID() != arrow.INT32: - return nil, errors.New("arrow/array: union offsets must be signed int32") - case typeIDs.DataType().ID() != arrow.INT8: - return nil, errors.New("arrow/array: union type_ids must be signed int8") - case typeIDs.NullN() != 0: - return nil, errors.New("arrow/array: union typeIDs may not have nulls") - case offsets.NullN() != 0: - return nil, errors.New("arrow/array: nulls are not allowed in offsets for NewDenseUnionFromArrays*") - case len(fields) > 0 && len(fields) != len(children): - return nil, errors.New("arrow/array: fields must be the same length as children") - case len(codes) > 0 && len(codes) != len(children): - return nil, errors.New("arrow/array: typecodes must have the same length as children") - } - - ty := arrow.DenseUnionFromArrays(children, fields, codes) - buffers := []*memory.Buffer{nil, typeIDs.Data().Buffers()[1], offsets.Data().Buffers()[1]} - - childData := make([]arrow.ArrayData, len(children)) - for i, c := range children { - childData[i] = c.Data() - } - - data := NewData(ty, typeIDs.Len(), buffers, childData, 0, typeIDs.Data().Offset()) - defer data.Release() - return NewDenseUnionData(data), nil -} - -func (a *DenseUnion) ValueOffsets() *memory.Buffer { return 
a.data.buffers[2] } - -func (a *DenseUnion) ValueOffset(i int) int32 { return a.offsets[i+a.data.offset] } - -func (a *DenseUnion) RawValueOffsets() []int32 { return a.offsets[a.data.offset:] } - -func (a *DenseUnion) setData(data *Data) { - a.union.setData(data) - debug.Assert(a.data.dtype.ID() == arrow.DENSE_UNION, "arrow/array: invalid data type for DenseUnion") - debug.Assert(len(a.data.buffers) == 3, "arrow/array: dense unions should have exactly 3 buffers") - debug.Assert(a.data.buffers[0] == nil, "arrow/array: validity bitmap for dense unions should be nil") - - if data.length > 0 { - a.offsets = arrow.Int32Traits.CastFromBytes(a.data.buffers[2].Bytes()) - } else { - a.offsets = []int32{} - } -} - -func (a *DenseUnion) GetOneForMarshal(i int) interface{} { - typeID := a.RawTypeCodes()[i] - - childID := a.ChildID(i) - data := a.Field(childID) - - offset := int(a.RawValueOffsets()[i]) - if data.IsNull(offset) { - return nil - } - - return []interface{}{typeID, data.GetOneForMarshal(offset)} -} - -func (a *DenseUnion) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func (a *DenseUnion) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - - val := a.GetOneForMarshal(i) - if val == nil { - // child in nil - return NullValueStr - } - - data, err := json.Marshal(val) - if err != nil { - panic(err) - } - return string(data) -} - -func (a *DenseUnion) String() string { - var b strings.Builder - b.WriteByte('[') - - offsets := a.RawValueOffsets() - - fieldList := a.unionType.Fields() - for i := 0; i < a.Len(); i++ { - if i > 0 { - b.WriteString(" ") - } - - field := fieldList[a.ChildID(i)] - f := a.Field(a.ChildID(i)) - fmt.Fprintf(&b, "{%s=%v}", field.Name, 
f.GetOneForMarshal(int(offsets[i]))) - } - b.WriteByte(']') - return b.String() -} - -func arrayDenseUnionEqual(l, r *DenseUnion) bool { - childIDs := l.unionType.ChildIDs() - leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes() - leftOffsets, rightOffsets := l.RawValueOffsets(), r.RawValueOffsets() - - for i := 0; i < l.data.length; i++ { - typeID := leftCodes[i] - if typeID != rightCodes[i] { - return false - } - - childNum := childIDs[typeID] - eq := SliceEqual(l.children[childNum], int64(leftOffsets[i]), int64(leftOffsets[i]+1), - r.children[childNum], int64(rightOffsets[i]), int64(rightOffsets[i]+1)) - if !eq { - return false - } - } - return true -} - -func arrayDenseUnionApproxEqual(l, r *DenseUnion, opt equalOption) bool { - childIDs := l.unionType.ChildIDs() - leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes() - leftOffsets, rightOffsets := l.RawValueOffsets(), r.RawValueOffsets() - - for i := 0; i < l.data.length; i++ { - typeID := leftCodes[i] - if typeID != rightCodes[i] { - return false - } - - childNum := childIDs[typeID] - eq := sliceApproxEqual(l.children[childNum], int64(leftOffsets[i]), int64(leftOffsets[i]+1), - r.children[childNum], int64(rightOffsets[i]), int64(rightOffsets[i]+1), opt) - if !eq { - return false - } - } - return true -} - -// UnionBuilder is a convenience interface for building Union arrays of -// either Dense or Sparse mode. -type UnionBuilder interface { - Builder - // AppendChild allows constructing the union type on the fly by making new - // new array builder available to the union builder. The type code (index) - // of the new child is returned, which should be passed to the Append method - // when adding a new element to the union array. - AppendChild(newChild Builder, fieldName string) (newCode arrow.UnionTypeCode) - // Append adds an element to the UnionArray indicating which typecode the - // new element should use. This *must* be followed up by an append to the - // appropriate child builder. 
- Append(arrow.UnionTypeCode) - // Mode returns what kind of Union is being built, either arrow.SparseMode - // or arrow.DenseMode - Mode() arrow.UnionMode - // Child returns the builder for the requested child index. - // If an invalid index is requested (e.g. <0 or >len(children)) - // then this will panic. - Child(idx int) Builder -} - -type unionBuilder struct { - builder - - childFields []arrow.Field - codes []arrow.UnionTypeCode - mode arrow.UnionMode - - children []Builder - typeIDtoBuilder []Builder - typeIDtoChildID []int - // for all typeID < denseTypeID, typeIDtoBuilder[typeID] != nil - denseTypeID arrow.UnionTypeCode - typesBuilder *int8BufferBuilder -} - -func newUnionBuilder(mem memory.Allocator, children []Builder, typ arrow.UnionType) unionBuilder { - if children == nil { - children = make([]Builder, 0) - } - b := unionBuilder{ - builder: builder{refCount: 1, mem: mem}, - mode: typ.Mode(), - codes: typ.TypeCodes(), - children: children, - typeIDtoChildID: make([]int, int(typ.MaxTypeCode())+1), // convert to int as int8(127) +1 panics - typeIDtoBuilder: make([]Builder, int(typ.MaxTypeCode())+1), // convert to int as int8(127) +1 panics - childFields: make([]arrow.Field, len(children)), - typesBuilder: newInt8BufferBuilder(mem), - } - - b.typeIDtoChildID[0] = arrow.InvalidUnionChildID - for i := 1; i < len(b.typeIDtoChildID); i *= 2 { - copy(b.typeIDtoChildID[i:], b.typeIDtoChildID[:i]) - } - - debug.Assert(len(children) == len(typ.TypeCodes()), "mismatched typecodes and children") - debug.Assert(len(b.typeIDtoBuilder)-1 <= int(arrow.MaxUnionTypeCode), "too many typeids") - - copy(b.childFields, typ.Fields()) - for i, c := range children { - c.Retain() - typeID := typ.TypeCodes()[i] - b.typeIDtoChildID[typeID] = i - b.typeIDtoBuilder[typeID] = c - } - - return b -} - -func (b *unionBuilder) NumChildren() int { - return len(b.children) -} - -func (b *unionBuilder) Child(idx int) Builder { - if idx < 0 || idx > len(b.children) { - panic("arrow/array: 
invalid child index for union builder") - } - return b.children[idx] -} - -// Len returns the current number of elements in the builder. -func (b *unionBuilder) Len() int { return b.typesBuilder.Len() } - -func (b *unionBuilder) Mode() arrow.UnionMode { return b.mode } - -func (b *unionBuilder) reserve(elements int, resize func(int)) { - // union has no null bitmap, ever so we can skip that handling - if b.length+elements > b.capacity { - b.capacity = bitutil.NextPowerOf2(b.length + elements) - resize(b.capacity) - } -} - -func (b *unionBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - for _, c := range b.children { - c.Release() - } - b.typesBuilder.Release() - } -} - -func (b *unionBuilder) Type() arrow.DataType { - fields := make([]arrow.Field, len(b.childFields)) - for i, f := range b.childFields { - fields[i] = f - fields[i].Type = b.children[i].Type() - } - - switch b.mode { - case arrow.SparseMode: - return arrow.SparseUnionOf(fields, b.codes) - case arrow.DenseMode: - return arrow.DenseUnionOf(fields, b.codes) - default: - panic("invalid union builder mode") - } -} - -func (b *unionBuilder) AppendChild(newChild Builder, fieldName string) arrow.UnionTypeCode { - newChild.Retain() - b.children = append(b.children, newChild) - newType := b.nextTypeID() - - b.typeIDtoChildID[newType] = len(b.children) - 1 - b.typeIDtoBuilder[newType] = newChild - b.childFields = append(b.childFields, arrow.Field{Name: fieldName, Nullable: true}) - b.codes = append(b.codes, newType) - - return newType -} - -func (b *unionBuilder) nextTypeID() arrow.UnionTypeCode { - // find typeID such that typeIDtoBuilder[typeID] == nil - // use that for the new child. 
Start searching at denseTypeID - // since typeIDtoBuilder is densely packed up at least to denseTypeID - for ; int(b.denseTypeID) < len(b.typeIDtoBuilder); b.denseTypeID++ { - if b.typeIDtoBuilder[b.denseTypeID] == nil { - id := b.denseTypeID - b.denseTypeID++ - return id - } - } - - debug.Assert(len(b.typeIDtoBuilder) < int(arrow.MaxUnionTypeCode), "too many children typeids") - // typeIDtoBuilder is already densely packed, so just append the new child - b.typeIDtoBuilder = append(b.typeIDtoBuilder, nil) - b.typeIDtoChildID = append(b.typeIDtoChildID, arrow.InvalidUnionChildID) - id := b.denseTypeID - b.denseTypeID++ - return id - -} - -func (b *unionBuilder) newData() *Data { - length := b.typesBuilder.Len() - typesBuffer := b.typesBuilder.Finish() - defer typesBuffer.Release() - childData := make([]arrow.ArrayData, len(b.children)) - for i, b := range b.children { - childData[i] = b.newData() - defer childData[i].Release() - } - - return NewData(b.Type(), length, []*memory.Buffer{nil, typesBuffer}, childData, 0, 0) -} - -// SparseUnionBuilder is used to build a Sparse Union array using the Append -// methods. You can also add new types to the union on the fly by using -// AppendChild. -// -// Keep in mind: All children of a SparseUnion should be the same length -// as the union itself. If you add new children with AppendChild, ensure -// that they have the correct number of preceding elements that have been -// added to the builder beforehand. -type SparseUnionBuilder struct { - unionBuilder -} - -// NewEmptySparseUnionBuilder is a helper to construct a SparseUnionBuilder -// without having to predefine the union types. It creates a builder with no -// children and AppendChild will have to be called before appending any -// elements to this builder. 
-func NewEmptySparseUnionBuilder(mem memory.Allocator) *SparseUnionBuilder { - return &SparseUnionBuilder{ - unionBuilder: newUnionBuilder(mem, nil, arrow.SparseUnionOf([]arrow.Field{}, []arrow.UnionTypeCode{})), - } -} - -// NewSparseUnionBuilder constructs a new SparseUnionBuilder with the provided -// children and type codes. Builders will be constructed for each child -// using the fields in typ -func NewSparseUnionBuilder(mem memory.Allocator, typ *arrow.SparseUnionType) *SparseUnionBuilder { - children := make([]Builder, typ.NumFields()) - for i, f := range typ.Fields() { - children[i] = NewBuilder(mem, f.Type) - defer children[i].Release() - } - return NewSparseUnionBuilderWithBuilders(mem, typ, children) -} - -// NewSparseUnionWithBuilders returns a new SparseUnionBuilder using the -// provided type and builders. -func NewSparseUnionBuilderWithBuilders(mem memory.Allocator, typ *arrow.SparseUnionType, children []Builder) *SparseUnionBuilder { - return &SparseUnionBuilder{ - unionBuilder: newUnionBuilder(mem, children, typ), - } -} - -func (b *SparseUnionBuilder) Reserve(n int) { - b.reserve(n, b.Resize) -} - -func (b *SparseUnionBuilder) Resize(n int) { - b.typesBuilder.resize(n) -} - -// AppendNull will append a null to the first child and an empty value -// (implementation-defined) to the rest of the children. -func (b *SparseUnionBuilder) AppendNull() { - firstChildCode := b.codes[0] - b.typesBuilder.AppendValue(firstChildCode) - b.typeIDtoBuilder[firstChildCode].AppendNull() - for _, c := range b.codes[1:] { - b.typeIDtoBuilder[c].AppendEmptyValue() - } -} - -// AppendNulls is identical to calling AppendNull() n times, except -// it will pre-allocate with reserve for all the nulls beforehand. 
-func (b *SparseUnionBuilder) AppendNulls(n int) { - firstChildCode := b.codes[0] - b.Reserve(n) - for _, c := range b.codes { - b.typeIDtoBuilder[c].Reserve(n) - } - for i := 0; i < n; i++ { - b.typesBuilder.AppendValue(firstChildCode) - b.typeIDtoBuilder[firstChildCode].AppendNull() - for _, c := range b.codes[1:] { - b.typeIDtoBuilder[c].AppendEmptyValue() - } - } -} - -// AppendEmptyValue appends an empty value (implementation defined) -// to each child, and appends the type of the first typecode to the typeid -// buffer. -func (b *SparseUnionBuilder) AppendEmptyValue() { - b.typesBuilder.AppendValue(b.codes[0]) - for _, c := range b.codes { - b.typeIDtoBuilder[c].AppendEmptyValue() - } -} - -// AppendEmptyValues is identical to calling AppendEmptyValue() n times, -// except it pre-allocates first so it is more efficient. -func (b *SparseUnionBuilder) AppendEmptyValues(n int) { - b.Reserve(n) - firstChildCode := b.codes[0] - for _, c := range b.codes { - b.typeIDtoBuilder[c].Reserve(n) - } - for i := 0; i < n; i++ { - b.typesBuilder.AppendValue(firstChildCode) - for _, c := range b.codes { - b.typeIDtoBuilder[c].AppendEmptyValue() - } - } -} - -// Append appends an element to the UnionArray and must be followed up -// by an append to the appropriate child builder. The parameter should -// be the type id of the child to which the next value will be appended. -// -// After appending to the corresponding child builder, all other child -// builders should have a null or empty value appended to them (although -// this is not enforced and any value is theoretically allowed and will be -// ignored). 
-func (b *SparseUnionBuilder) Append(nextType arrow.UnionTypeCode) { - b.typesBuilder.AppendValue(nextType) -} - -func (b *SparseUnionBuilder) NewArray() arrow.Array { - return b.NewSparseUnionArray() -} - -func (b *SparseUnionBuilder) NewSparseUnionArray() (a *SparseUnion) { - data := b.newData() - a = NewSparseUnionData(data) - data.Release() - return -} - -func (b *SparseUnionBuilder) UnmarshalJSON(data []byte) (err error) { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("sparse union builder must unpack from json array, found %s", t) - } - return b.Unmarshal(dec) -} - -func (b *SparseUnionBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *SparseUnionBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - dec := json.NewDecoder(strings.NewReader(s)) - return b.UnmarshalOne(dec) -} - -func (b *SparseUnionBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch t { - case json.Delim('['): - // should be [type_id, Value] - typeID, err := dec.Token() - if err != nil { - return err - } - - var typeCode int8 - - switch tid := typeID.(type) { - case json.Number: - id, err := tid.Int64() - if err != nil { - return err - } - typeCode = int8(id) - case float64: - if tid != float64(int64(tid)) { - return &json.UnmarshalTypeError{ - Offset: dec.InputOffset(), - Type: reflect.TypeOf(int8(0)), - Struct: fmt.Sprint(b.Type()), - Value: "float", - } - } - typeCode = int8(tid) - } - - childNum := b.typeIDtoChildID[typeCode] - if childNum == arrow.InvalidUnionChildID { - return &json.UnmarshalTypeError{ - Offset: dec.InputOffset(), - Value: "invalid type code", - } - } - - for i, c := range b.children { - if i != childNum { 
- c.AppendNull() - } - } - - b.Append(typeCode) - if err := b.children[childNum].UnmarshalOne(dec); err != nil { - return err - } - - endArr, err := dec.Token() - if err != nil { - return err - } - - if endArr != json.Delim(']') { - return &json.UnmarshalTypeError{ - Offset: dec.InputOffset(), - Value: "union value array should have exactly 2 elements", - } - } - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Offset: dec.InputOffset(), - Value: fmt.Sprint(t), - Struct: fmt.Sprint(b.Type()), - } - } - return nil -} - -// DenseUnionBuilder is used to build a Dense Union array using the Append -// methods. You can also add new types to the union on the fly by using -// AppendChild. -type DenseUnionBuilder struct { - unionBuilder - - offsetsBuilder *int32BufferBuilder -} - -// NewEmptyDenseUnionBuilder is a helper to construct a DenseUnionBuilder -// without having to predefine the union types. It creates a builder with no -// children and AppendChild will have to be called before appending any -// elements to this builder. -func NewEmptyDenseUnionBuilder(mem memory.Allocator) *DenseUnionBuilder { - return &DenseUnionBuilder{ - unionBuilder: newUnionBuilder(mem, nil, arrow.DenseUnionOf([]arrow.Field{}, []arrow.UnionTypeCode{})), - offsetsBuilder: newInt32BufferBuilder(mem), - } -} - -// NewDenseUnionBuilder constructs a new DenseUnionBuilder with the provided -// children and type codes. 
Builders will be constructed for each child -// using the fields in typ -func NewDenseUnionBuilder(mem memory.Allocator, typ *arrow.DenseUnionType) *DenseUnionBuilder { - children := make([]Builder, 0, typ.NumFields()) - defer func() { - for _, child := range children { - child.Release() - } - }() - - for _, f := range typ.Fields() { - children = append(children, NewBuilder(mem, f.Type)) - } - return NewDenseUnionBuilderWithBuilders(mem, typ, children) -} - -// NewDenseUnionWithBuilders returns a new DenseUnionBuilder using the -// provided type and builders. -func NewDenseUnionBuilderWithBuilders(mem memory.Allocator, typ *arrow.DenseUnionType, children []Builder) *DenseUnionBuilder { - return &DenseUnionBuilder{ - unionBuilder: newUnionBuilder(mem, children, typ), - offsetsBuilder: newInt32BufferBuilder(mem), - } -} - -func (b *DenseUnionBuilder) Reserve(n int) { - b.reserve(n, b.Resize) -} - -func (b *DenseUnionBuilder) Resize(n int) { - b.typesBuilder.resize(n) - b.offsetsBuilder.resize(n * arrow.Int32SizeBytes) -} - -// AppendNull will only append a null value arbitrarily to the first child -// and use that offset for this element of the array. -func (b *DenseUnionBuilder) AppendNull() { - firstChildCode := b.codes[0] - childBuilder := b.typeIDtoBuilder[firstChildCode] - b.typesBuilder.AppendValue(firstChildCode) - b.offsetsBuilder.AppendValue(int32(childBuilder.Len())) - childBuilder.AppendNull() -} - -// AppendNulls will only append a single null arbitrarily to the first child -// and use the same offset multiple times to point to it. 
The result is that -// for a DenseUnion this is more efficient than calling AppendNull multiple -// times in a loop -func (b *DenseUnionBuilder) AppendNulls(n int) { - // only append 1 null to the child builder, use the same offset twice - firstChildCode := b.codes[0] - childBuilder := b.typeIDtoBuilder[firstChildCode] - b.Reserve(n) - for i := 0; i < n; i++ { - b.typesBuilder.AppendValue(firstChildCode) - b.offsetsBuilder.AppendValue(int32(childBuilder.Len())) - } - // only append a single null to the child builder, the offsets all refer to the same value - childBuilder.AppendNull() -} - -// AppendEmptyValue only appends an empty value arbitrarily to the first child, -// and then uses that offset to identify the value. -func (b *DenseUnionBuilder) AppendEmptyValue() { - firstChildCode := b.codes[0] - childBuilder := b.typeIDtoBuilder[firstChildCode] - b.typesBuilder.AppendValue(firstChildCode) - b.offsetsBuilder.AppendValue(int32(childBuilder.Len())) - childBuilder.AppendEmptyValue() -} - -// AppendEmptyValues, like AppendNulls, will only append a single empty value -// (implementation defined) to the first child arbitrarily, and then point -// at that value using the offsets n times. That makes this more efficient -// than calling AppendEmptyValue multiple times. 
-func (b *DenseUnionBuilder) AppendEmptyValues(n int) { - // only append 1 null to the child builder, use the same offset twice - firstChildCode := b.codes[0] - childBuilder := b.typeIDtoBuilder[firstChildCode] - b.Reserve(n) - for i := 0; i < n; i++ { - b.typesBuilder.AppendValue(firstChildCode) - b.offsetsBuilder.AppendValue(int32(childBuilder.Len())) - } - // only append a single empty value to the child builder, the offsets all - // refer to the same value - childBuilder.AppendEmptyValue() -} - -// Append appends the necessary offset and type code to the builder -// and must be followed up with an append to the appropriate child builder -func (b *DenseUnionBuilder) Append(nextType arrow.UnionTypeCode) { - b.typesBuilder.AppendValue(nextType) - bldr := b.typeIDtoBuilder[nextType] - if bldr.Len() == kMaxElems { - panic("a dense UnionArray cannot contain more than 2^31 - 1 elements from a single child") - } - - b.offsetsBuilder.AppendValue(int32(bldr.Len())) -} - -func (b *DenseUnionBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - for _, c := range b.children { - c.Release() - } - b.typesBuilder.Release() - b.offsetsBuilder.Release() - } -} - -func (b *DenseUnionBuilder) newData() *Data { - data := b.unionBuilder.newData() - data.buffers = append(data.buffers, b.offsetsBuilder.Finish()) - return data -} - -func (b *DenseUnionBuilder) NewArray() arrow.Array { - return b.NewDenseUnionArray() -} - -func (b *DenseUnionBuilder) NewDenseUnionArray() (a *DenseUnion) { - data := b.newData() - a = NewDenseUnionData(data) - data.Release() - return -} - -func (b *DenseUnionBuilder) UnmarshalJSON(data []byte) (err error) { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("dense union builder must unpack from json array, found %s", t) - } - return 
b.Unmarshal(dec) -} - -func (b *DenseUnionBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (d *DenseUnionBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - d.AppendNull() - return nil - } - dec := json.NewDecoder(strings.NewReader(s)) - return d.UnmarshalOne(dec) -} - -func (b *DenseUnionBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch t { - case json.Delim('['): - // should be [type_id, Value] - typeID, err := dec.Token() - if err != nil { - return err - } - - var typeCode int8 - - switch tid := typeID.(type) { - case json.Number: - id, err := tid.Int64() - if err != nil { - return err - } - typeCode = int8(id) - case float64: - if tid != float64(int64(tid)) { - return &json.UnmarshalTypeError{ - Offset: dec.InputOffset(), - Type: reflect.TypeOf(int8(0)), - Struct: fmt.Sprint(b.Type()), - Value: "float", - } - } - typeCode = int8(tid) - } - - childNum := b.typeIDtoChildID[typeCode] - if childNum == arrow.InvalidUnionChildID { - return &json.UnmarshalTypeError{ - Offset: dec.InputOffset(), - Value: "invalid type code", - } - } - - b.Append(typeCode) - if err := b.children[childNum].UnmarshalOne(dec); err != nil { - return err - } - - endArr, err := dec.Token() - if err != nil { - return err - } - - if endArr != json.Delim(']') { - return &json.UnmarshalTypeError{ - Offset: dec.InputOffset(), - Value: "union value array should have exactly 2 elements", - } - } - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Offset: dec.InputOffset(), - Value: fmt.Sprint(t), - Struct: fmt.Sprint(b.Type()), - } - } - return nil -} - -var ( - _ arrow.Array = (*SparseUnion)(nil) - _ arrow.Array = (*DenseUnion)(nil) - _ Union = (*SparseUnion)(nil) - _ Union = (*DenseUnion)(nil) - _ Builder = (*SparseUnionBuilder)(nil) - _ Builder = (*DenseUnionBuilder)(nil) - _ 
UnionBuilder = (*SparseUnionBuilder)(nil) - _ UnionBuilder = (*DenseUnionBuilder)(nil) -) diff --git a/go/arrow/array/union_test.go b/go/arrow/array/union_test.go deleted file mode 100644 index 43e7afd693b6c..0000000000000 --- a/go/arrow/array/union_test.go +++ /dev/null @@ -1,1117 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "fmt" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/suite" -) - -func uint8ArrFromSlice(ids ...uint8) arrow.Array { - data := array.NewData(arrow.PrimitiveTypes.Uint8, len(ids), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Uint8Traits.CastToBytes(ids))}, nil, 0, 0) - defer data.Release() - return array.MakeFromData(data) -} - -func int32ArrFromSlice(offsets ...int32) arrow.Array { - data := array.NewData(arrow.PrimitiveTypes.Int32, len(offsets), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets))}, nil, 0, 0) - defer data.Release() - return array.MakeFromData(data) -} - -func TestUnionSliceEquals(t *testing.T) { - unionFields := []arrow.Field{ - {Name: "u0", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - {Name: "u1", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}, - } - - typeCodes := []arrow.UnionTypeCode{5, 10} - sparseType := arrow.SparseUnionOf(unionFields, typeCodes) - denseType := arrow.DenseUnionOf(unionFields, typeCodes) - - schema := arrow.NewSchema([]arrow.Field{ - {Name: "sparse", Type: sparseType, Nullable: true}, - {Name: "dense", Type: denseType, Nullable: true}, - }, nil) - - sparseChildren := make([]arrow.Array, 2) - denseChildren := make([]arrow.Array, 2) - - const length = 7 - - typeIDsBuffer := memory.NewBufferBytes(arrow.Uint8Traits.CastToBytes([]uint8{5, 10, 5, 5, 10, 10, 5})) - sparseChildren[0] = int32ArrFromSlice(0, 1, 2, 3, 4, 5, 6) - defer sparseChildren[0].Release() - sparseChildren[1] = uint8ArrFromSlice(10, 11, 12, 13, 14, 15, 16) - defer sparseChildren[1].Release() - - denseChildren[0] = int32ArrFromSlice(0, 2, 3, 7) - defer denseChildren[0].Release() - denseChildren[1] = uint8ArrFromSlice(11, 14, 15) - defer denseChildren[1].Release() - - offsetsBuffer := 
memory.NewBufferBytes(arrow.Int32Traits.CastToBytes([]int32{0, 0, 1, 2, 1, 2, 3})) - sparse := array.NewSparseUnion(sparseType, length, sparseChildren, typeIDsBuffer, 0) - dense := array.NewDenseUnion(denseType, length, denseChildren, typeIDsBuffer, offsetsBuffer, 0) - - defer sparse.Release() - defer dense.Release() - - batch := array.NewRecord(schema, []arrow.Array{sparse, dense}, -1) - defer batch.Release() - - checkUnion := func(arr arrow.Array) { - size := arr.Len() - slice := array.NewSlice(arr, 2, int64(size)) - defer slice.Release() - assert.EqualValues(t, size-2, slice.Len()) - - slice2 := array.NewSlice(arr, 2, int64(arr.Len())) - defer slice2.Release() - assert.EqualValues(t, size-2, slice2.Len()) - - assert.True(t, array.Equal(slice, slice2)) - assert.True(t, array.SliceEqual(arr, 2, int64(arr.Len()), slice, 0, int64(slice.Len()))) - - // chain slices - slice2 = array.NewSlice(arr, 1, int64(arr.Len())) - defer slice2.Release() - slice2 = array.NewSlice(slice2, 1, int64(slice2.Len())) - defer slice2.Release() - assert.True(t, array.Equal(slice, slice2)) - - slice, slice2 = array.NewSlice(arr, 1, 6), array.NewSlice(arr, 1, 6) - defer slice.Release() - defer slice2.Release() - assert.EqualValues(t, 5, slice.Len()) - - assert.True(t, array.Equal(slice, slice2)) - assert.True(t, array.SliceEqual(arr, 1, 6, slice, 0, 5)) - } - - checkUnion(batch.Column(0)) - checkUnion(batch.Column(1)) -} - -func TestSparseUnionGetFlattenedField(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - ty := arrow.SparseUnionOf([]arrow.Field{ - {Name: "ints", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - {Name: "strs", Type: arrow.BinaryTypes.String, Nullable: true}, - }, []arrow.UnionTypeCode{2, 7}) - ints, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[0, 1, 2, 3]`)) - defer ints.Release() - strs, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["a", null, "c", 
"d"]`)) - defer strs.Release() - idsArr, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[2, 7, 2, 7]`)) - defer idsArr.Release() - ids := idsArr.Data().Buffers()[1] - - const length = 4 - - t.Run("flattened", func(t *testing.T) { - scoped := memory.NewCheckedAllocatorScope(mem) - defer scoped.CheckSize(t) - - arr := array.NewSparseUnion(ty, length, []arrow.Array{ints, strs}, ids, 0) - defer arr.Release() - - flattened, err := arr.GetFlattenedField(mem, 0) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[0, null, 2, null]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - flattened, err = arr.GetFlattenedField(mem, 1) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[null, null, null, "d"]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - sliced := array.NewSlice(arr, 1, 3).(*array.SparseUnion) - defer sliced.Release() - - flattened, err = sliced.GetFlattenedField(mem, 0) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[null, 2]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - flattened, err = sliced.GetFlattenedField(mem, 1) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[null, null]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - _, err = arr.GetFlattenedField(mem, -1) - assert.Error(t, err) - _, err = arr.GetFlattenedField(mem, 2) - assert.Error(t, 
err) - }) - - t.Run("offset children", func(t *testing.T) { - scoped := memory.NewCheckedAllocatorScope(mem) - defer scoped.CheckSize(t) - - strSlice, intSlice := array.NewSlice(strs, 1, 3), array.NewSlice(ints, 1, 3) - defer strSlice.Release() - defer intSlice.Release() - - arr := array.NewSparseUnion(ty, length-2, []arrow.Array{intSlice, strSlice}, ids, 0) - defer arr.Release() - - flattened, err := arr.GetFlattenedField(mem, 0) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[1, null]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - flattened, err = arr.GetFlattenedField(mem, 1) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[null, "c"]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - sliced := array.NewSlice(arr, 1, 2).(*array.SparseUnion) - defer sliced.Release() - - flattened, err = sliced.GetFlattenedField(mem, 0) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[null]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - flattened, err = sliced.GetFlattenedField(mem, 1) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["c"]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - }) - - t.Run("empty flattened", func(t *testing.T) { - scoped := memory.NewCheckedAllocatorScope(mem) - defer scoped.CheckSize(t) - - strSlice, intSlice := array.NewSlice(strs, length, length), 
array.NewSlice(ints, length, length) - defer strSlice.Release() - defer intSlice.Release() - - arr := array.NewSparseUnion(ty, 0, []arrow.Array{intSlice, strSlice}, ids, 0) - defer arr.Release() - - flattened, err := arr.GetFlattenedField(mem, 0) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - flattened, err = arr.GetFlattenedField(mem, 1) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - }) -} - -func TestSparseUnionValidate(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - a, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[4, 5]`)) - defer a.Release() - dt := arrow.SparseUnionOf([]arrow.Field{{Name: "a", Type: arrow.PrimitiveTypes.Int32, Nullable: true}}, []arrow.UnionTypeCode{0}) - children := []arrow.Array{a} - - typeIDsArr, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[0, 0, 0]`)) - defer typeIDsArr.Release() - typeIDs := typeIDsArr.Data().Buffers()[1] - - arr := array.NewSparseUnion(dt, 2, children, typeIDs, 0) - assert.NoError(t, arr.ValidateFull()) - arr.Release() - - arr = array.NewSparseUnion(dt, 1, children, typeIDs, 1) - assert.NoError(t, arr.ValidateFull()) - arr.Release() - - arr = array.NewSparseUnion(dt, 0, children, typeIDs, 2) - assert.NoError(t, arr.ValidateFull()) - arr.Release() - - // length + offset < child length but that's ok! - arr = array.NewSparseUnion(dt, 1, children, typeIDs, 0) - assert.NoError(t, arr.ValidateFull()) - arr.Release() - - // length + offset > child length! BAD! 
- assert.Panics(t, func() { - arr = array.NewSparseUnion(dt, 1, children, typeIDs, 2) - }) - - // offset > child length - assert.Panics(t, func() { - arr = array.NewSparseUnion(dt, 0, children, typeIDs, 3) - }) -} - -type UnionFactorySuite struct { - suite.Suite - - mem *memory.CheckedAllocator - codes []arrow.UnionTypeCode - typeIDs arrow.Array - logicalTypeIDs arrow.Array - invalidTypeIDs arrow.Array - invalidTypeIDs2 arrow.Array -} - -func (s *UnionFactorySuite) typeidsFromSlice(ids ...int8) arrow.Array { - data := array.NewData(arrow.PrimitiveTypes.Int8, len(ids), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int8Traits.CastToBytes(ids))}, nil, 0, 0) - defer data.Release() - return array.MakeFromData(data) -} - -func (s *UnionFactorySuite) offsetsFromSlice(offsets ...int32) arrow.Array { - data := array.NewData(arrow.PrimitiveTypes.Int32, len(offsets), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets))}, nil, 0, 0) - defer data.Release() - return array.MakeFromData(data) -} - -func (s *UnionFactorySuite) SetupTest() { - s.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - s.codes = []arrow.UnionTypeCode{1, 2, 4, 127} - s.typeIDs = s.typeidsFromSlice(0, 1, 2, 0, 1, 3, 2, 0, 2, 1) - s.logicalTypeIDs = s.typeidsFromSlice(1, 2, 4, 1, 2, 127, 4, 1, 4, 2) - s.invalidTypeIDs = s.typeidsFromSlice(1, 2, 4, 1, -2, 127, 4, 1, 4, 2) - s.invalidTypeIDs2 = s.typeidsFromSlice(1, 2, 4, 1, 3, 127, 4, 1, 4, 2) -} - -func (s *UnionFactorySuite) TearDownTest() { - s.typeIDs.Release() - s.logicalTypeIDs.Release() - s.invalidTypeIDs.Release() - s.invalidTypeIDs2.Release() - s.mem.AssertSize(s.T(), 0) -} - -func (s *UnionFactorySuite) checkFields(arr array.Union, fields []string) { - ty := arr.DataType().(arrow.UnionType) - s.Len(ty.Fields(), len(fields)) - for i, f := range ty.Fields() { - s.Equal(fields[i], f.Name) - } -} - -func (s *UnionFactorySuite) checkCodes(arr array.Union, codes []arrow.UnionTypeCode) { - ty := 
arr.DataType().(arrow.UnionType) - s.Equal(codes, ty.TypeCodes()) -} - -func (s *UnionFactorySuite) checkUnion(arr array.Union, mode arrow.UnionMode, fields []string, codes []arrow.UnionTypeCode) { - s.Equal(mode, arr.Mode()) - s.checkFields(arr, fields) - s.checkCodes(arr, codes) - typeIDs := s.typeIDs.(*array.Int8) - for i := 0; i < typeIDs.Len(); i++ { - s.EqualValues(typeIDs.Value(i), arr.ChildID(i)) - } - s.Nil(arr.Field(-1)) - s.Nil(arr.Field(typeIDs.Len())) -} - -func (s *UnionFactorySuite) TestMakeDenseUnions() { - // typeIDs: {0, 1, 2, 0, 1, 3, 2, 0, 2, 1} - offsets := s.offsetsFromSlice(0, 0, 0, 1, 1, 0, 1, 2, 1, 2) - defer offsets.Release() - - children := make([]arrow.Array, 4) - children[0], _, _ = array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "def", "xyz"]`)) - defer children[0].Release() - children[1], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Uint8, strings.NewReader(`[10, 20, 30]`)) - defer children[1].Release() - children[2], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.618, 2.718, 3.142]`)) - defer children[2].Release() - children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[-12]`)) - defer children[3].Release() - - fieldNames := []string{"str", "int1", "real", "int2"} - - s.Run("without fields and codes", func() { - result, err := array.NewDenseUnionFromArrays(s.typeIDs, offsets, children) - s.NoError(err) - defer result.Release() - s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.DenseMode, []string{"0", "1", "2", "3"}, []arrow.UnionTypeCode{0, 1, 2, 3}) - }) - - s.Run("with fields", func() { - _, err := array.NewDenseUnionFromArraysWithFields(s.typeIDs, offsets, children, []string{"one"}) - s.Error(err) - result, err := array.NewDenseUnionFromArraysWithFields(s.typeIDs, offsets, children, fieldNames) - s.NoError(err) - defer result.Release() - s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.DenseMode, 
fieldNames, []arrow.UnionTypeCode{0, 1, 2, 3}) - }) - - s.Run("with codes", func() { - _, err := array.NewDenseUnionFromArrays(s.logicalTypeIDs, offsets, children, 0) - s.Error(err) - result, err := array.NewDenseUnionFromArrays(s.logicalTypeIDs, offsets, children, s.codes...) - s.NoError(err) - defer result.Release() - s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.DenseMode, []string{"0", "1", "2", "3"}, s.codes) - }) - - s.Run("with fields and codes", func() { - _, err := array.NewDenseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, offsets, children, []string{"one"}, s.codes) - s.Error(err) - result, err := array.NewDenseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, offsets, children, fieldNames, s.codes) - s.NoError(err) - defer result.Release() - s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.DenseMode, fieldNames, s.codes) - }) - - s.Run("invalid type codes", func() { - result, err := array.NewDenseUnionFromArrays(s.invalidTypeIDs, offsets, children, s.codes...) - s.NoError(err) - defer result.Release() - s.Error(result.ValidateFull()) - result, err = array.NewDenseUnionFromArrays(s.invalidTypeIDs2, offsets, children, s.codes...) 
- s.NoError(err) - defer result.Release() - s.Error(result.ValidateFull()) - }) - - s.Run("invalid offsets", func() { - // offset out of bounds at index 5 - invalidOffsets := s.offsetsFromSlice(0, 0, 0, 1, 1, 1, 1, 2, 1, 2) - defer invalidOffsets.Release() - result, err := array.NewDenseUnionFromArrays(s.typeIDs, invalidOffsets, children) - s.NoError(err) - defer result.Release() - s.Error(result.ValidateFull()) - - // negative offset at index 5 - invalidOffsets = s.offsetsFromSlice(0, 0, 0, 1, 1, -1, 1, 2, 1, 2) - defer invalidOffsets.Release() - result, err = array.NewDenseUnionFromArrays(s.typeIDs, invalidOffsets, children) - s.NoError(err) - defer result.Release() - s.Error(result.ValidateFull()) - - // non-monotonic offset at index 3 - invalidOffsets = s.offsetsFromSlice(1, 0, 0, 0, 1, 0, 1, 2, 1, 2) - defer invalidOffsets.Release() - result, err = array.NewDenseUnionFromArrays(s.typeIDs, invalidOffsets, children) - s.NoError(err) - defer result.Release() - s.Error(result.ValidateFull()) - }) -} - -func (s *UnionFactorySuite) TestDenseUnionStringRoundTrip() { - // typeIDs: {0, 1, 2, 0, 1, 3, 2, 0, 2, 1} - offsets := s.offsetsFromSlice(0, 0, 0, 1, 1, 0, 1, 2, 1, 2) - defer offsets.Release() - - children := make([]arrow.Array, 4) - children[0], _, _ = array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "def", "xyz"]`)) - defer children[0].Release() - children[1], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Uint8, strings.NewReader(`[10, 20, 30]`)) - defer children[1].Release() - children[2], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.618, 2.718, 3.142]`)) - defer children[2].Release() - children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[-12]`)) - defer children[3].Release() - - fields := []string{"str", "int1", "real", "int2"} - - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(s.T(), 0) - - dt := arrow.DenseUnionFromArrays(children, fields, s.codes) - arr, err := array.NewDenseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, offsets, children, fields, s.codes) - s.NoError(err) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewDenseUnionBuilder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - s.NoError(b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.DenseUnion) - defer arr1.Release() - - s.True(array.Equal(arr, arr1)) -} - -func (s *UnionFactorySuite) TestMakeSparse() { - children := make([]arrow.Array, 4) - children[0], _, _ = array.FromJSON(s.mem, arrow.BinaryTypes.String, - strings.NewReader(`["abc", "", "", "def", "", "", "", "xyz", "", ""]`)) - children[1], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Uint8, - strings.NewReader(`[0, 10, 0, 0, 20, 0, 0, 0, 0, 30]`)) - children[2], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, - strings.NewReader(`[0.0, 0.0, 1.618, 0.0, 0.0, 0.0, 2.718, 0.0, 3.142, 0.0]`)) - children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, - strings.NewReader(`[0, 0, 0, 0, 0, -12, 0, 0, 0, 0]`)) - for _, c := range children { - defer c.Release() - } - - fieldNames := []string{"str", "int1", "real", "int2"} - - s.Run("without fields and codes", func() { - result, err := array.NewSparseUnionFromArrays(s.typeIDs, children) - s.NoError(err) - defer result.Release() - s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.SparseMode, []string{"0", "1", "2", "3"}, []arrow.UnionTypeCode{0, 1, 2, 3}) - }) - - s.Run("with fields", func() { - _, err := array.NewSparseUnionFromArraysWithFields(s.typeIDs, children, []string{"one"}) - s.Error(err) - result, err := array.NewSparseUnionFromArraysWithFields(s.typeIDs, children, fieldNames) - s.NoError(err) - defer result.Release() - 
s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.SparseMode, fieldNames, []arrow.UnionTypeCode{0, 1, 2, 3}) - }) - - s.Run("with codes", func() { - _, err := array.NewSparseUnionFromArrays(s.logicalTypeIDs, children, 0) - s.Error(err) - result, err := array.NewSparseUnionFromArrays(s.logicalTypeIDs, children, s.codes...) - s.NoError(err) - defer result.Release() - s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.SparseMode, []string{"0", "1", "2", "3"}, s.codes) - }) - - s.Run("with fields and codes", func() { - _, err := array.NewSparseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, children, []string{"one"}, s.codes) - s.Error(err) - result, err := array.NewSparseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, children, fieldNames, s.codes) - s.NoError(err) - defer result.Release() - s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.SparseMode, fieldNames, s.codes) - }) - - s.Run("invalid type codes", func() { - result, err := array.NewSparseUnionFromArrays(s.invalidTypeIDs, children, s.codes...) - s.NoError(err) - defer result.Release() - s.Error(result.ValidateFull()) - result, err = array.NewSparseUnionFromArrays(s.invalidTypeIDs2, children, s.codes...) 
- s.NoError(err) - defer result.Release() - s.Error(result.ValidateFull()) - }) - - s.Run("invalid child length", func() { - children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, - strings.NewReader(`[0, 0, 0, 0, 0, -12, 0, 0, 0]`)) - defer children[3].Release() - - _, err := array.NewSparseUnionFromArrays(s.typeIDs, children) - s.Error(err) - }) -} - -func (s *UnionFactorySuite) TestSparseUnionStringRoundTrip() { - children := make([]arrow.Array, 4) - children[0], _, _ = array.FromJSON(s.mem, arrow.BinaryTypes.String, - strings.NewReader(`["abc", "", "", "def", "", "", "", "xyz", "", ""]`)) - defer children[0].Release() - children[1], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Uint8, - strings.NewReader(`[0, 10, 0, 0, 20, 0, 0, 0, 0, 30]`)) - defer children[1].Release() - children[2], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, - strings.NewReader(`[0.0, 0.0, 1.618, 0.0, 0.0, 0.0, 2.718, 0.0, 3.142, 0.0]`)) - defer children[2].Release() - children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, - strings.NewReader(`[0, 0, 0, 0, 0, -12, 0, 0, 0, 0]`)) - defer children[3].Release() - - fields := []string{"str", "int1", "real", "int2"} - - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(s.T(), 0) - - dt := arrow.SparseUnionFromArrays(children, fields, s.codes) - - arr, err := array.NewSparseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, children, fields, s.codes) - s.NoError(err) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewSparseUnionBuilder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - s.NoError(b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.SparseUnion) - defer arr1.Release() - - s.True(array.Equal(arr, arr1)) -} - -type UnionBuilderSuite struct { - suite.Suite - - I8 arrow.UnionTypeCode - STR arrow.UnionTypeCode - DBL arrow.UnionTypeCode - - mem *memory.CheckedAllocator - expectedTypes []arrow.UnionTypeCode - expectedTypesArr arrow.Array - i8Bldr *array.Int8Builder - strBldr *array.StringBuilder - dblBldr *array.Float64Builder - unionBldr array.UnionBuilder - actual array.Union -} - -func (s *UnionBuilderSuite) SetupTest() { - s.I8, s.STR, s.DBL = 8, 13, 7 - - s.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - s.expectedTypes = make([]arrow.UnionTypeCode, 0) - - s.i8Bldr = array.NewInt8Builder(s.mem) - s.strBldr = array.NewStringBuilder(s.mem) - s.dblBldr = array.NewFloat64Builder(s.mem) -} - -func (s *UnionBuilderSuite) TearDownTest() { - if s.expectedTypesArr != nil { - s.expectedTypesArr.Release() - s.expectedTypesArr = nil - } - s.i8Bldr.Release() - s.strBldr.Release() - s.dblBldr.Release() - if s.actual != nil { - s.actual.Release() - s.actual = nil - } - - s.mem.AssertSize(s.T(), 0) -} - -func (s *UnionBuilderSuite) createExpectedTypesArr() { - data := array.NewData(arrow.PrimitiveTypes.Int8, len(s.expectedTypes), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int8Traits.CastToBytes(s.expectedTypes))}, nil, 0, 0) - defer data.Release() - s.expectedTypesArr = array.MakeFromData(data) -} - -func (s *UnionBuilderSuite) appendInt(i int8) { - s.expectedTypes = append(s.expectedTypes, s.I8) - s.unionBldr.Append(s.I8) - s.i8Bldr.Append(i) - if s.unionBldr.Mode() == arrow.SparseMode { - s.strBldr.AppendEmptyValue() - s.dblBldr.AppendEmptyValue() - } -} - -func (s *UnionBuilderSuite) appendString(str string) { - s.expectedTypes = append(s.expectedTypes, 
s.STR) - s.unionBldr.Append(s.STR) - s.strBldr.Append(str) - if s.unionBldr.Mode() == arrow.SparseMode { - s.i8Bldr.AppendEmptyValue() - s.dblBldr.AppendEmptyValue() - } -} - -func (s *UnionBuilderSuite) appendDbl(dbl float64) { - s.expectedTypes = append(s.expectedTypes, s.DBL) - s.unionBldr.Append(s.DBL) - s.dblBldr.Append(dbl) - if s.unionBldr.Mode() == arrow.SparseMode { - s.strBldr.AppendEmptyValue() - s.i8Bldr.AppendEmptyValue() - } -} - -func (s *UnionBuilderSuite) appendBasics() { - s.appendInt(33) - s.appendString("abc") - s.appendDbl(1.0) - s.appendDbl(-1.0) - s.appendString("") - s.appendInt(10) - s.appendString("def") - s.appendInt(-10) - s.appendDbl(0.5) - - s.Equal(9, s.unionBldr.Len()) - - s.actual = s.unionBldr.NewArray().(array.Union) - s.NoError(s.actual.ValidateFull()) - s.createExpectedTypesArr() -} - -func (s *UnionBuilderSuite) appendNullsAndEmptyValues() { - s.appendString("abc") - s.unionBldr.AppendNull() - s.unionBldr.AppendEmptyValue() - s.expectedTypes = append(s.expectedTypes, s.I8, s.I8, s.I8) - s.appendInt(42) - s.unionBldr.AppendNulls(2) - s.unionBldr.AppendEmptyValues(2) - s.expectedTypes = append(s.expectedTypes, s.I8, s.I8, s.I8) - - s.Equal(8, s.unionBldr.Len()) - - s.actual = s.unionBldr.NewArray().(array.Union) - s.NoError(s.actual.ValidateFull()) - s.createExpectedTypesArr() -} - -func (s *UnionBuilderSuite) appendInferred() { - s.I8 = s.unionBldr.AppendChild(s.i8Bldr, "i8") - s.EqualValues(0, s.I8) - s.appendInt(33) - s.appendInt(10) - - s.STR = s.unionBldr.AppendChild(s.strBldr, "str") - s.EqualValues(1, s.STR) - s.appendString("abc") - s.appendString("") - s.appendString("def") - s.appendInt(-10) - - s.DBL = s.unionBldr.AppendChild(s.dblBldr, "dbl") - s.EqualValues(2, s.DBL) - s.appendDbl(1.0) - s.appendDbl(-1.0) - s.appendDbl(0.5) - - s.Equal(9, s.unionBldr.Len()) - - s.actual = s.unionBldr.NewArray().(array.Union) - s.NoError(s.actual.ValidateFull()) - s.createExpectedTypesArr() - - s.EqualValues(0, s.I8) - 
s.EqualValues(1, s.STR) - s.EqualValues(2, s.DBL) -} - -func (s *UnionBuilderSuite) appendListOfInferred(utyp arrow.UnionType) *array.List { - listBldr := array.NewListBuilder(s.mem, utyp) - defer listBldr.Release() - - s.unionBldr = listBldr.ValueBuilder().(array.UnionBuilder) - - listBldr.Append(true) - s.I8 = s.unionBldr.AppendChild(s.i8Bldr, "i8") - s.EqualValues(0, s.I8) - s.appendInt(10) - - listBldr.Append(true) - s.STR = s.unionBldr.AppendChild(s.strBldr, "str") - s.EqualValues(1, s.STR) - s.appendString("abc") - s.appendInt(-10) - - listBldr.Append(true) - s.DBL = s.unionBldr.AppendChild(s.dblBldr, "dbl") - s.EqualValues(2, s.DBL) - s.appendDbl(0.5) - - s.Equal(4, s.unionBldr.Len()) - - s.createExpectedTypesArr() - return listBldr.NewListArray() -} - -func (s *UnionBuilderSuite) assertArraysEqual(expected, actual arrow.Array) { - s.Truef(array.Equal(expected, actual), "expected: %s, got: %s", expected, actual) -} - -func (s *UnionBuilderSuite) TestDenseUnionBasics() { - s.unionBldr = array.NewDenseUnionBuilderWithBuilders(s.mem, - arrow.DenseUnionOf([]arrow.Field{ - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), - []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) - defer s.unionBldr.Release() - - s.appendBasics() - - expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[33, 10, -10]`)) - expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "", "def"]`)) - expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.0, -1.0, 0.5]`)) - expectedOffsets, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, 0, 0, 1, 1, 1, 2, 2, 2]`)) - - defer func() { - expectedI8.Release() - expectedStr.Release() - expectedDbl.Release() - 
expectedOffsets.Release() - }() - - expected, err := array.NewDenseUnionFromArraysWithFieldCodes(s.expectedTypesArr, - expectedOffsets, - []arrow.Array{expectedI8, expectedStr, expectedDbl}, - []string{"i8", "str", "dbl"}, - []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) - s.NoError(err) - defer expected.Release() - - s.Equal(expected.DataType().String(), s.actual.DataType().String()) - s.assertArraysEqual(expected, s.actual) -} - -func (s *UnionBuilderSuite) TestDenseBuilderNullsAndEmpty() { - s.unionBldr = array.NewDenseUnionBuilderWithBuilders(s.mem, - arrow.DenseUnionOf([]arrow.Field{ - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), - []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) - defer s.unionBldr.Release() - - s.appendNullsAndEmptyValues() - - // four null / empty values (the latter implementation-defined) appended to I8 - expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[null, 0, 42, null, 0]`)) - expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc"]`)) - expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[]`)) - expectedOffsets, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, 0, 1, 2, 3, 3, 4, 4]`)) - - defer func() { - expectedI8.Release() - expectedStr.Release() - expectedDbl.Release() - expectedOffsets.Release() - }() - - expected, err := array.NewDenseUnionFromArraysWithFieldCodes(s.expectedTypesArr, - expectedOffsets, - []arrow.Array{expectedI8, expectedStr, expectedDbl}, - []string{"i8", "str", "dbl"}, - []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) - s.NoError(err) - defer expected.Release() - - s.Equal(expected.DataType().String(), s.actual.DataType().String()) - s.assertArraysEqual(expected, s.actual) - 
- // physical arrays must be as expected - s.assertArraysEqual(expectedI8, s.actual.Field(0)) - s.assertArraysEqual(expectedStr, s.actual.Field(1)) - s.assertArraysEqual(expectedDbl, s.actual.Field(2)) -} - -func (s *UnionBuilderSuite) TestDenseUnionInferredTyped() { - s.unionBldr = array.NewEmptyDenseUnionBuilder(s.mem) - defer s.unionBldr.Release() - - s.appendInferred() - - expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[33, 10, -10]`)) - expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "", "def"]`)) - expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.0, -1.0, 0.5]`)) - expectedOffsets, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, 1, 0, 1, 2, 2, 0, 1, 2]`)) - - defer func() { - expectedI8.Release() - expectedStr.Release() - expectedDbl.Release() - expectedOffsets.Release() - }() - - expected, err := array.NewDenseUnionFromArraysWithFieldCodes(s.expectedTypesArr, - expectedOffsets, - []arrow.Array{expectedI8, expectedStr, expectedDbl}, - []string{"i8", "str", "dbl"}, - []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) - s.NoError(err) - defer expected.Release() - - s.Equal(expected.DataType().String(), s.actual.DataType().String()) - s.assertArraysEqual(expected, s.actual) -} - -func (s *UnionBuilderSuite) TestDenseUnionListOfInferredType() { - actual := s.appendListOfInferred(arrow.DenseUnionOf([]arrow.Field{}, []arrow.UnionTypeCode{})) - defer actual.Release() - - expectedType := arrow.ListOf(arrow.DenseUnionOf( - []arrow.Field{ - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}}, - []arrow.UnionTypeCode{s.I8, s.STR, s.DBL})) - s.Equal(expectedType.String(), actual.DataType().String()) -} - -func (s *UnionBuilderSuite) TestSparseUnionBasics() { - s.unionBldr 
= array.NewSparseUnionBuilderWithBuilders(s.mem, - arrow.SparseUnionOf([]arrow.Field{ - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), - []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) - defer s.unionBldr.Release() - - s.appendBasics() - - expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, - strings.NewReader(`[33, null, null, null, null, 10, null, -10, null]`)) - expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, - strings.NewReader(`[null, "abc", null, null, "", null, "def", null, null]`)) - expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, - strings.NewReader(`[null, null, 1.0, -1.0, null, null, null, null, 0.5]`)) - - defer func() { - expectedI8.Release() - expectedStr.Release() - expectedDbl.Release() - }() - - expected, err := array.NewSparseUnionFromArraysWithFieldCodes(s.expectedTypesArr, - []arrow.Array{expectedI8, expectedStr, expectedDbl}, - []string{"i8", "str", "dbl"}, - []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) - s.NoError(err) - defer expected.Release() - - s.Equal(expected.DataType().String(), s.actual.DataType().String()) - s.assertArraysEqual(expected, s.actual) -} - -func (s *UnionBuilderSuite) TestSparseBuilderNullsAndEmpty() { - s.unionBldr = array.NewSparseUnionBuilderWithBuilders(s.mem, - arrow.SparseUnionOf([]arrow.Field{ - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), - []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) - defer s.unionBldr.Release() - - s.appendNullsAndEmptyValues() - - // "abc", null, 0, 42, null, null, 0, 0 - // getting 0 for empty values is implementation-defined - 
expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, - strings.NewReader(`[0, null, 0, 42, null, null, 0, 0]`)) - expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, - strings.NewReader(`["abc", "", "", "", "", "", "", ""]`)) - expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, - strings.NewReader(`[0, 0, 0, 0, 0, 0, 0, 0]`)) - - defer func() { - expectedI8.Release() - expectedStr.Release() - expectedDbl.Release() - }() - - expected, err := array.NewSparseUnionFromArraysWithFieldCodes(s.expectedTypesArr, - []arrow.Array{expectedI8, expectedStr, expectedDbl}, - []string{"i8", "str", "dbl"}, - []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) - s.NoError(err) - defer expected.Release() - - s.Equal(expected.DataType().String(), s.actual.DataType().String()) - s.assertArraysEqual(expected, s.actual) - - // physical arrays must be as expected - s.assertArraysEqual(expectedI8, s.actual.Field(0)) - s.assertArraysEqual(expectedStr, s.actual.Field(1)) - s.assertArraysEqual(expectedDbl, s.actual.Field(2)) -} - -func (s *UnionBuilderSuite) TestSparseUnionInferredType() { - s.unionBldr = array.NewEmptySparseUnionBuilder(s.mem) - defer s.unionBldr.Release() - - s.appendInferred() - - expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, - strings.NewReader(`[33, 10, null, null, null, -10, null, null, null]`)) - expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, - strings.NewReader(`[null, null, "abc", "", "def", null, null, null, null]`)) - expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, - strings.NewReader(`[null, null, null, null, null, null,1.0, -1.0, 0.5]`)) - - defer func() { - expectedI8.Release() - expectedStr.Release() - expectedDbl.Release() - }() - - expected, err := array.NewSparseUnionFromArraysWithFieldCodes(s.expectedTypesArr, - []arrow.Array{expectedI8, expectedStr, expectedDbl}, - []string{"i8", "str", "dbl"}, - []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) - 
s.NoError(err) - defer expected.Release() - - s.Equal(expected.DataType().String(), s.actual.DataType().String()) - s.assertArraysEqual(expected, s.actual) -} - -func (s *UnionBuilderSuite) TestSparseUnionStructWithUnion() { - bldr := array.NewStructBuilder(s.mem, arrow.StructOf(arrow.Field{Name: "u", Type: arrow.SparseUnionFromArrays(nil, nil, nil)})) - defer bldr.Release() - - unionBldr := bldr.FieldBuilder(0).(array.UnionBuilder) - int32Bldr := array.NewInt32Builder(s.mem) - defer int32Bldr.Release() - - s.EqualValues(0, unionBldr.AppendChild(int32Bldr, "i")) - expectedType := arrow.StructOf(arrow.Field{Name: "u", - Type: arrow.SparseUnionOf([]arrow.Field{{Name: "i", Type: arrow.PrimitiveTypes.Int32, Nullable: true}}, []arrow.UnionTypeCode{0})}) - s.Truef(arrow.TypeEqual(expectedType, bldr.Type()), "expected: %s, got: %s", expectedType, bldr.Type()) -} - -func ExampleSparseUnionBuilder() { - dt1 := arrow.SparseUnionOf([]arrow.Field{ - {Name: "c", Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.BinaryTypes.String}}, - }, []arrow.UnionTypeCode{0}) - dt2 := arrow.StructOf(arrow.Field{Name: "a", Type: dt1}) - - pool := memory.DefaultAllocator - bldr := array.NewStructBuilder(pool, dt2) - defer bldr.Release() - - bldrDt1 := bldr.FieldBuilder(0).(*array.SparseUnionBuilder) - binDictBldr := bldrDt1.Child(0).(*array.BinaryDictionaryBuilder) - - bldr.Append(true) - bldrDt1.Append(0) - binDictBldr.AppendString("foo") - - bldr.Append(true) - bldrDt1.Append(0) - binDictBldr.AppendString("bar") - - out := bldr.NewArray().(*array.Struct) - defer out.Release() - - fmt.Println(out) - - // Output: - // {[{c=foo} {c=bar}]} -} - -func TestUnions(t *testing.T) { - suite.Run(t, new(UnionFactorySuite)) - suite.Run(t, new(UnionBuilderSuite)) -} - -func TestNestedUnionStructDict(t *testing.T) { - // ARROW-18274 - dt1 := arrow.SparseUnionOf([]arrow.Field{ - {Name: "c", Type: &arrow.DictionaryType{ - IndexType: arrow.PrimitiveTypes.Uint16, - 
ValueType: arrow.BinaryTypes.String, - Ordered: false, - }}, - }, []arrow.UnionTypeCode{0}) - dt2 := arrow.StructOf( - arrow.Field{Name: "b", Type: dt1}, - ) - dt3 := arrow.SparseUnionOf([]arrow.Field{ - {Name: "a", Type: dt2}, - }, []arrow.UnionTypeCode{0}) - pool := memory.NewGoAllocator() - - builder := array.NewSparseUnionBuilder(pool, dt3) - defer builder.Release() - arr := builder.NewArray() - defer arr.Release() - assert.Equal(t, 0, arr.Len()) -} - -func TestNestedUnionDictUnion(t *testing.T) { - dt1 := arrow.SparseUnionOf([]arrow.Field{ - {Name: "c", Type: &arrow.DictionaryType{ - IndexType: arrow.PrimitiveTypes.Uint16, - ValueType: arrow.BinaryTypes.String, - Ordered: false, - }}, - }, []arrow.UnionTypeCode{0}) - dt2 := arrow.SparseUnionOf([]arrow.Field{ - {Name: "a", Type: dt1}, - }, []arrow.UnionTypeCode{0}) - pool := memory.NewGoAllocator() - - builder := array.NewSparseUnionBuilder(pool, dt2) - defer builder.Release() - arr := builder.NewArray() - defer arr.Release() - assert.Equal(t, 0, arr.Len()) -} diff --git a/go/arrow/array/util.go b/go/arrow/array/util.go deleted file mode 100644 index 2b41dadaf4bfc..0000000000000 --- a/go/arrow/array/util.go +++ /dev/null @@ -1,523 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "errors" - "fmt" - "io" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/hashing" - "github.com/apache/arrow/go/v18/internal/json" -) - -func min(a, b int) int { - if a < b { - return a - } - return b -} - -type fromJSONCfg struct { - multiDocument bool - startOffset int64 - useNumber bool -} - -type FromJSONOption func(*fromJSONCfg) - -func WithMultipleDocs() FromJSONOption { - return func(c *fromJSONCfg) { - c.multiDocument = true - } -} - -// WithStartOffset attempts to start decoding from the reader at the offset -// passed in. If using this option the reader must fulfill the io.ReadSeeker -// interface, or else an error will be returned. -// -// It will call Seek(off, io.SeekStart) on the reader -func WithStartOffset(off int64) FromJSONOption { - return func(c *fromJSONCfg) { - c.startOffset = off - } -} - -// WithUseNumber enables the 'UseNumber' option on the json decoder, using -// the json.Number type instead of assuming float64 for numbers. This is critical -// if you have numbers that are larger than what can fit into the 53 bits of -// an IEEE float64 mantissa and want to preserve its value. -func WithUseNumber() FromJSONOption { - return func(c *fromJSONCfg) { - c.useNumber = true - } -} - -// FromJSON creates an arrow.Array from a corresponding JSON stream and defined data type. If the types in the -// json do not match the type provided, it will return errors. This is *not* the integration test format -// and should not be used as such. This intended to be used by consumers more similarly to the current exposing of -// the csv reader/writer. 
It also returns the input offset in the reader where it finished decoding since buffering -// by the decoder could leave the reader's cursor past where the parsing finished if attempting to parse multiple json -// arrays from one stream. -// -// All the Array types implement json.Marshaller and thus can be written to json -// using the json.Marshal function -// -// The JSON provided must be formatted in one of two ways: -// -// Default: the top level of the json must be a list which matches the type specified exactly -// Example: `[1, 2, 3, 4, 5]` for any integer type or `[[...], null, [], .....]` for a List type -// Struct arrays are represented a list of objects: `[{"foo": 1, "bar": "moo"}, {"foo": 5, "bar": "baz"}]` -// -// Using WithMultipleDocs: -// If the JSON provided is multiple newline separated json documents, then use this option -// and each json document will be treated as a single row of the array. This is most useful for record batches -// and interacting with other processes that use json. For example: -// `{"col1": 1, "col2": "row1", "col3": ...}\n{"col1": 2, "col2": "row2", "col3": ...}\n.....` -// -// Duration values get formated upon marshalling as a string consisting of their numeric -// value followed by the unit suffix such as "10s" for a value of 10 and unit of Seconds. -// with "ms" for millisecond, "us" for microsecond, and "ns" for nanosecond as the suffixes. -// Unmarshalling duration values is more permissive since it first tries to use Go's -// time.ParseDuration function which means it allows values in the form 3h25m0.3s in addition -// to the same values which are output. 
-// -// Interval types are marshalled / unmarshalled as follows: -// -// MonthInterval is marshalled as an object with the format: -// { "months": #} -// DayTimeInterval is marshalled using Go's regular marshalling of structs: -// { "days": #, "milliseconds": # } -// MonthDayNanoInterval values are marshalled the same as DayTime using Go's struct marshalling: -// { "months": #, "days": #, "nanoseconds": # } -// -// Times use a format of HH:MM or HH:MM:SS[.zzz] where the fractions of a second cannot -// exceed the precision allowed by the time unit, otherwise unmarshalling will error. -// -// # Dates use YYYY-MM-DD format -// -// Timestamps use RFC3339Nano format except without a timezone, all of the following are valid: -// -// YYYY-MM-DD -// YYYY-MM-DD[T]HH -// YYYY-MM-DD[T]HH:MM -// YYYY-MM-DD[T]HH:MM:SS[.zzzzzzzzzz] -// -// The fractions of a second cannot exceed the precision allowed by the timeunit of the datatype. -// -// When processing structs as objects order of keys does not matter, but keys cannot be repeated. 
-func FromJSON(mem memory.Allocator, dt arrow.DataType, r io.Reader, opts ...FromJSONOption) (arr arrow.Array, offset int64, err error) { - var cfg fromJSONCfg - for _, o := range opts { - o(&cfg) - } - - if cfg.startOffset != 0 { - seeker, ok := r.(io.ReadSeeker) - if !ok { - return nil, 0, errors.New("using StartOffset option requires reader to be a ReadSeeker, cannot seek") - } - - seeker.Seek(cfg.startOffset, io.SeekStart) - } - - bldr := NewBuilder(mem, dt) - defer bldr.Release() - - dec := json.NewDecoder(r) - defer func() { - if errors.Is(err, io.EOF) { - err = fmt.Errorf("failed parsing json: %w", io.ErrUnexpectedEOF) - } - }() - - if cfg.useNumber { - dec.UseNumber() - } - - if !cfg.multiDocument { - t, err := dec.Token() - if err != nil { - return nil, dec.InputOffset(), err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return nil, dec.InputOffset(), fmt.Errorf("json doc must be an array, found %s", delim) - } - } - - if err = bldr.Unmarshal(dec); err != nil { - return nil, dec.InputOffset(), err - } - - if !cfg.multiDocument { - // consume the last ']' - if _, err = dec.Token(); err != nil { - return nil, dec.InputOffset(), err - } - } - - return bldr.NewArray(), dec.InputOffset(), nil -} - -// RecordToStructArray constructs a struct array from the columns of the record batch -// by referencing them, zero-copy. -func RecordToStructArray(rec arrow.Record) *Struct { - cols := make([]arrow.ArrayData, rec.NumCols()) - for i, c := range rec.Columns() { - cols[i] = c.Data() - } - - data := NewData(arrow.StructOf(rec.Schema().Fields()...), int(rec.NumRows()), []*memory.Buffer{nil}, cols, 0, 0) - defer data.Release() - - return NewStructData(data) -} - -// RecordFromStructArray is a convenience function for converting a struct array into -// a record batch without copying the data. If the passed in schema is nil, the fields -// of the struct will be used to define the record batch. 
Otherwise the passed in -// schema will be used to create the record batch. If passed in, the schema must match -// the fields of the struct column. -func RecordFromStructArray(in *Struct, schema *arrow.Schema) arrow.Record { - if schema == nil { - schema = arrow.NewSchema(in.DataType().(*arrow.StructType).Fields(), nil) - } - - return NewRecord(schema, in.fields, int64(in.Len())) -} - -// RecordFromJSON creates a record batch from JSON data. See array.FromJSON for the details -// of formatting and logic. -// -// A record batch from JSON is equivalent to reading a struct array in from json and then -// converting it to a record batch. -func RecordFromJSON(mem memory.Allocator, schema *arrow.Schema, r io.Reader, opts ...FromJSONOption) (arrow.Record, int64, error) { - st := arrow.StructOf(schema.Fields()...) - arr, off, err := FromJSON(mem, st, r, opts...) - if err != nil { - return nil, off, err - } - defer arr.Release() - - return RecordFromStructArray(arr.(*Struct), schema), off, nil -} - -// RecordToJSON writes out the given record following the format of each row is a single object -// on a single line of the output. -func RecordToJSON(rec arrow.Record, w io.Writer) error { - enc := json.NewEncoder(w) - - fields := rec.Schema().Fields() - - cols := make(map[string]interface{}) - for i := 0; int64(i) < rec.NumRows(); i++ { - for j, c := range rec.Columns() { - cols[fields[j].Name] = c.GetOneForMarshal(i) - } - if err := enc.Encode(cols); err != nil { - return err - } - } - return nil -} - -func TableFromJSON(mem memory.Allocator, sc *arrow.Schema, recJSON []string, opt ...FromJSONOption) (arrow.Table, error) { - batches := make([]arrow.Record, len(recJSON)) - for i, batchJSON := range recJSON { - batch, _, err := RecordFromJSON(mem, sc, strings.NewReader(batchJSON), opt...) 
- if err != nil { - return nil, err - } - defer batch.Release() - batches[i] = batch - } - return NewTableFromRecords(sc, batches), nil -} - -func GetDictArrayData(mem memory.Allocator, valueType arrow.DataType, memoTable hashing.MemoTable, startOffset int) (*Data, error) { - dictLen := memoTable.Size() - startOffset - buffers := []*memory.Buffer{nil, nil} - - buffers[1] = memory.NewResizableBuffer(mem) - defer buffers[1].Release() - - switch tbl := memoTable.(type) { - case hashing.NumericMemoTable: - nbytes := tbl.TypeTraits().BytesRequired(dictLen) - buffers[1].Resize(nbytes) - tbl.WriteOutSubset(startOffset, buffers[1].Bytes()) - case *hashing.BinaryMemoTable: - switch valueType.ID() { - case arrow.BINARY, arrow.STRING: - buffers = append(buffers, memory.NewResizableBuffer(mem)) - defer buffers[2].Release() - - buffers[1].Resize(arrow.Int32Traits.BytesRequired(dictLen + 1)) - offsets := arrow.Int32Traits.CastFromBytes(buffers[1].Bytes()) - tbl.CopyOffsetsSubset(startOffset, offsets) - - valuesz := offsets[len(offsets)-1] - offsets[0] - buffers[2].Resize(int(valuesz)) - tbl.CopyValuesSubset(startOffset, buffers[2].Bytes()) - case arrow.LARGE_BINARY, arrow.LARGE_STRING: - buffers = append(buffers, memory.NewResizableBuffer(mem)) - defer buffers[2].Release() - - buffers[1].Resize(arrow.Int64Traits.BytesRequired(dictLen + 1)) - offsets := arrow.Int64Traits.CastFromBytes(buffers[1].Bytes()) - tbl.CopyLargeOffsetsSubset(startOffset, offsets) - - valuesz := offsets[len(offsets)-1] - offsets[0] - buffers[2].Resize(int(valuesz)) - tbl.CopyValuesSubset(startOffset, buffers[2].Bytes()) - default: // fixed size - bw := int(bitutil.BytesForBits(int64(valueType.(arrow.FixedWidthDataType).BitWidth()))) - buffers[1].Resize(dictLen * bw) - tbl.CopyFixedWidthValues(startOffset, bw, buffers[1].Bytes()) - } - default: - return nil, fmt.Errorf("arrow/array: dictionary unifier unimplemented type: %s", valueType) - } - - var nullcount int - if idx, ok := memoTable.GetNull(); ok && 
idx >= startOffset { - buffers[0] = memory.NewResizableBuffer(mem) - defer buffers[0].Release() - nullcount = 1 - buffers[0].Resize(int(bitutil.BytesForBits(int64(dictLen)))) - memory.Set(buffers[0].Bytes(), 0xFF) - bitutil.ClearBit(buffers[0].Bytes(), idx) - } - - return NewData(valueType, dictLen, buffers, nil, nullcount, 0), nil -} - -func DictArrayFromJSON(mem memory.Allocator, dt *arrow.DictionaryType, indicesJSON, dictJSON string) (arrow.Array, error) { - indices, _, err := FromJSON(mem, dt.IndexType, strings.NewReader(indicesJSON)) - if err != nil { - return nil, err - } - defer indices.Release() - - dict, _, err := FromJSON(mem, dt.ValueType, strings.NewReader(dictJSON)) - if err != nil { - return nil, err - } - defer dict.Release() - - return NewDictionaryArray(dt, indices, dict), nil -} - -func ChunkedFromJSON(mem memory.Allocator, dt arrow.DataType, chunkStrs []string, opts ...FromJSONOption) (*arrow.Chunked, error) { - chunks := make([]arrow.Array, len(chunkStrs)) - defer func() { - for _, c := range chunks { - if c != nil { - c.Release() - } - } - }() - - var err error - for i, c := range chunkStrs { - chunks[i], _, err = FromJSON(mem, dt, strings.NewReader(c), opts...) 
- if err != nil { - return nil, err - } - } - - return arrow.NewChunked(dt, chunks), nil -} - -func getMaxBufferLen(dt arrow.DataType, length int) int { - bufferLen := int(bitutil.BytesForBits(int64(length))) - - maxOf := func(bl int) int { - if bl > bufferLen { - return bl - } - return bufferLen - } - - switch dt := dt.(type) { - case *arrow.DictionaryType: - bufferLen = maxOf(getMaxBufferLen(dt.ValueType, length)) - return maxOf(getMaxBufferLen(dt.IndexType, length)) - case *arrow.FixedSizeBinaryType: - return maxOf(dt.ByteWidth * length) - case arrow.FixedWidthDataType: - return maxOf(int(bitutil.BytesForBits(int64(dt.BitWidth()))) * length) - case *arrow.StructType: - for _, f := range dt.Fields() { - bufferLen = maxOf(getMaxBufferLen(f.Type, length)) - } - return bufferLen - case *arrow.SparseUnionType: - // type codes - bufferLen = maxOf(length) - // creates children of the same length of the union - for _, f := range dt.Fields() { - bufferLen = maxOf(getMaxBufferLen(f.Type, length)) - } - return bufferLen - case *arrow.DenseUnionType: - // type codes - bufferLen = maxOf(length) - // offsets - bufferLen = maxOf(arrow.Int32SizeBytes * length) - // create children of length 1 - for _, f := range dt.Fields() { - bufferLen = maxOf(getMaxBufferLen(f.Type, 1)) - } - return bufferLen - case arrow.OffsetsDataType: - return maxOf(dt.OffsetTypeTraits().BytesRequired(length + 1)) - case *arrow.FixedSizeListType: - return maxOf(getMaxBufferLen(dt.Elem(), int(dt.Len())*length)) - case arrow.ExtensionType: - return maxOf(getMaxBufferLen(dt.StorageType(), length)) - default: - panic(fmt.Errorf("arrow/array: arrayofnull not implemented for type %s", dt)) - } -} - -type nullArrayFactory struct { - mem memory.Allocator - dt arrow.DataType - len int - buf *memory.Buffer -} - -func (n *nullArrayFactory) create() *Data { - if n.buf == nil { - bufLen := getMaxBufferLen(n.dt, n.len) - n.buf = memory.NewResizableBuffer(n.mem) - n.buf.Resize(bufLen) - defer n.buf.Release() - } - - 
var ( - dt = n.dt - bufs = []*memory.Buffer{memory.SliceBuffer(n.buf, 0, int(bitutil.BytesForBits(int64(n.len))))} - childData []arrow.ArrayData - dictData arrow.ArrayData - ) - defer bufs[0].Release() - - if ex, ok := dt.(arrow.ExtensionType); ok { - dt = ex.StorageType() - } - - if nf, ok := dt.(arrow.NestedType); ok { - childData = make([]arrow.ArrayData, nf.NumFields()) - } - - switch dt := dt.(type) { - case *arrow.NullType: - case *arrow.DictionaryType: - bufs = append(bufs, n.buf) - arr := MakeArrayOfNull(n.mem, dt.ValueType, 0) - defer arr.Release() - dictData = arr.Data() - case arrow.FixedWidthDataType: - bufs = append(bufs, n.buf) - case arrow.BinaryDataType: - bufs = append(bufs, n.buf, n.buf) - case arrow.OffsetsDataType: - bufs = append(bufs, n.buf) - childData[0] = n.createChild(dt, 0, 0) - defer childData[0].Release() - case *arrow.FixedSizeListType: - childData[0] = n.createChild(dt, 0, n.len*int(dt.Len())) - defer childData[0].Release() - case *arrow.StructType: - for i := range dt.Fields() { - childData[i] = n.createChild(dt, i, n.len) - defer childData[i].Release() - } - case *arrow.RunEndEncodedType: - bldr := NewBuilder(n.mem, dt.RunEnds()) - defer bldr.Release() - - switch b := bldr.(type) { - case *Int16Builder: - b.Append(int16(n.len)) - case *Int32Builder: - b.Append(int32(n.len)) - case *Int64Builder: - b.Append(int64(n.len)) - } - - childData[0] = bldr.newData() - defer childData[0].Release() - childData[1] = n.createChild(dt.Encoded(), 1, 1) - defer childData[1].Release() - case arrow.UnionType: - bufs[0].Release() - bufs[0] = nil - bufs = append(bufs, n.buf) - // buffer is zeroed, but 0 may not be a valid type code - if dt.TypeCodes()[0] != 0 { - bufs[1] = memory.NewResizableBuffer(n.mem) - bufs[1].Resize(n.len) - defer bufs[1].Release() - memory.Set(bufs[1].Bytes(), byte(dt.TypeCodes()[0])) - } - - // for sparse unions we create children with the same length - childLen := n.len - if dt.Mode() == arrow.DenseMode { - // for dense 
unions, offsets are all 0 and make children - // with length 1 - bufs = append(bufs, n.buf) - childLen = 1 - } - for i := range dt.Fields() { - childData[i] = n.createChild(dt, i, childLen) - defer childData[i].Release() - } - } - - out := NewData(n.dt, n.len, bufs, childData, n.len, 0) - if dictData != nil { - out.SetDictionary(dictData) - } - return out -} - -func (n *nullArrayFactory) createChild(dt arrow.DataType, i, length int) *Data { - childFactory := &nullArrayFactory{ - mem: n.mem, dt: n.dt.(arrow.NestedType).Fields()[i].Type, - len: length, buf: n.buf} - return childFactory.create() -} - -// MakeArrayOfNull creates an array of size length which is all null of the given data type. -func MakeArrayOfNull(mem memory.Allocator, dt arrow.DataType, length int) arrow.Array { - if dt.ID() == arrow.NULL { - return NewNull(length) - } - - data := (&nullArrayFactory{mem: mem, dt: dt, len: length}).create() - defer data.Release() - return MakeFromData(data) -} diff --git a/go/arrow/array/util_test.go b/go/arrow/array/util_test.go deleted file mode 100644 index 114ea6e546649..0000000000000 --- a/go/arrow/array/util_test.go +++ /dev/null @@ -1,545 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "bufio" - "bytes" - "fmt" - "io" - "reflect" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/internal/arrdata" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -var typemap = map[arrow.DataType]reflect.Type{ - arrow.PrimitiveTypes.Int8: reflect.TypeOf(int8(0)), - arrow.PrimitiveTypes.Uint8: reflect.TypeOf(uint8(0)), - arrow.PrimitiveTypes.Int16: reflect.TypeOf(int16(0)), - arrow.PrimitiveTypes.Uint16: reflect.TypeOf(uint16(0)), - arrow.PrimitiveTypes.Int32: reflect.TypeOf(int32(0)), - arrow.PrimitiveTypes.Uint32: reflect.TypeOf(uint32(0)), - arrow.PrimitiveTypes.Int64: reflect.TypeOf(int64(0)), - arrow.PrimitiveTypes.Uint64: reflect.TypeOf(uint64(0)), -} - -func TestIntegerArrsJSON(t *testing.T) { - const N = 10 - types := []arrow.DataType{ - arrow.PrimitiveTypes.Int8, - arrow.PrimitiveTypes.Uint8, - arrow.PrimitiveTypes.Int16, - arrow.PrimitiveTypes.Uint16, - arrow.PrimitiveTypes.Int32, - arrow.PrimitiveTypes.Uint32, - arrow.PrimitiveTypes.Int64, - arrow.PrimitiveTypes.Uint64, - } - - for _, tt := range types { - t.Run(fmt.Sprint(tt), func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - jsontest := make([]int, N) - vals := reflect.MakeSlice(reflect.SliceOf(typemap[tt]), N, N) - for i := 0; i < N; i++ { - vals.Index(i).Set(reflect.ValueOf(i).Convert(typemap[tt])) - jsontest[i] = i - } - - data, _ := json.Marshal(jsontest) - arr, _, err := array.FromJSON(mem, tt, bytes.NewReader(data)) - assert.NoError(t, err) - defer arr.Release() - - assert.EqualValues(t, N, arr.Len()) - assert.Zero(t, arr.NullN()) - - output, err := 
json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, string(data), string(output)) - }) - t.Run(fmt.Sprint(tt)+" errors", func(t *testing.T) { - _, _, err := array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("")) - assert.Error(t, err) - - _, _, err = array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("[")) - assert.ErrorIs(t, err, io.ErrUnexpectedEOF) - - _, _, err = array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("0")) - assert.Error(t, err) - - _, _, err = array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("{}")) - assert.Error(t, err) - - _, _, err = array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("[[0]]")) - assert.EqualError(t, err, "json: cannot unmarshal [ into Go value of type "+tt.Name()) - }) - } -} - -func TestStringsJSON(t *testing.T) { - tests := []struct { - jsonstring string - values []string - valids []bool - }{ - {"[]", []string{}, []bool{}}, - {`["", "foo"]`, []string{"", "foo"}, nil}, - {`["", null]`, []string{"", ""}, []bool{true, false}}, - // NUL character in string - {`["", "some\u0000char"]`, []string{"", "some\x00char"}, nil}, - // utf8 sequence in string - {"[\"\xc3\xa9\"]", []string{"\xc3\xa9"}, nil}, - // bytes < 0x20 can be represented as JSON unicode escapes - {`["\u0000\u001f"]`, []string{"\x00\x1f"}, nil}, - } - - for _, tt := range tests { - t.Run("json "+tt.jsonstring, func(t *testing.T) { - bldr := array.NewStringBuilder(memory.DefaultAllocator) - defer bldr.Release() - - bldr.AppendValues(tt.values, tt.valids) - expected := bldr.NewStringArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader(tt.jsonstring)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - - data, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, tt.jsonstring, string(data)) - }) - } - - for _, tt := 
range tests { - t.Run("large json "+tt.jsonstring, func(t *testing.T) { - bldr := array.NewLargeStringBuilder(memory.DefaultAllocator) - defer bldr.Release() - - bldr.AppendValues(tt.values, tt.valids) - expected := bldr.NewLargeStringArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.LargeString, strings.NewReader(tt.jsonstring)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - - data, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, tt.jsonstring, string(data)) - }) - } - - t.Run("errors", func(t *testing.T) { - _, _, err := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader("[0]")) - assert.Error(t, err) - - _, _, err = array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader("[[]]")) - assert.Error(t, err) - }) -} - -func TestStructArrayFromJSON(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - jsonStr := `[{"hello": 3.5, "world": true, "yo": "foo"},{"hello": 3.25, "world": false, "yo": "bar"}]` - - arr, _, err := array.FromJSON(mem, arrow.StructOf( - arrow.Field{Name: "hello", Type: arrow.PrimitiveTypes.Float64}, - arrow.Field{Name: "world", Type: arrow.FixedWidthTypes.Boolean}, - arrow.Field{Name: "yo", Type: arrow.BinaryTypes.String}, - ), strings.NewReader(jsonStr)) - assert.NoError(t, err) - defer arr.Release() - - output, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, jsonStr, string(output)) -} - -func TestArrayFromJSONMulti(t *testing.T) { - arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.StructOf( - arrow.Field{Name: "hello", Type: arrow.PrimitiveTypes.Float64}, - arrow.Field{Name: "world", Type: arrow.FixedWidthTypes.Boolean}, - arrow.Field{Name: "yo", Type: arrow.BinaryTypes.String}, - ), strings.NewReader("{\"hello\": 3.5, 
\"world\": true, \"yo\": \"foo\"}\n{\"hello\": 3.25, \"world\": false, \"yo\": \"bar\"}\n"), - array.WithMultipleDocs()) - assert.NoError(t, err) - defer arr.Release() - - assert.EqualValues(t, 2, arr.Len()) - assert.Zero(t, arr.NullN()) -} - -func TestNestedJSONArrs(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - jsonStr := `[{"hello": 1.5, "world": [1, 2, 3, 4], "yo": [{"foo": "2005-05-06", "bar": "15:02:04.123"},{"foo": "1956-01-02", "bar": "02:10:00"}]}]` - - arr, _, err := array.FromJSON(mem, arrow.StructOf( - arrow.Field{Name: "hello", Type: arrow.PrimitiveTypes.Float64}, - arrow.Field{Name: "world", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32)}, - arrow.Field{Name: "yo", Type: arrow.FixedSizeListOf(2, arrow.StructOf( - arrow.Field{Name: "foo", Type: arrow.FixedWidthTypes.Date32}, - arrow.Field{Name: "bar", Type: arrow.FixedWidthTypes.Time32ms}, - ))}, - ), strings.NewReader(jsonStr)) - assert.NoError(t, err) - defer arr.Release() - - v, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, jsonStr, string(v)) -} - -func TestGetNullsFromJSON(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - jsonStr := `[ - {"yo": "thing", "arr": null, "nuf": {"ps": "今日は"}}, - {"yo": null, "nuf": {"ps": null}, "arr": []}, - { "nuf": null, "yo": "今日は", "arr": [1,2,3]} - ]` - - rec, _, err := array.RecordFromJSON(mem, arrow.NewSchema([]arrow.Field{ - {Name: "yo", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "arr", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}, - {Name: "nuf", Type: arrow.StructOf(arrow.Field{Name: "ps", Type: arrow.BinaryTypes.String, Nullable: true}), Nullable: true}, - }, nil), strings.NewReader(jsonStr)) - assert.NoError(t, err) - defer rec.Release() - - assert.EqualValues(t, 3, rec.NumCols()) - assert.EqualValues(t, 3, rec.NumRows()) - - data, err := json.Marshal(rec) - assert.NoError(t, 
err) - assert.JSONEq(t, jsonStr, string(data)) -} - -func TestDurationsJSON(t *testing.T) { - tests := []struct { - unit arrow.TimeUnit - jsonstr string - values []arrow.Duration - }{ - {arrow.Second, `["1s", "2s", "3s", "4s", "5s"]`, []arrow.Duration{1, 2, 3, 4, 5}}, - {arrow.Millisecond, `["1ms", "2ms", "3ms", "4ms", "5ms"]`, []arrow.Duration{1, 2, 3, 4, 5}}, - {arrow.Microsecond, `["1us", "2us", "3us", "4us", "5us"]`, []arrow.Duration{1, 2, 3, 4, 5}}, - {arrow.Nanosecond, `["1ns", "2ns", "3ns", "4ns", "5ns"]`, []arrow.Duration{1, 2, 3, 4, 5}}, - } - for _, tt := range tests { - dtype := &arrow.DurationType{Unit: tt.unit} - bldr := array.NewDurationBuilder(memory.DefaultAllocator, dtype) - defer bldr.Release() - - bldr.AppendValues(tt.values, nil) - expected := bldr.NewArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, dtype, strings.NewReader(tt.jsonstr)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - } -} - -func TestTimestampsJSON(t *testing.T) { - tests := []struct { - unit arrow.TimeUnit - jsonstr string - values []arrow.Timestamp - }{ - {arrow.Second, `["1970-01-01", "2000-02-29", "3989-07-14", "1900-02-28"]`, []arrow.Timestamp{0, 951782400, 63730281600, -2203977600}}, - {arrow.Nanosecond, `["1970-01-01", "2000-02-29", "1900-02-28"]`, []arrow.Timestamp{0, 951782400000000000, -2203977600000000000}}, - } - - for _, tt := range tests { - dtype := &arrow.TimestampType{Unit: tt.unit} - bldr := array.NewTimestampBuilder(memory.DefaultAllocator, dtype) - defer bldr.Release() - - bldr.AppendValues(tt.values, nil) - expected := bldr.NewArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, dtype, strings.NewReader(tt.jsonstr)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - } -} - -func TestDateJSON(t 
*testing.T) { - t.Run("date32", func(t *testing.T) { - bldr := array.NewDate32Builder(memory.DefaultAllocator) - defer bldr.Release() - - jsonstr := `["1970-01-06", null, "1970-02-12", 0]` - jsonExp := `["1970-01-06", null, "1970-02-12", "1970-01-01"]` - - bldr.AppendValues([]arrow.Date32{5, 0, 42, 0}, []bool{true, false, true, true}) - expected := bldr.NewArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.FixedWidthTypes.Date32, strings.NewReader(jsonstr)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - - data, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, jsonExp, string(data)) - }) - t.Run("date64", func(t *testing.T) { - bldr := array.NewDate64Builder(memory.DefaultAllocator) - defer bldr.Release() - - jsonstr := `["1970-01-02", null, "2286-11-20", 86400000]` - jsonExp := `["1970-01-02", null, "2286-11-20", "1970-01-02"]` - - bldr.AppendValues([]arrow.Date64{86400000, 0, 9999936000000, 86400000}, []bool{true, false, true, true}) - expected := bldr.NewArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.FixedWidthTypes.Date64, strings.NewReader(jsonstr)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - - data, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, jsonExp, string(data)) - }) -} - -func TestTimeJSON(t *testing.T) { - tententen := 60*(60*(10)+10) + 10 - tests := []struct { - dt arrow.DataType - jsonstr string - jsonexp string - valueadd int - }{ - {arrow.FixedWidthTypes.Time32s, `[null, "10:10:10", 36610]`, `[null, "10:10:10", "10:10:10"]`, 123}, - {arrow.FixedWidthTypes.Time32ms, `[null, "10:10:10.123", 36610123]`, `[null, "10:10:10.123", "10:10:10.123"]`, 456}, - {arrow.FixedWidthTypes.Time64us, `[null, "10:10:10.123456", 36610123456]`, 
`[null, "10:10:10.123456", "10:10:10.123456"]`, 789}, - {arrow.FixedWidthTypes.Time64ns, `[null, "10:10:10.123456789", 36610123456789]`, `[null, "10:10:10.123456789", "10:10:10.123456789"]`, 0}, - } - - for _, tt := range tests { - t.Run(fmt.Sprint(tt.dt), func(t *testing.T) { - defer func() { - tententen = 1000*tententen + tt.valueadd - }() - - bldr := array.NewBuilder(memory.DefaultAllocator, tt.dt) - defer bldr.Release() - - switch tt.dt.ID() { - case arrow.TIME32: - bldr.(*array.Time32Builder).AppendValues([]arrow.Time32{0, arrow.Time32(tententen), arrow.Time32(tententen)}, []bool{false, true, true}) - case arrow.TIME64: - bldr.(*array.Time64Builder).AppendValues([]arrow.Time64{0, arrow.Time64(tententen), arrow.Time64(tententen)}, []bool{false, true, true}) - } - - expected := bldr.NewArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, tt.dt, strings.NewReader(tt.jsonstr)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - - data, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, tt.jsonexp, string(data)) - }) - } -} - -func TestDecimal128JSON(t *testing.T) { - dt := &arrow.Decimal128Type{Precision: 10, Scale: 4} - bldr := array.NewDecimal128Builder(memory.DefaultAllocator, dt) - defer bldr.Release() - - bldr.AppendValues([]decimal128.Num{decimal128.FromU64(1234567), {}, decimal128.FromI64(-789000)}, []bool{true, false, true}) - expected := bldr.NewArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, dt, strings.NewReader(`["123.4567", null, "-78.9000"]`)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - - data, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, `["123.4567", null, "-78.9"]`, string(data)) -} - -func TestDecimal256JSON(t *testing.T) { - dt := 
&arrow.Decimal256Type{Precision: 10, Scale: 4} - bldr := array.NewDecimal256Builder(memory.DefaultAllocator, dt) - defer bldr.Release() - - bldr.AppendValues([]decimal256.Num{decimal256.FromU64(1234567), {}, decimal256.FromI64(-789000)}, []bool{true, false, true}) - expected := bldr.NewArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, dt, strings.NewReader(`["123.4567", null, "-78.9000"]`)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - - data, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, `["123.4567", null, "-78.9"]`, string(data)) -} - -func TestArrRecordsJSONRoundTrip(t *testing.T) { - for k, v := range arrdata.Records { - if k == "decimal128" || k == "decimal256" || k == "fixed_width_types" { - // test these separately since the sample data in the arrdata - // records doesn't lend itself to exactness when going to/from - // json. The fixed_width_types one uses negative values for - // time32 and time64 which correctly get interpreted into times, - // but re-encoding them in json produces the normalized positive - // values instead of re-creating negative ones. - // the decimal128/decimal256 values don't get parsed *exactly* due to fun - // float weirdness due to their size, so smaller tests will work fine. 
- continue - } - t.Run(k, func(t *testing.T) { - var buf bytes.Buffer - assert.NotPanics(t, func() { - enc := json.NewEncoder(&buf) - for _, r := range v { - if err := enc.Encode(r); err != nil { - panic(err) - } - } - }) - - rdr := bytes.NewReader(buf.Bytes()) - var cur int64 - - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - for _, r := range v { - rec, off, err := array.RecordFromJSON(mem, r.Schema(), rdr, array.WithStartOffset(cur)) - assert.NoError(t, err) - defer rec.Release() - - assert.Truef(t, array.RecordApproxEqual(r, rec), "expected: %s\ngot: %s\n", r, rec) - cur += off - } - }) - } -} - -func TestStructBuilderJSONUnknownNested(t *testing.T) { - dt := arrow.StructOf( - arrow.Field{Name: "region", Type: arrow.BinaryTypes.String}, - arrow.Field{Name: "model", Type: arrow.PrimitiveTypes.Int32}, - arrow.Field{Name: "sales", Type: arrow.PrimitiveTypes.Float32}) - - const data = `[ - {"region": "NY", "model": "3", "sales": 742.0}, - {"region": "CT", "model": "5", "sales": 742.0} - ]` - - const dataWithExtra = `[ - {"region": "NY", "model": "3", "sales": 742.0, "extra": 1234}, - {"region": "CT", "model": "5", "sales": 742.0, "extra_array": [1234], "extra_obj": {"nested": ["deeply"]}} - ]` - - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - arr, _, err := array.FromJSON(mem, dt, strings.NewReader(data)) - require.NoError(t, err) - require.NotNil(t, arr) - defer arr.Release() - - arr2, _, err := array.FromJSON(mem, dt, strings.NewReader(dataWithExtra)) - require.NoError(t, err) - require.NotNil(t, arr2) - defer arr2.Release() - - assert.Truef(t, array.Equal(arr, arr2), "expected: %s\n actual: %s", arr, arr2) -} - -func TestRecordBuilderUnmarshalJSONExtraFields(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema([]arrow.Field{ - {Name: "region", Type: arrow.BinaryTypes.String}, - {Name: 
"model", Type: arrow.PrimitiveTypes.Int32}, - {Name: "sales", Type: arrow.PrimitiveTypes.Float32}, - }, nil) - - bldr := array.NewRecordBuilder(mem, schema) - defer bldr.Release() - - const data = `{"region": "NY", "model": "3", "sales": 742.0, "extra": 1234} - {"region": "NY", "model": "3", "sales": 742.0, "extra_array": [1234], "extra_obj": {"nested": ["deeply"]}}` - - s := bufio.NewScanner(strings.NewReader(data)) - require.True(t, s.Scan()) - require.NoError(t, bldr.UnmarshalJSON(s.Bytes())) - - rec1 := bldr.NewRecord() - defer rec1.Release() - - require.True(t, s.Scan()) - require.NoError(t, bldr.UnmarshalJSON(s.Bytes())) - - rec2 := bldr.NewRecord() - defer rec2.Release() - - assert.Truef(t, array.RecordEqual(rec1, rec2), "expected: %s\nactual: %s", rec1, rec2) -} diff --git a/go/arrow/arrio/arrio.go b/go/arrow/arrio/arrio.go deleted file mode 100644 index 53215c81f75eb..0000000000000 --- a/go/arrow/arrio/arrio.go +++ /dev/null @@ -1,92 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package arrio exposes functions to manipulate records, exposing and using -// interfaces not unlike the ones defined in the stdlib io package. 
-package arrio - -import ( - "errors" - "io" - - "github.com/apache/arrow/go/v18/arrow" -) - -// Reader is the interface that wraps the Read method. -type Reader interface { - // Read reads the current record from the underlying stream and an error, if any. - // When the Reader reaches the end of the underlying stream, it returns (nil, io.EOF). - Read() (arrow.Record, error) -} - -// ReaderAt is the interface that wraps the ReadAt method. -type ReaderAt interface { - // ReadAt reads the i-th record from the underlying stream and an error, if any. - ReadAt(i int64) (arrow.Record, error) -} - -// Writer is the interface that wraps the Write method. -type Writer interface { - Write(rec arrow.Record) error -} - -// Copy copies all the records available from src to dst. -// Copy returns the number of records copied and the first error -// encountered while copying, if any. -// -// A successful Copy returns err == nil, not err == EOF. Because Copy is -// defined to read from src until EOF, it does not treat an EOF from Read as an -// error to be reported. -func Copy(dst Writer, src Reader) (n int64, err error) { - for { - rec, err := src.Read() - if err != nil { - if errors.Is(err, io.EOF) { - return n, nil - } - return n, err - } - err = dst.Write(rec) - if err != nil { - return n, err - } - n++ - } -} - -// CopyN copies n records (or until an error) from src to dst. It returns the -// number of records copied and the earliest error encountered while copying. On -// return, written == n if and only if err == nil. 
-func CopyN(dst Writer, src Reader, n int64) (written int64, err error) { - for ; written < n; written++ { - rec, err := src.Read() - if err != nil { - if errors.Is(err, io.EOF) && written == n { - return written, nil - } - return written, err - } - err = dst.Write(rec) - if err != nil { - return written, err - } - } - - if written != n && err == nil { - err = io.EOF - } - return written, err -} diff --git a/go/arrow/arrio/arrio_test.go b/go/arrow/arrio/arrio_test.go deleted file mode 100644 index 26863ec252bf7..0000000000000 --- a/go/arrow/arrio/arrio_test.go +++ /dev/null @@ -1,197 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package arrio_test - -import ( - "fmt" - "io" - "os" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/arrio" - "github.com/apache/arrow/go/v18/arrow/internal/arrdata" - "github.com/apache/arrow/go/v18/arrow/ipc" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -type copyKind int - -const ( - fileKind copyKind = iota - streamKind -) - -func (k copyKind) write(t *testing.T, f *os.File, mem memory.Allocator, schema *arrow.Schema, recs []arrow.Record) { - t.Helper() - - switch k { - case fileKind: - arrdata.WriteFile(t, f, mem, schema, recs) - case streamKind: - arrdata.WriteStream(t, f, mem, schema, recs) - default: - panic("invalid copyKind") - } -} - -func (k copyKind) check(t *testing.T, f *os.File, mem memory.Allocator, schema *arrow.Schema, recs []arrow.Record) { - t.Helper() - - switch k { - case fileKind: - arrdata.CheckArrowFile(t, f, mem, schema, recs) - case streamKind: - arrdata.CheckArrowStream(t, f, mem, schema, recs) - default: - panic("invalid copyKind") - } -} - -func TestCopy(t *testing.T) { - tempDir := t.TempDir() - - for _, tc := range []struct { - name string - src, dst copyKind - }{ - {name: "file2file", src: fileKind, dst: fileKind}, - {name: "file2stream", src: fileKind, dst: streamKind}, - {name: "stream2file", src: streamKind, dst: fileKind}, - {name: "stream2stream", src: streamKind, dst: streamKind}, - } { - t.Run(tc.name, func(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - for _, tcopy := range []struct { - n int - want int - err error - }{ - {-1, len(recs), nil}, - {1, 1, nil}, - {0, 0, nil}, - {len(recs), len(recs), nil}, - {len(recs) + 1, len(recs), io.EOF}, - } { - t.Run(fmt.Sprintf("-copy-n=%d", tcopy.n), func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - f, err := os.CreateTemp(tempDir, "go-arrow-copy-") - if err != nil { - t.Fatal(err) - } - defer f.Close() - - o, err 
:= os.CreateTemp(tempDir, "go-arrow-copy-") - if err != nil { - t.Fatal(err) - } - defer o.Close() - - tc.src.write(t, f, mem, recs[0].Schema(), recs) - tc.src.check(t, f, mem, recs[0].Schema(), recs) - - _, err = f.Seek(0, io.SeekStart) - if err != nil { - t.Fatal(err) - } - - var r arrio.Reader - switch tc.src { - case fileKind: - rr, err := ipc.NewFileReader(f, ipc.WithSchema(recs[0].Schema()), ipc.WithAllocator(mem)) - if err != nil { - t.Fatal(err) - } - defer rr.Close() - r = rr - case streamKind: - rr, err := ipc.NewReader(f, ipc.WithSchema(recs[0].Schema()), ipc.WithAllocator(mem)) - if err != nil { - t.Fatal(err) - } - defer rr.Release() - r = rr - default: - t.Fatalf("invalid src type %v", tc.src) - } - - var w interface { - arrio.Writer - io.Closer - } - - switch tc.dst { - case fileKind: - w, err = ipc.NewFileWriter(o, ipc.WithSchema(recs[0].Schema()), ipc.WithAllocator(mem)) - if err != nil { - t.Fatal(err) - } - case streamKind: - w = ipc.NewWriter(o, ipc.WithSchema(recs[0].Schema()), ipc.WithAllocator(mem)) - default: - t.Fatalf("invalid dst type %v", tc.dst) - } - defer w.Close() - - var ( - n int64 - ) - switch tcopy.n { - case -1: - n, err = arrio.Copy(w, r) - case len(recs) + 1: - n, err = arrio.CopyN(w, r, int64(tcopy.n)) - default: - n, err = arrio.CopyN(w, r, int64(tcopy.n)) - } - - switch err { - case nil: - if tcopy.err != nil { - t.Fatalf("got a nil error, want=%v", tcopy.err) - } - default: - switch tcopy.err { - case nil: - t.Fatalf("invalid error: got=%v, want=%v", err, tcopy.err) - default: - if tcopy.err.Error() != err.Error() { - t.Fatalf("invalid error: got=%v, want=%v", err, tcopy.err) - } - } - } - - if got, want := n, int64(tcopy.want); got != want { - t.Fatalf("invalid number of records copied: got=%d, want=%d", got, want) - } - - err = w.Close() - if err != nil { - t.Fatal(err) - } - - tc.dst.check(t, o, mem, recs[0].Schema(), recs[:tcopy.want]) - }) - } - }) - } - }) - } -} diff --git a/go/arrow/avro/avro2parquet/main.go 
b/go/arrow/avro/avro2parquet/main.go deleted file mode 100644 index ae514c5ed1fda..0000000000000 --- a/go/arrow/avro/avro2parquet/main.go +++ /dev/null @@ -1,119 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "bufio" - "bytes" - "flag" - "fmt" - "log" - "os" - "runtime/pprof" - "time" - - "github.com/apache/arrow/go/v18/arrow/avro" - "github.com/apache/arrow/go/v18/parquet" - "github.com/apache/arrow/go/v18/parquet/compress" - pq "github.com/apache/arrow/go/v18/parquet/pqarrow" -) - -var ( - cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") - filepath = flag.String("file", "", "avro ocf to convert") -) - -func main() { - flag.Parse() - if *cpuprofile != "" { - f, err := os.Create(*cpuprofile) - if err != nil { - log.Fatal("could not create CPU profile: ", err) - } - defer f.Close() // error handling omitted for example - if err := pprof.StartCPUProfile(f); err != nil { - log.Fatal("could not start CPU profile: ", err) - } - defer pprof.StopCPUProfile() - } - if *filepath == "" { - fmt.Println("no file specified") - } - chunk := 1024 * 8 - ts := time.Now() - log.Println("starting:") - info, err := os.Stat(*filepath) - if err != nil { - fmt.Println(err) - 
os.Exit(1) - } - filesize := info.Size() - data, err := os.ReadFile(*filepath) - if err != nil { - fmt.Println(err) - os.Exit(2) - } - fmt.Printf("file : %v\nsize: %v MB\n", filepath, float64(filesize)/1024/1024) - - r := bytes.NewReader(data) - ior := bufio.NewReaderSize(r, 4096*8) - av2arReader, err := avro.NewOCFReader(ior, avro.WithChunk(chunk)) - if err != nil { - fmt.Println(err) - os.Exit(3) - } - fp, err := os.OpenFile(*filepath+".parquet", os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644) - if err != nil { - fmt.Println(err) - os.Exit(4) - } - defer fp.Close() - pwProperties := parquet.NewWriterProperties(parquet.WithDictionaryDefault(true), - parquet.WithVersion(parquet.V2_LATEST), - parquet.WithCompression(compress.Codecs.Snappy), - parquet.WithBatchSize(1024*32), - parquet.WithDataPageSize(1024*1024), - parquet.WithMaxRowGroupLength(64*1024*1024), - ) - awProperties := pq.NewArrowWriterProperties(pq.WithStoreSchema()) - pr, err := pq.NewFileWriter(av2arReader.Schema(), fp, pwProperties, awProperties) - if err != nil { - fmt.Println(err) - os.Exit(5) - } - defer pr.Close() - fmt.Printf("parquet version: %v\n", pwProperties.Version()) - for av2arReader.Next() { - if av2arReader.Err() != nil { - fmt.Println(err) - os.Exit(6) - } - recs := av2arReader.Record() - err = pr.WriteBuffered(recs) - if err != nil { - fmt.Println(err) - os.Exit(7) - } - recs.Release() - } - if av2arReader.Err() != nil { - fmt.Println(av2arReader.Err()) - } - - pr.Close() - log.Printf("time to convert: %v\n", time.Since(ts)) -} diff --git a/go/arrow/avro/loader.go b/go/arrow/avro/loader.go deleted file mode 100644 index 26d8678e8e2be..0000000000000 --- a/go/arrow/avro/loader.go +++ /dev/null @@ -1,85 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package avro - -import ( - "errors" - "fmt" - "io" -) - -func (r *OCFReader) decodeOCFToChan() { - defer close(r.avroChan) - for r.r.HasNext() { - select { - case <-r.readerCtx.Done(): - r.err = fmt.Errorf("avro decoding cancelled, %d records read", r.avroDatumCount) - return - default: - var datum any - err := r.r.Decode(&datum) - if err != nil { - if errors.Is(err, io.EOF) { - r.err = nil - return - } - r.err = err - return - } - r.avroChan <- datum - r.avroDatumCount++ - } - } -} - -func (r *OCFReader) recordFactory() { - defer close(r.recChan) - r.primed = true - recChunk := 0 - switch { - case r.chunk < 1: - for data := range r.avroChan { - err := r.ldr.loadDatum(data) - if err != nil { - r.err = err - return - } - } - r.recChan <- r.bld.NewRecord() - r.bldDone <- struct{}{} - case r.chunk >= 1: - for data := range r.avroChan { - if recChunk == 0 { - r.bld.Reserve(r.chunk) - } - err := r.ldr.loadDatum(data) - if err != nil { - r.err = err - return - } - recChunk++ - if recChunk >= r.chunk { - r.recChan <- r.bld.NewRecord() - recChunk = 0 - } - } - if recChunk != 0 { - r.recChan <- r.bld.NewRecord() - } - r.bldDone <- struct{}{} - } -} diff --git a/go/arrow/avro/reader.go b/go/arrow/avro/reader.go deleted file mode 100644 index 1463041499de2..0000000000000 --- a/go/arrow/avro/reader.go +++ /dev/null @@ -1,337 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// 
or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package avro - -import ( - "context" - "errors" - "fmt" - "io" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/hamba/avro/v2/ocf" - "github.com/tidwall/sjson" - - avro "github.com/hamba/avro/v2" -) - -var ErrMismatchFields = errors.New("arrow/avro: number of records mismatch") - -// Option configures an Avro reader/writer. -type ( - Option func(config) - config *OCFReader -) - -type schemaEdit struct { - method string - path string - value any -} - -// Reader wraps goavro/OCFReader and creates array.Records from a schema. 
-type OCFReader struct { - r *ocf.Decoder - avroSchema string - avroSchemaEdits []schemaEdit - schema *arrow.Schema - - refs int64 - bld *array.RecordBuilder - bldMap *fieldPos - ldr *dataLoader - cur arrow.Record - err error - - primed bool - readerCtx context.Context - readCancel func() - maxOCF int - maxRec int - - avroChan chan any - avroDatumCount int64 - avroChanSize int - recChan chan arrow.Record - - bldDone chan struct{} - - recChanSize int - chunk int - mem memory.Allocator -} - -// NewReader returns a reader that reads from an Avro OCF file and creates -// arrow.Records from the converted avro data. -func NewOCFReader(r io.Reader, opts ...Option) (*OCFReader, error) { - ocfr, err := ocf.NewDecoder(r) - if err != nil { - return nil, fmt.Errorf("%w: could not create avro ocfreader", arrow.ErrInvalid) - } - - rr := &OCFReader{ - r: ocfr, - refs: 1, - chunk: 1, - avroChanSize: 500, - recChanSize: 10, - } - for _, opt := range opts { - opt(rr) - } - - rr.avroChan = make(chan any, rr.avroChanSize) - rr.recChan = make(chan arrow.Record, rr.recChanSize) - rr.bldDone = make(chan struct{}) - schema, err := avro.Parse(string(ocfr.Metadata()["avro.schema"])) - if err != nil { - return nil, fmt.Errorf("%w: could not parse avro header", arrow.ErrInvalid) - } - rr.avroSchema = schema.String() - if len(rr.avroSchemaEdits) > 0 { - // execute schema edits - for _, e := range rr.avroSchemaEdits { - err := rr.editAvroSchema(e) - if err != nil { - return nil, fmt.Errorf("%w: could not edit avro schema", arrow.ErrInvalid) - } - } - // validate edited schema - schema, err = avro.Parse(rr.avroSchema) - if err != nil { - return nil, fmt.Errorf("%w: could not parse modified avro schema", arrow.ErrInvalid) - } - } - rr.schema, err = ArrowSchemaFromAvro(schema) - if err != nil { - return nil, fmt.Errorf("%w: could not convert avro schema", arrow.ErrInvalid) - } - if rr.mem == nil { - rr.mem = memory.DefaultAllocator - } - rr.readerCtx, rr.readCancel = 
context.WithCancel(context.Background()) - go rr.decodeOCFToChan() - - rr.bld = array.NewRecordBuilder(rr.mem, rr.schema) - rr.bldMap = newFieldPos() - rr.ldr = newDataLoader() - for idx, fb := range rr.bld.Fields() { - mapFieldBuilders(fb, rr.schema.Field(idx), rr.bldMap) - } - rr.ldr.drawTree(rr.bldMap) - go rr.recordFactory() - return rr, nil -} - -// Reuse allows the OCFReader to be reused to read another Avro file provided the -// new Avro file has an identical schema. -func (rr *OCFReader) Reuse(r io.Reader, opts ...Option) error { - rr.Close() - rr.err = nil - ocfr, err := ocf.NewDecoder(r) - if err != nil { - return fmt.Errorf("%w: could not create avro ocfreader", arrow.ErrInvalid) - } - schema, err := avro.Parse(string(ocfr.Metadata()["avro.schema"])) - if err != nil { - return fmt.Errorf("%w: could not parse avro header", arrow.ErrInvalid) - } - if rr.avroSchema != schema.String() { - return fmt.Errorf("%w: avro schema mismatch", arrow.ErrInvalid) - } - - rr.r = ocfr - for _, opt := range opts { - opt(rr) - } - - rr.maxOCF = 0 - rr.maxRec = 0 - rr.avroDatumCount = 0 - rr.primed = false - - rr.avroChan = make(chan any, rr.avroChanSize) - rr.recChan = make(chan arrow.Record, rr.recChanSize) - rr.bldDone = make(chan struct{}) - - rr.readerCtx, rr.readCancel = context.WithCancel(context.Background()) - go rr.decodeOCFToChan() - go rr.recordFactory() - return nil -} - -// Err returns the last error encountered during the iteration over the -// underlying Avro file. -func (r *OCFReader) Err() error { return r.err } - -// AvroSchema returns the Avro schema of the Avro OCF -func (r *OCFReader) AvroSchema() string { return r.avroSchema } - -// Schema returns the converted Arrow schema of the Avro OCF -func (r *OCFReader) Schema() *arrow.Schema { return r.schema } - -// Record returns the current record that has been extracted from the -// underlying Avro OCF file. -// It is valid until the next call to Next. 
-func (r *OCFReader) Record() arrow.Record { return r.cur } - -// Metrics returns the maximum queue depth of the Avro record read cache and of the -// converted Arrow record cache. -func (r *OCFReader) Metrics() string { - return fmt.Sprintf("Max. OCF queue depth: %d/%d Max. record queue depth: %d/%d", r.maxOCF, r.avroChanSize, r.maxRec, r.recChanSize) -} - -// OCFRecordsReadCount returns the number of Avro datum that were read from the Avro file. -func (r *OCFReader) OCFRecordsReadCount() int64 { return r.avroDatumCount } - -// Close closes the OCFReader's Avro record read cache and converted Arrow record cache. OCFReader must -// be closed if the Avro OCF's records have not been read to completion. -func (r *OCFReader) Close() { - r.readCancel() - r.err = r.readerCtx.Err() -} - -func (r *OCFReader) editAvroSchema(e schemaEdit) error { - var err error - switch e.method { - case "set": - r.avroSchema, err = sjson.Set(r.avroSchema, e.path, e.value) - if err != nil { - return fmt.Errorf("%w: schema edit 'set %s = %v' failure - %v", arrow.ErrInvalid, e.path, e.value, err) - } - case "delete": - r.avroSchema, err = sjson.Delete(r.avroSchema, e.path) - if err != nil { - return fmt.Errorf("%w: schema edit 'delete' failure - %v", arrow.ErrInvalid, err) - } - default: - return fmt.Errorf("%w: schema edit method must be 'set' or 'delete'", arrow.ErrInvalid) - } - return nil -} - -// Next returns whether a Record can be received from the converted record queue. -// The user should check Err() after call to Next that return false to check -// if an error took place. 
-func (r *OCFReader) Next() bool { - if r.cur != nil { - r.cur.Release() - r.cur = nil - } - if r.maxOCF < len(r.avroChan) { - r.maxOCF = len(r.avroChan) - } - if r.maxRec < len(r.recChan) { - r.maxRec = len(r.recChan) - } - select { - case r.cur = <-r.recChan: - case <-r.bldDone: - if len(r.recChan) > 0 { - r.cur = <-r.recChan - } - } - if r.err != nil { - return false - } - - return r.cur != nil -} - -// WithAllocator specifies the Arrow memory allocator used while building records. -func WithAllocator(mem memory.Allocator) Option { - return func(cfg config) { - cfg.mem = mem - } -} - -// WithReadCacheSize specifies the size of the OCF record decode queue, default value -// is 500. -func WithReadCacheSize(n int) Option { - return func(cfg config) { - if n < 1 { - cfg.avroChanSize = 500 - } else { - cfg.avroChanSize = n - } - } -} - -// WithRecordCacheSize specifies the size of the converted Arrow record queue, default -// value is 1. -func WithRecordCacheSize(n int) Option { - return func(cfg config) { - if n < 1 { - cfg.recChanSize = 1 - } else { - cfg.recChanSize = n - } - } -} - -// WithSchemaEdit specifies modifications to the Avro schema. Supported methods are 'set' and -// 'delete'. Set sets the value for the specified path. Delete deletes the value for the specified path. -// A path is in dot syntax, such as "fields.1" or "fields.0.type". The modified Avro schema is -// validated before conversion to Arrow schema - NewOCFReader will return an error if the modified schema -// cannot be parsed. -func WithSchemaEdit(method, path string, value any) Option { - return func(cfg config) { - var e schemaEdit - e.method = method - e.path = path - e.value = value - cfg.avroSchemaEdits = append(cfg.avroSchemaEdits, e) - } -} - -// WithChunk specifies the chunk size used while reading Avro OCF files. -// -// If n is zero or 1, no chunking will take place and the reader will create -// one record per row. -// If n is greater than 1, chunks of n rows will be read. 
-// If n is negative, the reader will load the whole Avro OCF file into memory and -// create one big record with all the rows. -func WithChunk(n int) Option { - return func(cfg config) { - cfg.chunk = n - } -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (r *OCFReader) Retain() { - atomic.AddInt64(&r.refs, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (r *OCFReader) Release() { - debug.Assert(atomic.LoadInt64(&r.refs) > 0, "too many releases") - - if atomic.AddInt64(&r.refs, -1) == 0 { - if r.cur != nil { - r.cur.Release() - } - } -} - -var _ array.RecordReader = (*OCFReader)(nil) diff --git a/go/arrow/avro/reader_test.go b/go/arrow/avro/reader_test.go deleted file mode 100644 index 2cb1a7caa801c..0000000000000 --- a/go/arrow/avro/reader_test.go +++ /dev/null @@ -1,364 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package avro - -import ( - "fmt" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - hamba "github.com/hamba/avro/v2" -) - -func TestEditSchemaStringEqual(t *testing.T) { - tests := []struct { - avroSchema string - arrowSchema []arrow.Field - }{ - { - avroSchema: `{ - "fields": [ - { - "name": "inheritNull", - "type": { - "name": "Simple", - "symbols": [ - "a", - "b" - ], - "type": "enum" - } - }, - { - "name": "explicitNamespace", - "type": { - "name": "test", - "namespace": "org.hamba.avro", - "size": 12, - "type": "fixed" - } - }, - { - "name": "fullName", - "type": { - "type": "record", - "name": "fullName_data", - "namespace": "ignored", - "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.", - "fields": [{ - "name": "inheritNamespace", - "type": { - "type": "enum", - "name": "Understanding", - "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. 
The fullname is 'a.full.Understanding'.", - "symbols": ["d", "e"] - } - }, { - "name": "md5", - "type": { - "name": "md5_data", - "type": "fixed", - "size": 16, - "namespace": "ignored" - } - } - ] - } - }, - { - "name": "id", - "type": "int" - }, - { - "name": "bigId", - "type": "long" - }, - { - "name": "temperature", - "type": [ - "null", - "float" - ] - }, - { - "name": "fraction", - "type": [ - "null", - "double" - ] - }, - { - "name": "is_emergency", - "type": "boolean" - }, - { - "name": "remote_ip", - "type": [ - "null", - "bytes" - ] - }, - { - "name": "person", - "type": { - "fields": [ - { - "name": "lastname", - "type": "string" - }, - { - "name": "address", - "type": { - "fields": [ - { - "name": "streetaddress", - "type": "string" - }, - { - "name": "city", - "type": "string" - } - ], - "name": "AddressUSRecord", - "type": "record" - } - }, - { - "name": "mapfield", - "type": { - "default": { - }, - "type": "map", - "values": "long" - } - }, - { - "name": "arrayField", - "type": { - "default": [ - ], - "items": "string", - "type": "array" - } - } - ], - "name": "person_data", - "type": "record" - } - }, - { - "name": "decimalField", - "type": { - "logicalType": "decimal", - "precision": 4, - "scale": 2, - "type": "bytes" - } - }, - { - "logicalType": "uuid", - "name": "uuidField", - "type": "string" - }, - { - "name": "timemillis", - "type": { - "type": "int", - "logicalType": "time-millis" - } - }, - { - "name": "timemicros", - "type": { - "type": "long", - "logicalType": "time-micros" - } - }, - { - "name": "timestampmillis", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - }, - { - "name": "timestampmicros", - "type": { - "type": "long", - "logicalType": "timestamp-micros" - } - }, - { - "name": "duration", - "type": { - "name": "duration", - "namespace": "whyowhy", - "logicalType": "duration", - "size": 12, - "type": "fixed" - } - }, - { - "name": "date", - "type": { - "logicalType": "date", - "type": "int" - } - } - ], - 
"name": "Example", - "type": "record" - }`, - arrowSchema: []arrow.Field{ - { - Name: "explicitNamespace", - Type: &arrow.FixedSizeBinaryType{ByteWidth: 12}, - }, - { - Name: "fullName", - Type: arrow.StructOf( - arrow.Field{ - Name: "inheritNamespace", - Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String, Ordered: false}, - }, - arrow.Field{ - Name: "md5", - Type: &arrow.FixedSizeBinaryType{ByteWidth: 16}, - }, - ), - }, - { - Name: "id", - Type: arrow.PrimitiveTypes.Int32, - }, - { - Name: "bigId", - Type: arrow.PrimitiveTypes.Int64, - }, - { - Name: "temperature", - Type: arrow.PrimitiveTypes.Float32, - Nullable: true, - }, - { - Name: "fraction", - Type: arrow.PrimitiveTypes.Float64, - Nullable: true, - }, - { - Name: "is_emergency", - Type: arrow.FixedWidthTypes.Boolean, - }, - { - Name: "remote_ip", - Type: arrow.BinaryTypes.Binary, - Nullable: true, - }, - { - Name: "person", - Type: arrow.StructOf( - arrow.Field{ - Name: "lastname", - Type: arrow.BinaryTypes.String, - }, - arrow.Field{ - Name: "address", - Type: arrow.StructOf( - arrow.Field{ - Name: "streetaddress", - Type: arrow.BinaryTypes.String, - }, - arrow.Field{ - Name: "city", - Type: arrow.BinaryTypes.String, - }, - ), - }, - arrow.Field{ - Name: "mapfield", - Type: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int64), - Nullable: true, - }, - arrow.Field{ - Name: "arrayField", - Type: arrow.ListOfNonNullable(arrow.BinaryTypes.String), - }, - ), - }, - { - Name: "decimalField", - Type: &arrow.Decimal128Type{Precision: 4, Scale: 2}, - }, - { - Name: "uuidField", - Type: arrow.BinaryTypes.String, - }, - { - Name: "timemillis", - Type: arrow.FixedWidthTypes.Time32ms, - }, - { - Name: "timemicros", - Type: arrow.FixedWidthTypes.Time64us, - }, - { - Name: "timestampmillis", - Type: arrow.FixedWidthTypes.Timestamp_ms, - }, - { - Name: "timestampmicros", - Type: arrow.FixedWidthTypes.Timestamp_us, - }, - { - Name: "duration", - Type: 
arrow.FixedWidthTypes.MonthDayNanoInterval, - }, - { - Name: "date", - Type: arrow.FixedWidthTypes.Date32, - }, - }, - }, - } - - for _, test := range tests { - t.Run("", func(t *testing.T) { - want := arrow.NewSchema(test.arrowSchema, nil) - - schema, err := hamba.ParseBytes([]byte(test.avroSchema)) - if err != nil { - t.Fatalf("%v", err) - } - r := new(OCFReader) - r.avroSchema = schema.String() - r.editAvroSchema(schemaEdit{method: "delete", path: "fields.0"}) - schema, err = hamba.Parse(r.avroSchema) - if err != nil { - t.Fatalf("%v: could not parse modified avro schema", arrow.ErrInvalid) - } - got, err := ArrowSchemaFromAvro(schema) - if err != nil { - t.Fatalf("%v", err) - } - if !(fmt.Sprintf("%+v", want.String()) == fmt.Sprintf("%+v", got.String())) { - t.Fatalf("got=%v,\n want=%v", got.String(), want.String()) - } else { - t.Logf("schema.String() comparison passed") - } - }) - } -} diff --git a/go/arrow/avro/reader_types.go b/go/arrow/avro/reader_types.go deleted file mode 100644 index dab2b33dce601..0000000000000 --- a/go/arrow/avro/reader_types.go +++ /dev/null @@ -1,875 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package avro - -import ( - "bytes" - "encoding/binary" - "errors" - "fmt" - "math/big" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/extensions" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -type dataLoader struct { - idx, depth int32 - list *fieldPos - item *fieldPos - mapField *fieldPos - mapKey *fieldPos - mapValue *fieldPos - fields []*fieldPos - children []*dataLoader -} - -var ( - ErrNullStructData = errors.New("null struct data") -) - -func newDataLoader() *dataLoader { return &dataLoader{idx: 0, depth: 0} } - -// drawTree takes the tree of field builders produced by mapFieldBuilders() -// and produces another tree structure and aggregates fields whose values can -// be retrieved from a `map[string]any` into a slice of builders, and creates a hierarchy to -// deal with nested types (lists and maps). -func (d *dataLoader) drawTree(field *fieldPos) { - for _, f := range field.children() { - if f.isList || f.isMap { - if f.isList { - c := d.newListChild(f) - if !f.childrens[0].isList { - c.item = f.childrens[0] - c.drawTree(f.childrens[0]) - } else { - c.drawTree(f.childrens[0].childrens[0]) - } - } - if f.isMap { - c := d.newMapChild(f) - if !arrow.IsNested(f.childrens[1].builder.Type().ID()) { - c.mapKey = f.childrens[0] - c.mapValue = f.childrens[1] - } else { - c.mapKey = f.childrens[0] - m := c.newChild() - m.mapValue = f.childrens[1] - m.drawTree(f.childrens[1]) - } - } - } else { - d.fields = append(d.fields, f) - if len(f.children()) > 0 { - d.drawTree(f) - } - } - } -} - -// loadDatum loads decoded Avro data to the schema fields' builder functions. 
-// Since array.StructBuilder.AppendNull() will recursively append null to all of the -// struct's fields, in the case of nil being passed to a struct's builderFunc it will -// return a ErrNullStructData error to signal that all its sub-fields can be skipped. -func (d *dataLoader) loadDatum(data any) error { - if d.list == nil && d.mapField == nil { - if d.mapValue != nil { - d.mapValue.appendFunc(data) - } - var NullParent *fieldPos - for _, f := range d.fields { - if f.parent == NullParent { - continue - } - if d.mapValue == nil { - err := f.appendFunc(f.getValue(data)) - if err != nil { - if err == ErrNullStructData { - NullParent = f - continue - } - return err - } - } else { - switch dt := data.(type) { - case nil: - err := f.appendFunc(dt) - if err != nil { - if err == ErrNullStructData { - NullParent = f - continue - } - return err - } - case []any: - if len(d.children) < 1 { - for _, e := range dt { - err := f.appendFunc(e) - if err != nil { - if err == ErrNullStructData { - NullParent = f - continue - } - return err - } - } - } else { - for _, e := range dt { - d.children[0].loadDatum(e) - } - } - case map[string]any: - err := f.appendFunc(f.getValue(dt)) - if err != nil { - if err == ErrNullStructData { - NullParent = f - continue - } - return err - } - } - - } - } - for _, c := range d.children { - if c.list != nil { - c.loadDatum(c.list.getValue(data)) - } - if c.mapField != nil { - switch dt := data.(type) { - case nil: - c.loadDatum(dt) - case map[string]any: - c.loadDatum(c.mapField.getValue(dt)) - default: - c.loadDatum(c.mapField.getValue(data)) - } - } - } - } else { - if d.list != nil { - switch dt := data.(type) { - case nil: - d.list.appendFunc(dt) - case []any: - d.list.appendFunc(dt) - for _, e := range dt { - if d.item != nil { - d.item.appendFunc(e) - } - var NullParent *fieldPos - for _, f := range d.fields { - if f.parent == NullParent { - continue - } - err := f.appendFunc(f.getValue(e)) - if err != nil { - if err == ErrNullStructData { 
- NullParent = f - continue - } - return err - } - } - for _, c := range d.children { - if c.list != nil { - c.loadDatum(c.list.getValue(e)) - } - if c.mapField != nil { - c.loadDatum(c.mapField.getValue(e)) - } - } - } - case map[string]any: - d.list.appendFunc(dt["array"]) - for _, e := range dt["array"].([]any) { - if d.item != nil { - d.item.appendFunc(e) - } - var NullParent *fieldPos - for _, f := range d.fields { - if f.parent == NullParent { - continue - } - err := f.appendFunc(f.getValue(e)) - if err != nil { - if err == ErrNullStructData { - NullParent = f - continue - } - return err - } - } - for _, c := range d.children { - c.loadDatum(c.list.getValue(e)) - } - } - default: - d.list.appendFunc(data) - d.item.appendFunc(dt) - } - } - if d.mapField != nil { - switch dt := data.(type) { - case nil: - d.mapField.appendFunc(dt) - case map[string]any: - - d.mapField.appendFunc(dt) - for k, v := range dt { - d.mapKey.appendFunc(k) - if d.mapValue != nil { - d.mapValue.appendFunc(v) - } else { - d.children[0].loadDatum(v) - } - } - } - } - } - return nil -} - -func (d *dataLoader) newChild() *dataLoader { - var child *dataLoader = &dataLoader{ - depth: d.depth + 1, - } - d.children = append(d.children, child) - return child -} - -func (d *dataLoader) newListChild(list *fieldPos) *dataLoader { - var child *dataLoader = &dataLoader{ - list: list, - item: list.childrens[0], - depth: d.depth + 1, - } - d.children = append(d.children, child) - return child -} - -func (d *dataLoader) newMapChild(mapField *fieldPos) *dataLoader { - var child *dataLoader = &dataLoader{ - mapField: mapField, - depth: d.depth + 1, - } - d.children = append(d.children, child) - return child -} - -type fieldPos struct { - parent *fieldPos - fieldName string - builder array.Builder - path []string - isList bool - isItem bool - isStruct bool - isMap bool - typeName string - appendFunc func(val interface{}) error - metadatas arrow.Metadata - childrens []*fieldPos - index, depth int32 -} - 
-func newFieldPos() *fieldPos { return &fieldPos{index: -1} } - -func (f *fieldPos) children() []*fieldPos { return f.childrens } - -func (f *fieldPos) newChild(childName string, childBuilder array.Builder, meta arrow.Metadata) *fieldPos { - var child fieldPos = fieldPos{ - parent: f, - fieldName: childName, - builder: childBuilder, - metadatas: meta, - index: int32(len(f.childrens)), - depth: f.depth + 1, - } - if f.isList { - child.isItem = true - } - child.path = child.buildNamePath() - f.childrens = append(f.childrens, &child) - return &child -} - -func (f *fieldPos) buildNamePath() []string { - var path []string - var listPath []string - cur := f - for i := f.depth - 1; i >= 0; i-- { - if cur.typeName == "" { - path = append([]string{cur.fieldName}, path...) - } else { - path = append([]string{cur.fieldName, cur.typeName}, path...) - } - if !cur.parent.isMap { - cur = cur.parent - } - } - if f.parent.parent != nil && f.parent.parent.isList { - for i := len(path) - 1; i >= 0; i-- { - if path[i] != "item" { - listPath = append([]string{path[i]}, listPath...) - } else { - return listPath - } - } - } - if f.parent != nil && f.parent.fieldName == "value" { - for i := len(path) - 1; i >= 0; i-- { - if path[i] != "value" { - listPath = append([]string{path[i]}, listPath...) 
- } else { - return listPath - } - } - } - return path -} - -// NamePath returns a slice of keys making up the path to the field -func (f *fieldPos) namePath() []string { return f.path } - -// GetValue retrieves the value from the map[string]any -// by following the field's key path -func (f *fieldPos) getValue(m any) any { - if _, ok := m.(map[string]any); !ok { - return m - } - for _, key := range f.namePath() { - valueMap, ok := m.(map[string]any) - if !ok { - if key == "item" { - return m - } - return nil - } - m, ok = valueMap[key] - if !ok { - return nil - } - } - return m -} - -// Avro data is loaded to Arrow arrays using the following type mapping: -// -// Avro Go Arrow -// null nil Null -// boolean bool Boolean -// bytes []byte Binary -// float float32 Float32 -// double float64 Float64 -// long int64 Int64 -// int int32 Int32 -// string string String -// array []interface{} List -// enum string Dictionary -// fixed []byte FixedSizeBinary -// map and record map[string]any Struct -// -// mapFieldBuilders builds a tree of field builders matching the Arrow schema -func mapFieldBuilders(b array.Builder, field arrow.Field, parent *fieldPos) { - f := parent.newChild(field.Name, b, field.Metadata) - switch bt := b.(type) { - case *array.BinaryBuilder: - f.appendFunc = func(data interface{}) error { - appendBinaryData(bt, data) - return nil - } - case *array.BinaryDictionaryBuilder: - // has metadata for Avro enum symbols - f.appendFunc = func(data interface{}) error { - appendBinaryDictData(bt, data) - return nil - } - // add Avro enum symbols to builder - sb := array.NewStringBuilder(memory.DefaultAllocator) - for _, v := range field.Metadata.Values() { - sb.Append(v) - } - sa := sb.NewStringArray() - bt.InsertStringDictValues(sa) - case *array.BooleanBuilder: - f.appendFunc = func(data interface{}) error { - appendBoolData(bt, data) - return nil - } - case *array.Date32Builder: - f.appendFunc = func(data interface{}) error { - appendDate32Data(bt, data) - 
return nil - } - case *array.Decimal128Builder: - f.appendFunc = func(data interface{}) error { - err := appendDecimal128Data(bt, data) - if err != nil { - return err - } - return nil - } - case *array.Decimal256Builder: - f.appendFunc = func(data interface{}) error { - err := appendDecimal256Data(bt, data) - if err != nil { - return err - } - return nil - } - case *extensions.UUIDBuilder: - f.appendFunc = func(data interface{}) error { - switch dt := data.(type) { - case nil: - bt.AppendNull() - case string: - err := bt.AppendValueFromString(dt) - if err != nil { - return err - } - case []byte: - err := bt.AppendValueFromString(string(dt)) - if err != nil { - return err - } - } - return nil - } - case *array.FixedSizeBinaryBuilder: - f.appendFunc = func(data interface{}) error { - appendFixedSizeBinaryData(bt, data) - return nil - } - case *array.Float32Builder: - f.appendFunc = func(data interface{}) error { - appendFloat32Data(bt, data) - return nil - } - case *array.Float64Builder: - f.appendFunc = func(data interface{}) error { - appendFloat64Data(bt, data) - return nil - } - case *array.Int32Builder: - f.appendFunc = func(data interface{}) error { - appendInt32Data(bt, data) - return nil - } - case *array.Int64Builder: - f.appendFunc = func(data interface{}) error { - appendInt64Data(bt, data) - return nil - } - case *array.LargeListBuilder: - vb := bt.ValueBuilder() - f.isList = true - mapFieldBuilders(vb, field.Type.(*arrow.LargeListType).ElemField(), f) - f.appendFunc = func(data interface{}) error { - switch dt := data.(type) { - case nil: - bt.AppendNull() - case []interface{}: - if len(dt) == 0 { - bt.AppendEmptyValue() - } else { - bt.Append(true) - } - default: - bt.Append(true) - } - return nil - } - case *array.ListBuilder: - vb := bt.ValueBuilder() - f.isList = true - mapFieldBuilders(vb, field.Type.(*arrow.ListType).ElemField(), f) - f.appendFunc = func(data interface{}) error { - switch dt := data.(type) { - case nil: - bt.AppendNull() - case 
[]interface{}: - if len(dt) == 0 { - bt.AppendEmptyValue() - } else { - bt.Append(true) - } - default: - bt.Append(true) - } - return nil - } - case *array.MapBuilder: - // has metadata for objects in values - f.isMap = true - kb := bt.KeyBuilder() - ib := bt.ItemBuilder() - mapFieldBuilders(kb, field.Type.(*arrow.MapType).KeyField(), f) - mapFieldBuilders(ib, field.Type.(*arrow.MapType).ItemField(), f) - f.appendFunc = func(data interface{}) error { - switch data.(type) { - case nil: - bt.AppendNull() - default: - bt.Append(true) - } - return nil - } - case *array.MonthDayNanoIntervalBuilder: - f.appendFunc = func(data interface{}) error { - appendDurationData(bt, data) - return nil - } - case *array.StringBuilder: - f.appendFunc = func(data interface{}) error { - appendStringData(bt, data) - return nil - } - case *array.StructBuilder: - // has metadata for Avro Union named types - f.typeName, _ = field.Metadata.GetValue("typeName") - f.isStruct = true - // create children - for i, p := range field.Type.(*arrow.StructType).Fields() { - mapFieldBuilders(bt.FieldBuilder(i), p, f) - } - f.appendFunc = func(data interface{}) error { - switch data.(type) { - case nil: - bt.AppendNull() - return ErrNullStructData - default: - bt.Append(true) - } - return nil - } - case *array.Time32Builder: - f.appendFunc = func(data interface{}) error { - appendTime32Data(bt, data) - return nil - } - case *array.Time64Builder: - f.appendFunc = func(data interface{}) error { - appendTime64Data(bt, data) - return nil - } - case *array.TimestampBuilder: - f.appendFunc = func(data interface{}) error { - appendTimestampData(bt, data) - return nil - } - } -} - -func appendBinaryData(b *array.BinaryBuilder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case map[string]any: - switch ct := dt["bytes"].(type) { - case nil: - b.AppendNull() - default: - b.Append(ct.([]byte)) - } - default: - b.Append(fmt.Append([]byte{}, data)) - } -} - -func 
appendBinaryDictData(b *array.BinaryDictionaryBuilder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case string: - b.AppendString(dt) - case map[string]any: - switch v := dt["string"].(type) { - case nil: - b.AppendNull() - case string: - b.AppendString(v) - } - } -} - -func appendBoolData(b *array.BooleanBuilder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case bool: - b.Append(dt) - case map[string]any: - switch v := dt["boolean"].(type) { - case nil: - b.AppendNull() - case bool: - b.Append(v) - } - } -} - -func appendDate32Data(b *array.Date32Builder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case int32: - b.Append(arrow.Date32(dt)) - case map[string]any: - switch v := dt["int"].(type) { - case nil: - b.AppendNull() - case int32: - b.Append(arrow.Date32(v)) - } - } -} - -func appendDecimal128Data(b *array.Decimal128Builder, data interface{}) error { - switch dt := data.(type) { - case nil: - b.AppendNull() - case []byte: - buf := bytes.NewBuffer(dt) - if len(dt) <= 38 { - var intData int64 - err := binary.Read(buf, binary.BigEndian, &intData) - if err != nil { - return err - } - b.Append(decimal128.FromI64(intData)) - } else { - var bigIntData big.Int - b.Append(decimal128.FromBigInt(bigIntData.SetBytes(buf.Bytes()))) - } - case map[string]any: - buf := bytes.NewBuffer(dt["bytes"].([]byte)) - if len(dt["bytes"].([]byte)) <= 38 { - var intData int64 - err := binary.Read(buf, binary.BigEndian, &intData) - if err != nil { - return err - } - b.Append(decimal128.FromI64(intData)) - } else { - var bigIntData big.Int - b.Append(decimal128.FromBigInt(bigIntData.SetBytes(buf.Bytes()))) - } - } - return nil -} - -func appendDecimal256Data(b *array.Decimal256Builder, data interface{}) error { - switch dt := data.(type) { - case nil: - b.AppendNull() - case []byte: - var bigIntData big.Int - buf := bytes.NewBuffer(dt) - 
b.Append(decimal256.FromBigInt(bigIntData.SetBytes(buf.Bytes()))) - case map[string]any: - var bigIntData big.Int - buf := bytes.NewBuffer(dt["bytes"].([]byte)) - b.Append(decimal256.FromBigInt(bigIntData.SetBytes(buf.Bytes()))) - } - return nil -} - -// Avro duration logical type annotates Avro fixed type of size 12, which stores three little-endian -// unsigned integers that represent durations at different granularities of time. The first stores -// a number in months, the second stores a number in days, and the third stores a number in milliseconds. -func appendDurationData(b *array.MonthDayNanoIntervalBuilder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case []byte: - dur := new(arrow.MonthDayNanoInterval) - dur.Months = int32(binary.LittleEndian.Uint16(dt[:3])) - dur.Days = int32(binary.LittleEndian.Uint16(dt[4:7])) - dur.Nanoseconds = int64(binary.LittleEndian.Uint32(dt[8:]) * 1000000) - b.Append(*dur) - case map[string]any: - switch dtb := dt["bytes"].(type) { - case nil: - b.AppendNull() - case []byte: - dur := new(arrow.MonthDayNanoInterval) - dur.Months = int32(binary.LittleEndian.Uint16(dtb[:3])) - dur.Days = int32(binary.LittleEndian.Uint16(dtb[4:7])) - dur.Nanoseconds = int64(binary.LittleEndian.Uint32(dtb[8:]) * 1000000) - b.Append(*dur) - } - } -} - -func appendFixedSizeBinaryData(b *array.FixedSizeBinaryBuilder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case []byte: - b.Append(dt) - case map[string]any: - switch v := dt["bytes"].(type) { - case nil: - b.AppendNull() - case []byte: - b.Append(v) - } - } -} - -func appendFloat32Data(b *array.Float32Builder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case float32: - b.Append(dt) - case map[string]any: - switch v := dt["float"].(type) { - case nil: - b.AppendNull() - case float32: - b.Append(v) - } - } -} - -func appendFloat64Data(b *array.Float64Builder, data interface{}) { - switch dt := 
data.(type) { - case nil: - b.AppendNull() - case float64: - b.Append(dt) - case map[string]any: - switch v := dt["double"].(type) { - case nil: - b.AppendNull() - case float64: - b.Append(v) - } - } -} - -func appendInt32Data(b *array.Int32Builder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case int: - b.Append(int32(dt)) - case int32: - b.Append(dt) - case map[string]any: - switch v := dt["int"].(type) { - case nil: - b.AppendNull() - case int: - b.Append(int32(v)) - case int32: - b.Append(v) - } - } -} - -func appendInt64Data(b *array.Int64Builder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case int: - b.Append(int64(dt)) - case int64: - b.Append(dt) - case map[string]any: - switch v := dt["long"].(type) { - case nil: - b.AppendNull() - case int: - b.Append(int64(v)) - case int64: - b.Append(v) - } - } -} - -func appendStringData(b *array.StringBuilder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case string: - b.Append(dt) - case map[string]any: - switch v := dt["string"].(type) { - case nil: - b.AppendNull() - case string: - b.Append(v) - } - default: - b.Append(fmt.Sprint(data)) - } -} - -func appendTime32Data(b *array.Time32Builder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case int32: - b.Append(arrow.Time32(dt)) - case map[string]any: - switch v := dt["int"].(type) { - case nil: - b.AppendNull() - case int32: - b.Append(arrow.Time32(v)) - } - } -} - -func appendTime64Data(b *array.Time64Builder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case int64: - b.Append(arrow.Time64(dt)) - case map[string]any: - switch v := dt["long"].(type) { - case nil: - b.AppendNull() - case int64: - b.Append(arrow.Time64(v)) - } - } -} - -func appendTimestampData(b *array.TimestampBuilder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case int64: - 
b.Append(arrow.Timestamp(dt)) - case map[string]any: - switch v := dt["long"].(type) { - case nil: - b.AppendNull() - case int64: - b.Append(arrow.Timestamp(v)) - } - } -} diff --git a/go/arrow/avro/schema.go b/go/arrow/avro/schema.go deleted file mode 100644 index a6de3718d3ccf..0000000000000 --- a/go/arrow/avro/schema.go +++ /dev/null @@ -1,423 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// Package avro reads Avro OCF files and presents the extracted data as records -package avro - -import ( - "fmt" - "math" - "strconv" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/extensions" - "github.com/apache/arrow/go/v18/internal/utils" - avro "github.com/hamba/avro/v2" -) - -type schemaNode struct { - name string - parent *schemaNode - schema avro.Schema - union bool - nullable bool - childrens []*schemaNode - arrowField arrow.Field - schemaCache *avro.SchemaCache - index, depth int32 -} - -func newSchemaNode() *schemaNode { - var schemaCache avro.SchemaCache - return &schemaNode{name: "", index: -1, schemaCache: &schemaCache} -} - -func (node *schemaNode) schemaPath() string { - var path string - n := node - for n.parent != nil { - path = "." + n.name + path - n = n.parent - } - return path -} - -func (node *schemaNode) newChild(n string, s avro.Schema) *schemaNode { - child := &schemaNode{ - name: n, - parent: node, - schema: s, - schemaCache: node.schemaCache, - index: int32(len(node.childrens)), - depth: node.depth + 1, - } - node.childrens = append(node.childrens, child) - return child -} -func (node *schemaNode) children() []*schemaNode { return node.childrens } - -// func (node *schemaNode) nodeName() string { return node.name } - -// ArrowSchemaFromAvro returns a new Arrow schema from an Avro schema -func ArrowSchemaFromAvro(schema avro.Schema) (s *arrow.Schema, err error) { - defer func() { - if r := recover(); r != nil { - s = nil - err = utils.FormatRecoveredError("invalid avro schema", r) - } - }() - n := newSchemaNode() - n.schema = schema - c := n.newChild(n.schema.(avro.NamedSchema).Name(), n.schema) - arrowSchemafromAvro(c) - var fields []arrow.Field - for _, g := range c.children() { - fields = append(fields, g.arrowField) - } - s = arrow.NewSchema(fields, nil) - return s, nil -} - -func arrowSchemafromAvro(n *schemaNode) { - if ns, ok := 
n.schema.(avro.NamedSchema); ok { - n.schemaCache.Add(ns.Name(), ns) - } - switch st := n.schema.Type(); st { - case "record": - iterateFields(n) - case "enum": - n.schemaCache.Add(n.schema.(avro.NamedSchema).Name(), n.schema.(*avro.EnumSchema)) - symbols := make(map[string]string) - for index, symbol := range n.schema.(avro.PropertySchema).(*avro.EnumSchema).Symbols() { - k := strconv.FormatInt(int64(index), 10) - symbols[k] = symbol - } - var dt arrow.DictionaryType = arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered: false} - sl := int64(len(symbols)) - switch { - case sl <= math.MaxUint8: - dt.IndexType = arrow.PrimitiveTypes.Uint8 - case sl > math.MaxUint8 && sl <= math.MaxUint16: - dt.IndexType = arrow.PrimitiveTypes.Uint16 - case sl > math.MaxUint16 && sl <= math.MaxUint32: - dt.IndexType = arrow.PrimitiveTypes.Uint32 - } - n.arrowField = buildArrowField(n, &dt, arrow.MetadataFrom(symbols)) - case "array": - // logical items type - c := n.newChild(n.name, n.schema.(*avro.ArraySchema).Items()) - if isLogicalSchemaType(n.schema.(*avro.ArraySchema).Items()) { - avroLogicalToArrowField(c) - } else { - arrowSchemafromAvro(c) - } - switch c.arrowField.Nullable { - case true: - n.arrowField = arrow.Field{Name: n.name, Type: arrow.ListOfField(c.arrowField), Metadata: c.arrowField.Metadata} - case false: - n.arrowField = arrow.Field{Name: n.name, Type: arrow.ListOfNonNullable(c.arrowField.Type), Metadata: c.arrowField.Metadata} - } - case "map": - n.schemaCache.Add(n.schema.(*avro.MapSchema).Values().(avro.NamedSchema).Name(), n.schema.(*avro.MapSchema).Values()) - c := n.newChild(n.name, n.schema.(*avro.MapSchema).Values()) - arrowSchemafromAvro(c) - n.arrowField = buildArrowField(n, arrow.MapOf(arrow.BinaryTypes.String, c.arrowField.Type), c.arrowField.Metadata) - case "union": - if n.schema.(*avro.UnionSchema).Nullable() { - if len(n.schema.(*avro.UnionSchema).Types()) > 1 { - n.schema = 
n.schema.(*avro.UnionSchema).Types()[1] - n.union = true - n.nullable = true - arrowSchemafromAvro(n) - } - } - // Avro "fixed" field type = Arrow FixedSize Primitive BinaryType - case "fixed": - n.schemaCache.Add(n.schema.(avro.NamedSchema).Name(), n.schema.(*avro.FixedSchema)) - if isLogicalSchemaType(n.schema) { - avroLogicalToArrowField(n) - } else { - n.arrowField = buildArrowField(n, &arrow.FixedSizeBinaryType{ByteWidth: n.schema.(*avro.FixedSchema).Size()}, arrow.Metadata{}) - } - case "string", "bytes", "int", "long": - if isLogicalSchemaType(n.schema) { - avroLogicalToArrowField(n) - } else { - n.arrowField = buildArrowField(n, avroPrimitiveToArrowType(string(st)), arrow.Metadata{}) - } - case "float", "double", "boolean": - n.arrowField = arrow.Field{Name: n.name, Type: avroPrimitiveToArrowType(string(st)), Nullable: n.nullable} - case "": - refSchema := n.schemaCache.Get(string(n.schema.(*avro.RefSchema).Schema().Name())) - if refSchema == nil { - panic(fmt.Errorf("could not find schema for '%v' in schema cache - %v", n.schemaPath(), n.schema.(*avro.RefSchema).Schema().Name())) - } - n.schema = refSchema - arrowSchemafromAvro(n) - case "null": - n.schemaCache.Add(n.schema.(*avro.MapSchema).Values().(avro.NamedSchema).Name(), &avro.NullSchema{}) - n.nullable = true - n.arrowField = buildArrowField(n, arrow.Null, arrow.Metadata{}) - } -} - -// iterate record Fields() -func iterateFields(n *schemaNode) { - for _, f := range n.schema.(*avro.RecordSchema).Fields() { - switch ft := f.Type().(type) { - // Avro "array" field type - case *avro.ArraySchema: - n.schemaCache.Add(f.Name(), ft.Items()) - // logical items type - c := n.newChild(f.Name(), ft.Items()) - if isLogicalSchemaType(ft.Items()) { - avroLogicalToArrowField(c) - } else { - arrowSchemafromAvro(c) - } - switch c.arrowField.Nullable { - case true: - c.arrowField = arrow.Field{Name: c.name, Type: arrow.ListOfField(c.arrowField), Metadata: c.arrowField.Metadata} - case false: - c.arrowField = 
arrow.Field{Name: c.name, Type: arrow.ListOfNonNullable(c.arrowField.Type), Metadata: c.arrowField.Metadata} - } - // Avro "enum" field type = Arrow dictionary type - case *avro.EnumSchema: - n.schemaCache.Add(f.Type().(*avro.EnumSchema).Name(), f.Type()) - c := n.newChild(f.Name(), f.Type()) - symbols := make(map[string]string) - for index, symbol := range ft.Symbols() { - k := strconv.FormatInt(int64(index), 10) - symbols[k] = symbol - } - var dt arrow.DictionaryType = arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered: false} - sl := len(symbols) - switch { - case sl <= math.MaxUint8: - dt.IndexType = arrow.PrimitiveTypes.Uint8 - case sl > math.MaxUint8 && sl <= math.MaxUint16: - dt.IndexType = arrow.PrimitiveTypes.Uint16 - case sl > math.MaxUint16 && sl <= math.MaxInt: - dt.IndexType = arrow.PrimitiveTypes.Uint32 - } - c.arrowField = buildArrowField(c, &dt, arrow.MetadataFrom(symbols)) - // Avro "fixed" field type = Arrow FixedSize Primitive BinaryType - case *avro.FixedSchema: - n.schemaCache.Add(f.Name(), f.Type()) - c := n.newChild(f.Name(), f.Type()) - if isLogicalSchemaType(f.Type()) { - avroLogicalToArrowField(c) - } else { - arrowSchemafromAvro(c) - } - case *avro.RecordSchema: - n.schemaCache.Add(f.Name(), f.Type()) - c := n.newChild(f.Name(), f.Type()) - iterateFields(c) - // Avro "map" field type - KVP with value of one type - keys are strings - case *avro.MapSchema: - n.schemaCache.Add(f.Name(), ft.Values()) - c := n.newChild(f.Name(), ft.Values()) - arrowSchemafromAvro(c) - c.arrowField = buildArrowField(c, arrow.MapOf(arrow.BinaryTypes.String, c.arrowField.Type), c.arrowField.Metadata) - case *avro.UnionSchema: - if ft.Nullable() { - if len(ft.Types()) > 1 { - n.schemaCache.Add(f.Name(), ft.Types()[1]) - c := n.newChild(f.Name(), ft.Types()[1]) - c.union = true - c.nullable = true - arrowSchemafromAvro(c) - } - } - default: - n.schemaCache.Add(f.Name(), f.Type()) - if 
isLogicalSchemaType(f.Type()) { - c := n.newChild(f.Name(), f.Type()) - avroLogicalToArrowField(c) - } else { - c := n.newChild(f.Name(), f.Type()) - arrowSchemafromAvro(c) - } - - } - } - var fields []arrow.Field - for _, child := range n.children() { - fields = append(fields, child.arrowField) - } - - namedSchema, ok := isNamedSchema(n.schema) - - var md arrow.Metadata - if ok && namedSchema != n.name+"_data" && n.union { - md = arrow.NewMetadata([]string{"typeName"}, []string{namedSchema}) - } - n.arrowField = buildArrowField(n, arrow.StructOf(fields...), md) -} - -func isLogicalSchemaType(s avro.Schema) bool { - lts, ok := s.(avro.LogicalTypeSchema) - if !ok { - return false - } - if lts.Logical() != nil { - return true - } - return false -} - -func isNamedSchema(s avro.Schema) (string, bool) { - if ns, ok := s.(avro.NamedSchema); ok { - return ns.FullName(), ok - } - return "", false -} - -func buildArrowField(n *schemaNode, t arrow.DataType, m arrow.Metadata) arrow.Field { - return arrow.Field{ - Name: n.name, - Type: t, - Metadata: m, - Nullable: n.nullable, - } -} - -// Avro primitive type. -// -// NOTE: Arrow Binary type is used as a catchall to avoid potential data loss. 
-func avroPrimitiveToArrowType(avroFieldType string) arrow.DataType { - switch avroFieldType { - // int: 32-bit signed integer - case "int": - return arrow.PrimitiveTypes.Int32 - // long: 64-bit signed integer - case "long": - return arrow.PrimitiveTypes.Int64 - // float: single precision (32-bit) IEEE 754 floating-point number - case "float": - return arrow.PrimitiveTypes.Float32 - // double: double precision (64-bit) IEEE 754 floating-point number - case "double": - return arrow.PrimitiveTypes.Float64 - // bytes: sequence of 8-bit unsigned bytes - case "bytes": - return arrow.BinaryTypes.Binary - // boolean: a binary value - case "boolean": - return arrow.FixedWidthTypes.Boolean - // string: unicode character sequence - case "string": - return arrow.BinaryTypes.String - } - return nil -} - -func avroLogicalToArrowField(n *schemaNode) { - var dt arrow.DataType - // Avro logical types - switch lt := n.schema.(avro.LogicalTypeSchema).Logical(); lt.Type() { - // The decimal logical type represents an arbitrary-precision signed decimal number of the form unscaled × 10-scale. - // A decimal logical type annotates Avro bytes or fixed types. The byte array must contain the two’s-complement - // representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified - // using an attribute. - // - // The following attributes are supported: - // scale, a JSON integer representing the scale (optional). If not specified the scale is 0. - // precision, a JSON integer representing the (maximum) precision of decimals stored in this type (required). - case "decimal": - id := arrow.DECIMAL128 - if lt.(*avro.DecimalLogicalSchema).Precision() > decimal128.MaxPrecision { - id = arrow.DECIMAL256 - } - dt, _ = arrow.NewDecimalType(id, int32(lt.(*avro.DecimalLogicalSchema).Precision()), int32(lt.(*avro.DecimalLogicalSchema).Scale())) - - // The uuid logical type represents a random generated universally unique identifier (UUID). 
- // A uuid logical type annotates an Avro string. The string has to conform with RFC-4122 - case "uuid": - dt = extensions.NewUUIDType() - - // The date logical type represents a date within the calendar, with no reference to a particular - // time zone or time of day. - // A date logical type annotates an Avro int, where the int stores the number of days from the unix epoch, - // 1 January 1970 (ISO calendar). - case "date": - dt = arrow.FixedWidthTypes.Date32 - - // The time-millis logical type represents a time of day, with no reference to a particular calendar, - // time zone or date, with a precision of one millisecond. - // A time-millis logical type annotates an Avro int, where the int stores the number of milliseconds - // after midnight, 00:00:00.000. - case "time-millis": - dt = arrow.FixedWidthTypes.Time32ms - - // The time-micros logical type represents a time of day, with no reference to a particular calendar, - // time zone or date, with a precision of one microsecond. - // A time-micros logical type annotates an Avro long, where the long stores the number of microseconds - // after midnight, 00:00:00.000000. - case "time-micros": - dt = arrow.FixedWidthTypes.Time64us - - // The timestamp-millis logical type represents an instant on the global timeline, independent of a - // particular time zone or calendar, with a precision of one millisecond. Please note that time zone - // information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, - // but not the original representation. In practice, such timestamps are typically displayed to users in - // their local time zones, therefore they may be displayed differently depending on the execution environment. - // A timestamp-millis logical type annotates an Avro long, where the long stores the number of milliseconds - // from the unix epoch, 1 January 1970 00:00:00.000 UTC. 
- case "timestamp-millis": - dt = arrow.FixedWidthTypes.Timestamp_ms - - // The timestamp-micros logical type represents an instant on the global timeline, independent of a - // particular time zone or calendar, with a precision of one microsecond. Please note that time zone - // information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, - // but not the original representation. In practice, such timestamps are typically displayed to users - // in their local time zones, therefore they may be displayed differently depending on the execution environment. - // A timestamp-micros logical type annotates an Avro long, where the long stores the number of microseconds - // from the unix epoch, 1 January 1970 00:00:00.000000 UTC. - case "timestamp-micros": - dt = arrow.FixedWidthTypes.Timestamp_us - - // The local-timestamp-millis logical type represents a timestamp in a local timezone, regardless of - // what specific time zone is considered local, with a precision of one millisecond. - // A local-timestamp-millis logical type annotates an Avro long, where the long stores the number of - // milliseconds, from 1 January 1970 00:00:00.000. - // Note: not implemented in hamba/avro - // case "local-timestamp-millis": - // dt = &arrow.TimestampType{Unit: arrow.Millisecond} - - // The local-timestamp-micros logical type represents a timestamp in a local timezone, regardless of - // what specific time zone is considered local, with a precision of one microsecond. - // A local-timestamp-micros logical type annotates an Avro long, where the long stores the number of - // microseconds, from 1 January 1970 00:00:00.000000. - // case "local-timestamp-micros": - // Note: not implemented in hamba/avro - // dt = &arrow.TimestampType{Unit: arrow.Microsecond} - - // The duration logical type represents an amount of time defined by a number of months, days and milliseconds. 
- // This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the - // duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other - // standard periods such as years, quarters, hours and minutes can be expressed through these basic periods. - - // A duration logical type annotates Avro fixed type of size 12, which stores three little-endian unsigned integers - // that represent durations at different granularities of time. The first stores a number in months, the second - // stores a number in days, and the third stores a number in milliseconds. - case "duration": - dt = arrow.FixedWidthTypes.MonthDayNanoInterval - } - n.arrowField = buildArrowField(n, dt, arrow.Metadata{}) -} diff --git a/go/arrow/avro/schema_test.go b/go/arrow/avro/schema_test.go deleted file mode 100644 index 395abcb694d84..0000000000000 --- a/go/arrow/avro/schema_test.go +++ /dev/null @@ -1,362 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package avro - -import ( - "fmt" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - hamba "github.com/hamba/avro/v2" -) - -func TestSchemaStringEqual(t *testing.T) { - tests := []struct { - avroSchema string - arrowSchema []arrow.Field - }{ - { - avroSchema: `{ - "fields": [ - { - "name": "inheritNull", - "type": { - "name": "Simple", - "symbols": [ - "a", - "b" - ], - "type": "enum" - } - }, - { - "name": "explicitNamespace", - "type": { - "name": "test", - "namespace": "org.hamba.avro", - "size": 12, - "type": "fixed" - } - }, - { - "name": "fullName", - "type": { - "type": "record", - "name": "fullName_data", - "namespace": "ignored", - "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.", - "fields": [{ - "name": "inheritNamespace", - "type": { - "type": "enum", - "name": "Understanding", - "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. 
The fullname is 'a.full.Understanding'.", - "symbols": ["d", "e"] - } - }, { - "name": "md5", - "type": { - "name": "md5_data", - "type": "fixed", - "size": 16, - "namespace": "ignored" - } - } - ] - } - }, - { - "name": "id", - "type": "int" - }, - { - "name": "bigId", - "type": "long" - }, - { - "name": "temperature", - "type": [ - "null", - "float" - ] - }, - { - "name": "fraction", - "type": [ - "null", - "double" - ] - }, - { - "name": "is_emergency", - "type": "boolean" - }, - { - "name": "remote_ip", - "type": [ - "null", - "bytes" - ] - }, - { - "name": "person", - "type": { - "fields": [ - { - "name": "lastname", - "type": "string" - }, - { - "name": "address", - "type": { - "fields": [ - { - "name": "streetaddress", - "type": "string" - }, - { - "name": "city", - "type": "string" - } - ], - "name": "AddressUSRecord", - "type": "record" - } - }, - { - "name": "mapfield", - "type": { - "default": { - }, - "type": "map", - "values": "long" - } - }, - { - "name": "arrayField", - "type": { - "default": [ - ], - "items": "string", - "type": "array" - } - } - ], - "name": "person_data", - "type": "record" - } - }, - { - "name": "decimalField", - "type": { - "logicalType": "decimal", - "precision": 4, - "scale": 2, - "type": "bytes" - } - }, - { - "logicalType": "uuid", - "name": "uuidField", - "type": "string" - }, - { - "name": "timemillis", - "type": { - "type": "int", - "logicalType": "time-millis" - } - }, - { - "name": "timemicros", - "type": { - "type": "long", - "logicalType": "time-micros" - } - }, - { - "name": "timestampmillis", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - }, - { - "name": "timestampmicros", - "type": { - "type": "long", - "logicalType": "timestamp-micros" - } - }, - { - "name": "duration", - "type": { - "name": "duration", - "namespace": "whyowhy", - "logicalType": "duration", - "size": 12, - "type": "fixed" - } - }, - { - "name": "date", - "type": { - "logicalType": "date", - "type": "int" - } - } - ], - 
"name": "Example", - "type": "record" - }`, - arrowSchema: []arrow.Field{ - { - Name: "inheritNull", - Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String, Ordered: false}, - Metadata: arrow.MetadataFrom(map[string]string{"0": "a", "1": "b"}), - }, - { - Name: "explicitNamespace", - Type: &arrow.FixedSizeBinaryType{ByteWidth: 12}, - }, - { - Name: "fullName", - Type: arrow.StructOf( - arrow.Field{ - Name: "inheritNamespace", - Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String, Ordered: false}, - }, - arrow.Field{ - Name: "md5", - Type: &arrow.FixedSizeBinaryType{ByteWidth: 16}, - }, - ), - }, - { - Name: "id", - Type: arrow.PrimitiveTypes.Int32, - }, - { - Name: "bigId", - Type: arrow.PrimitiveTypes.Int64, - }, - { - Name: "temperature", - Type: arrow.PrimitiveTypes.Float32, - Nullable: true, - }, - { - Name: "fraction", - Type: arrow.PrimitiveTypes.Float64, - Nullable: true, - }, - { - Name: "is_emergency", - Type: arrow.FixedWidthTypes.Boolean, - }, - { - Name: "remote_ip", - Type: arrow.BinaryTypes.Binary, - Nullable: true, - }, - { - Name: "person", - Type: arrow.StructOf( - arrow.Field{ - Name: "lastname", - Type: arrow.BinaryTypes.String, - Nullable: true, - }, - arrow.Field{ - Name: "address", - Type: arrow.StructOf( - arrow.Field{ - Name: "streetaddress", - Type: arrow.BinaryTypes.String, - }, - arrow.Field{ - Name: "city", - Type: arrow.BinaryTypes.String, - }, - ), - }, - arrow.Field{ - Name: "mapfield", - Type: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int64), - Nullable: true, - }, - arrow.Field{ - Name: "arrayField", - Type: arrow.ListOfNonNullable(arrow.BinaryTypes.String), - }, - ), - }, - { - Name: "decimalField", - Type: &arrow.Decimal128Type{Precision: 4, Scale: 2}, - }, - { - Name: "uuidField", - Type: arrow.BinaryTypes.String, - }, - { - Name: "timemillis", - Type: arrow.FixedWidthTypes.Time32ms, - }, - { - Name: "timemicros", 
- Type: arrow.FixedWidthTypes.Time64us, - }, - { - Name: "timestampmillis", - Type: arrow.FixedWidthTypes.Timestamp_ms, - }, - { - Name: "timestampmicros", - Type: arrow.FixedWidthTypes.Timestamp_us, - }, - { - Name: "duration", - Type: arrow.FixedWidthTypes.MonthDayNanoInterval, - }, - { - Name: "date", - Type: arrow.FixedWidthTypes.Date32, - }, - }, - }, - } - - for _, test := range tests { - t.Run("", func(t *testing.T) { - want := arrow.NewSchema(test.arrowSchema, nil) - schema, err := hamba.ParseBytes([]byte(test.avroSchema)) - if err != nil { - t.Fatalf("%v", err) - } - got, err := ArrowSchemaFromAvro(schema) - if err != nil { - t.Fatalf("%v", err) - } - if !(fmt.Sprintf("%+v", want.String()) == fmt.Sprintf("%+v", got.String())) { - t.Fatalf("got=%v,\n want=%v", got.String(), want.String()) - } else { - t.Logf("schema.String() comparison passed") - } - }) - } -} diff --git a/go/arrow/avro/testdata/arrayrecordmap.avro b/go/arrow/avro/testdata/arrayrecordmap.avro deleted file mode 100644 index 84a8b59b427b5..0000000000000 Binary files a/go/arrow/avro/testdata/arrayrecordmap.avro and /dev/null differ diff --git a/go/arrow/avro/testdata/githubsamplecommits.avro b/go/arrow/avro/testdata/githubsamplecommits.avro deleted file mode 100644 index f16d17d29e991..0000000000000 Binary files a/go/arrow/avro/testdata/githubsamplecommits.avro and /dev/null differ diff --git a/go/arrow/bitutil/Makefile b/go/arrow/bitutil/Makefile deleted file mode 100644 index 12dd1d3491745..0000000000000 --- a/go/arrow/bitutil/Makefile +++ /dev/null @@ -1,62 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# this converts rotate instructions from "ro[lr] " -> "ro[lr] , 1" for yasm compatibility -PERL_FIXUP_ROTATE=perl -i -pe 's/(ro[rl]\s+\w{2,3})$$/\1, 1/' - -C2GOASM=c2goasm -CC=clang-11 -C_FLAGS=-target x86_64-unknown-none -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 \ - -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -O3 -fno-builtin -ffast-math -fno-jump-tables -I_lib -ASM_FLAGS_AVX2=-mavx2 -mfma -ASM_FLAGS_SSE4=-msse4 -ASM_FLAGS_BMI2=-mbmi2 -ASM_FLAGS_POPCNT=-mpopcnt - -C_FLAGS_NEON=-O3 -fvectorize -mllvm -force-vector-width=16 -fno-asynchronous-unwind-tables -mno-red-zone -mstackrealign -fno-exceptions \ - -fno-rtti -fno-builtin -ffast-math -fno-jump-tables -I_lib - -GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go') -ALL_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -name '*.s' -not -name '*_test.go') - -.PHONEY: assembly - -INTEL_SOURCES := \ - bitmap_ops_avx2_amd64.s bitmap_ops_sse4_amd64.s - -# -# ARROW-15336: DO NOT add the assembly target for Arm64 (ARM_SOURCES) until c2goasm added the Arm64 support. -# min_max_neon_arm64.s was generated by asm2plan9s. -# And manually formatted it as the Arm64 Plan9. 
-# - -assembly: $(INTEL_SOURCES) - -_lib/bitmap_ops_avx2_amd64.s: _lib/bitmap_ops.c - $(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/bitmap_ops_sse4_amd64.s: _lib/bitmap_ops.c - $(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -bitmap_ops_avx2_amd64.s: _lib/bitmap_ops_avx2_amd64.s - $(C2GOASM) -a -f $^ $@ - -bitmap_ops_sse4_amd64.s: _lib/bitmap_ops_sse4_amd64.s - $(C2GOASM) -a -f $^ $@ - -clean: - rm -f $(INTEL_SOURCES) - rm -f $(addprefix _lib/,$(INTEL_SOURCES)) diff --git a/go/arrow/bitutil/_lib/bitmap_ops.c b/go/arrow/bitutil/_lib/bitmap_ops.c deleted file mode 100644 index f48b4d4d821cb..0000000000000 --- a/go/arrow/bitutil/_lib/bitmap_ops.c +++ /dev/null @@ -1,46 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "../../../internal/utils/_lib/arch.h" -#include - -// like elsewhere in this repo, this .c file gets compiled into optimized -// assembly and then converted to go plan9 assembly via c2goasm so we can -// call these functions. see the Makefile in the parent directory. 
- -void FULL_NAME(bitmap_aligned_and)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) { - for (int64_t i = 0; i < nbytes; ++i) { - out[i] = left[i] & right[i]; - } -} - -void FULL_NAME(bitmap_aligned_or)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) { - for (int64_t i = 0; i < nbytes; ++i) { - out[i] = left[i] | right[i]; - } -} - -void FULL_NAME(bitmap_aligned_and_not)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) { - for (int64_t i = 0; i < nbytes; ++i) { - out[i] = left[i] & ~right[i]; - } -} - -void FULL_NAME(bitmap_aligned_xor)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) { - for (int64_t i = 0; i < nbytes; ++i) { - out[i] = left[i] ^ right[i]; - } -} diff --git a/go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s b/go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s deleted file mode 100644 index a4010dab55b25..0000000000000 --- a/go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s +++ /dev/null @@ -1,410 +0,0 @@ - .text - .intel_syntax noprefix - .file "bitmap_ops.c" - .globl bitmap_aligned_and_avx2 # -- Begin function bitmap_aligned_and_avx2 - .p2align 4, 0x90 - .type bitmap_aligned_and_avx2,@function -bitmap_aligned_and_avx2: # @bitmap_aligned_and_avx2 -# %bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB0_12 -# %bb.1: - cmp rcx, 127 - ja .LBB0_7 -# %bb.2: - xor r10d, r10d - jmp .LBB0_3 -.LBB0_7: - lea r9, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r11b - lea rax, [rsi + rcx] - cmp r9, rdi - seta bl - cmp rax, rdx - seta r8b - cmp r9, rsi - seta r9b - xor r10d, r10d - test r11b, bl - jne .LBB0_3 -# %bb.8: - and r8b, r9b - jne .LBB0_3 -# %bb.9: - mov r10, rcx - and r10, -128 - xor r8d, r8d - .p2align 4, 0x90 -.LBB0_10: # =>This Inner Loop Header: Depth=1 - vmovups ymm0, ymmword ptr [rsi + r8] - vmovups ymm1, ymmword ptr [rsi + r8 + 32] - vmovups ymm2, ymmword ptr [rsi + r8 + 64] - vmovups ymm3, 
ymmword ptr [rsi + r8 + 96] - vandps ymm0, ymm0, ymmword ptr [rdi + r8] - vandps ymm1, ymm1, ymmword ptr [rdi + r8 + 32] - vandps ymm2, ymm2, ymmword ptr [rdi + r8 + 64] - vandps ymm3, ymm3, ymmword ptr [rdi + r8 + 96] - vmovups ymmword ptr [rdx + r8], ymm0 - vmovups ymmword ptr [rdx + r8 + 32], ymm1 - vmovups ymmword ptr [rdx + r8 + 64], ymm2 - vmovups ymmword ptr [rdx + r8 + 96], ymm3 - sub r8, -128 - cmp r10, r8 - jne .LBB0_10 -# %bb.11: - cmp r10, rcx - je .LBB0_12 -.LBB0_3: - mov r8, r10 - not r8 - add r8, rcx - mov r9, rcx - and r9, 3 - je .LBB0_5 - .p2align 4, 0x90 -.LBB0_4: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r10] - and al, byte ptr [rdi + r10] - mov byte ptr [rdx + r10], al - add r10, 1 - add r9, -1 - jne .LBB0_4 -.LBB0_5: - cmp r8, 3 - jb .LBB0_12 - .p2align 4, 0x90 -.LBB0_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r10] - and al, byte ptr [rdi + r10] - mov byte ptr [rdx + r10], al - movzx eax, byte ptr [rsi + r10 + 1] - and al, byte ptr [rdi + r10 + 1] - mov byte ptr [rdx + r10 + 1], al - movzx eax, byte ptr [rsi + r10 + 2] - and al, byte ptr [rdi + r10 + 2] - mov byte ptr [rdx + r10 + 2], al - movzx eax, byte ptr [rsi + r10 + 3] - and al, byte ptr [rdi + r10 + 3] - mov byte ptr [rdx + r10 + 3], al - add r10, 4 - cmp rcx, r10 - jne .LBB0_6 -.LBB0_12: - lea rsp, [rbp - 8] - pop rbx - pop rbp - vzeroupper - ret -.Lfunc_end0: - .size bitmap_aligned_and_avx2, .Lfunc_end0-bitmap_aligned_and_avx2 - # -- End function - .globl bitmap_aligned_or_avx2 # -- Begin function bitmap_aligned_or_avx2 - .p2align 4, 0x90 - .type bitmap_aligned_or_avx2,@function -bitmap_aligned_or_avx2: # @bitmap_aligned_or_avx2 -# %bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB1_12 -# %bb.1: - cmp rcx, 127 - ja .LBB1_7 -# %bb.2: - xor r10d, r10d - jmp .LBB1_3 -.LBB1_7: - lea r9, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r11b - lea rax, [rsi + rcx] - cmp r9, rdi - seta bl - cmp rax, rdx 
- seta r8b - cmp r9, rsi - seta r9b - xor r10d, r10d - test r11b, bl - jne .LBB1_3 -# %bb.8: - and r8b, r9b - jne .LBB1_3 -# %bb.9: - mov r10, rcx - and r10, -128 - xor r8d, r8d - .p2align 4, 0x90 -.LBB1_10: # =>This Inner Loop Header: Depth=1 - vmovups ymm0, ymmword ptr [rsi + r8] - vmovups ymm1, ymmword ptr [rsi + r8 + 32] - vmovups ymm2, ymmword ptr [rsi + r8 + 64] - vmovups ymm3, ymmword ptr [rsi + r8 + 96] - vorps ymm0, ymm0, ymmword ptr [rdi + r8] - vorps ymm1, ymm1, ymmword ptr [rdi + r8 + 32] - vorps ymm2, ymm2, ymmword ptr [rdi + r8 + 64] - vorps ymm3, ymm3, ymmword ptr [rdi + r8 + 96] - vmovups ymmword ptr [rdx + r8], ymm0 - vmovups ymmword ptr [rdx + r8 + 32], ymm1 - vmovups ymmword ptr [rdx + r8 + 64], ymm2 - vmovups ymmword ptr [rdx + r8 + 96], ymm3 - sub r8, -128 - cmp r10, r8 - jne .LBB1_10 -# %bb.11: - cmp r10, rcx - je .LBB1_12 -.LBB1_3: - mov r8, r10 - not r8 - add r8, rcx - mov r9, rcx - and r9, 3 - je .LBB1_5 - .p2align 4, 0x90 -.LBB1_4: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r10] - or al, byte ptr [rdi + r10] - mov byte ptr [rdx + r10], al - add r10, 1 - add r9, -1 - jne .LBB1_4 -.LBB1_5: - cmp r8, 3 - jb .LBB1_12 - .p2align 4, 0x90 -.LBB1_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r10] - or al, byte ptr [rdi + r10] - mov byte ptr [rdx + r10], al - movzx eax, byte ptr [rsi + r10 + 1] - or al, byte ptr [rdi + r10 + 1] - mov byte ptr [rdx + r10 + 1], al - movzx eax, byte ptr [rsi + r10 + 2] - or al, byte ptr [rdi + r10 + 2] - mov byte ptr [rdx + r10 + 2], al - movzx eax, byte ptr [rsi + r10 + 3] - or al, byte ptr [rdi + r10 + 3] - mov byte ptr [rdx + r10 + 3], al - add r10, 4 - cmp rcx, r10 - jne .LBB1_6 -.LBB1_12: - lea rsp, [rbp - 8] - pop rbx - pop rbp - vzeroupper - ret -.Lfunc_end1: - .size bitmap_aligned_or_avx2, .Lfunc_end1-bitmap_aligned_or_avx2 - # -- End function - .globl bitmap_aligned_and_not_avx2 # -- Begin function bitmap_aligned_and_not_avx2 - .p2align 4, 0x90 - .type 
bitmap_aligned_and_not_avx2,@function -bitmap_aligned_and_not_avx2: # @bitmap_aligned_and_not_avx2 -# %bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB2_12 -# %bb.1: - cmp rcx, 127 - ja .LBB2_7 -# %bb.2: - xor r8d, r8d - jmp .LBB2_3 -.LBB2_7: - lea r8, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r11b - lea rax, [rsi + rcx] - cmp r8, rdi - seta bl - cmp rax, rdx - seta r10b - cmp r8, rsi - seta r9b - xor r8d, r8d - test r11b, bl - jne .LBB2_3 -# %bb.8: - and r10b, r9b - jne .LBB2_3 -# %bb.9: - mov r8, rcx - and r8, -128 - xor eax, eax - .p2align 4, 0x90 -.LBB2_10: # =>This Inner Loop Header: Depth=1 - vmovups ymm0, ymmword ptr [rsi + rax] - vmovups ymm1, ymmword ptr [rsi + rax + 32] - vmovups ymm2, ymmword ptr [rsi + rax + 64] - vmovups ymm3, ymmword ptr [rsi + rax + 96] - vandnps ymm0, ymm0, ymmword ptr [rdi + rax] - vandnps ymm1, ymm1, ymmword ptr [rdi + rax + 32] - vandnps ymm2, ymm2, ymmword ptr [rdi + rax + 64] - vandnps ymm3, ymm3, ymmword ptr [rdi + rax + 96] - vmovups ymmword ptr [rdx + rax], ymm0 - vmovups ymmword ptr [rdx + rax + 32], ymm1 - vmovups ymmword ptr [rdx + rax + 64], ymm2 - vmovups ymmword ptr [rdx + rax + 96], ymm3 - sub rax, -128 - cmp r8, rax - jne .LBB2_10 -# %bb.11: - cmp r8, rcx - je .LBB2_12 -.LBB2_3: - mov r9, r8 - not r9 - test cl, 1 - je .LBB2_5 -# %bb.4: - mov al, byte ptr [rsi + r8] - not al - and al, byte ptr [rdi + r8] - mov byte ptr [rdx + r8], al - or r8, 1 -.LBB2_5: - add r9, rcx - je .LBB2_12 - .p2align 4, 0x90 -.LBB2_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r8] - not al - and al, byte ptr [rdi + r8] - mov byte ptr [rdx + r8], al - movzx eax, byte ptr [rsi + r8 + 1] - not al - and al, byte ptr [rdi + r8 + 1] - mov byte ptr [rdx + r8 + 1], al - add r8, 2 - cmp rcx, r8 - jne .LBB2_6 -.LBB2_12: - lea rsp, [rbp - 8] - pop rbx - pop rbp - vzeroupper - ret -.Lfunc_end2: - .size bitmap_aligned_and_not_avx2, .Lfunc_end2-bitmap_aligned_and_not_avx2 - # -- End 
function - .globl bitmap_aligned_xor_avx2 # -- Begin function bitmap_aligned_xor_avx2 - .p2align 4, 0x90 - .type bitmap_aligned_xor_avx2,@function -bitmap_aligned_xor_avx2: # @bitmap_aligned_xor_avx2 -# %bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB3_12 -# %bb.1: - cmp rcx, 127 - ja .LBB3_7 -# %bb.2: - xor r10d, r10d - jmp .LBB3_3 -.LBB3_7: - lea r9, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r11b - lea rax, [rsi + rcx] - cmp r9, rdi - seta bl - cmp rax, rdx - seta r8b - cmp r9, rsi - seta r9b - xor r10d, r10d - test r11b, bl - jne .LBB3_3 -# %bb.8: - and r8b, r9b - jne .LBB3_3 -# %bb.9: - mov r10, rcx - and r10, -128 - xor r8d, r8d - .p2align 4, 0x90 -.LBB3_10: # =>This Inner Loop Header: Depth=1 - vmovups ymm0, ymmword ptr [rsi + r8] - vmovups ymm1, ymmword ptr [rsi + r8 + 32] - vmovups ymm2, ymmword ptr [rsi + r8 + 64] - vmovups ymm3, ymmword ptr [rsi + r8 + 96] - vxorps ymm0, ymm0, ymmword ptr [rdi + r8] - vxorps ymm1, ymm1, ymmword ptr [rdi + r8 + 32] - vxorps ymm2, ymm2, ymmword ptr [rdi + r8 + 64] - vxorps ymm3, ymm3, ymmword ptr [rdi + r8 + 96] - vmovups ymmword ptr [rdx + r8], ymm0 - vmovups ymmword ptr [rdx + r8 + 32], ymm1 - vmovups ymmword ptr [rdx + r8 + 64], ymm2 - vmovups ymmword ptr [rdx + r8 + 96], ymm3 - sub r8, -128 - cmp r10, r8 - jne .LBB3_10 -# %bb.11: - cmp r10, rcx - je .LBB3_12 -.LBB3_3: - mov r8, r10 - not r8 - add r8, rcx - mov r9, rcx - and r9, 3 - je .LBB3_5 - .p2align 4, 0x90 -.LBB3_4: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r10] - xor al, byte ptr [rdi + r10] - mov byte ptr [rdx + r10], al - add r10, 1 - add r9, -1 - jne .LBB3_4 -.LBB3_5: - cmp r8, 3 - jb .LBB3_12 - .p2align 4, 0x90 -.LBB3_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r10] - xor al, byte ptr [rdi + r10] - mov byte ptr [rdx + r10], al - movzx eax, byte ptr [rsi + r10 + 1] - xor al, byte ptr [rdi + r10 + 1] - mov byte ptr [rdx + r10 + 1], al - movzx eax, byte ptr [rsi + 
r10 + 2] - xor al, byte ptr [rdi + r10 + 2] - mov byte ptr [rdx + r10 + 2], al - movzx eax, byte ptr [rsi + r10 + 3] - xor al, byte ptr [rdi + r10 + 3] - mov byte ptr [rdx + r10 + 3], al - add r10, 4 - cmp rcx, r10 - jne .LBB3_6 -.LBB3_12: - lea rsp, [rbp - 8] - pop rbx - pop rbp - vzeroupper - ret -.Lfunc_end3: - .size bitmap_aligned_xor_avx2, .Lfunc_end3-bitmap_aligned_xor_avx2 - # -- End function - .ident "Ubuntu clang version 11.1.0-6" - .section ".note.GNU-stack","",@progbits - .addrsig diff --git a/go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s b/go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s deleted file mode 100644 index 840c1a623bb1b..0000000000000 --- a/go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s +++ /dev/null @@ -1,530 +0,0 @@ - .text - .intel_syntax noprefix - .file "bitmap_ops.c" - .globl bitmap_aligned_and_sse4 # -- Begin function bitmap_aligned_and_sse4 - .p2align 4, 0x90 - .type bitmap_aligned_and_sse4,@function -bitmap_aligned_and_sse4: # @bitmap_aligned_and_sse4 -# %bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB0_16 -# %bb.1: - cmp rcx, 31 - ja .LBB0_7 -# %bb.2: - xor r11d, r11d -.LBB0_3: - mov r8, r11 - not r8 - add r8, rcx - mov r9, rcx - and r9, 3 - je .LBB0_5 - .p2align 4, 0x90 -.LBB0_4: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r11] - and al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - add r11, 1 - add r9, -1 - jne .LBB0_4 -.LBB0_5: - cmp r8, 3 - jb .LBB0_16 - .p2align 4, 0x90 -.LBB0_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r11] - and al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - movzx eax, byte ptr [rsi + r11 + 1] - and al, byte ptr [rdi + r11 + 1] - mov byte ptr [rdx + r11 + 1], al - movzx eax, byte ptr [rsi + r11 + 2] - and al, byte ptr [rdi + r11 + 2] - mov byte ptr [rdx + r11 + 2], al - movzx eax, byte ptr [rsi + r11 + 3] - and al, byte ptr [rdi + r11 + 3] - mov byte ptr [rdx + r11 + 3], al - add r11, 4 - cmp rcx, r11 - jne 
.LBB0_6 - jmp .LBB0_16 -.LBB0_7: - lea r9, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r10b - lea rax, [rsi + rcx] - cmp r9, rdi - seta bl - cmp rax, rdx - seta r8b - cmp r9, rsi - seta r9b - xor r11d, r11d - test r10b, bl - jne .LBB0_3 -# %bb.8: - and r8b, r9b - jne .LBB0_3 -# %bb.9: - mov r11, rcx - and r11, -32 - lea rax, [r11 - 32] - mov r9, rax - shr r9, 5 - add r9, 1 - test rax, rax - je .LBB0_10 -# %bb.11: - mov r10, r9 - and r10, -2 - neg r10 - xor r8d, r8d - .p2align 4, 0x90 -.LBB0_12: # =>This Inner Loop Header: Depth=1 - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - andps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - andps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 - movups xmm0, xmmword ptr [rdi + r8 + 32] - movups xmm1, xmmword ptr [rdi + r8 + 48] - movups xmm2, xmmword ptr [rsi + r8 + 32] - andps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 48] - andps xmm0, xmm1 - movups xmmword ptr [rdx + r8 + 32], xmm2 - movups xmmword ptr [rdx + r8 + 48], xmm0 - add r8, 64 - add r10, 2 - jne .LBB0_12 -# %bb.13: - test r9b, 1 - je .LBB0_15 -.LBB0_14: - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - andps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - andps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 -.LBB0_15: - cmp r11, rcx - jne .LBB0_3 -.LBB0_16: - lea rsp, [rbp - 8] - pop rbx - pop rbp - ret -.LBB0_10: - xor r8d, r8d - test r9b, 1 - jne .LBB0_14 - jmp .LBB0_15 -.Lfunc_end0: - .size bitmap_aligned_and_sse4, .Lfunc_end0-bitmap_aligned_and_sse4 - # -- End function - .globl bitmap_aligned_or_sse4 # -- Begin function bitmap_aligned_or_sse4 - .p2align 4, 0x90 - .type bitmap_aligned_or_sse4,@function -bitmap_aligned_or_sse4: # @bitmap_aligned_or_sse4 -# %bb.0: - push rbp - mov rbp, rsp - push 
rbx - and rsp, -8 - test rcx, rcx - jle .LBB1_16 -# %bb.1: - cmp rcx, 31 - ja .LBB1_7 -# %bb.2: - xor r11d, r11d -.LBB1_3: - mov r8, r11 - not r8 - add r8, rcx - mov r9, rcx - and r9, 3 - je .LBB1_5 - .p2align 4, 0x90 -.LBB1_4: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r11] - or al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - add r11, 1 - add r9, -1 - jne .LBB1_4 -.LBB1_5: - cmp r8, 3 - jb .LBB1_16 - .p2align 4, 0x90 -.LBB1_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r11] - or al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - movzx eax, byte ptr [rsi + r11 + 1] - or al, byte ptr [rdi + r11 + 1] - mov byte ptr [rdx + r11 + 1], al - movzx eax, byte ptr [rsi + r11 + 2] - or al, byte ptr [rdi + r11 + 2] - mov byte ptr [rdx + r11 + 2], al - movzx eax, byte ptr [rsi + r11 + 3] - or al, byte ptr [rdi + r11 + 3] - mov byte ptr [rdx + r11 + 3], al - add r11, 4 - cmp rcx, r11 - jne .LBB1_6 - jmp .LBB1_16 -.LBB1_7: - lea r9, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r10b - lea rax, [rsi + rcx] - cmp r9, rdi - seta bl - cmp rax, rdx - seta r8b - cmp r9, rsi - seta r9b - xor r11d, r11d - test r10b, bl - jne .LBB1_3 -# %bb.8: - and r8b, r9b - jne .LBB1_3 -# %bb.9: - mov r11, rcx - and r11, -32 - lea rax, [r11 - 32] - mov r9, rax - shr r9, 5 - add r9, 1 - test rax, rax - je .LBB1_10 -# %bb.11: - mov r10, r9 - and r10, -2 - neg r10 - xor r8d, r8d - .p2align 4, 0x90 -.LBB1_12: # =>This Inner Loop Header: Depth=1 - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - orps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - orps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 - movups xmm0, xmmword ptr [rdi + r8 + 32] - movups xmm1, xmmword ptr [rdi + r8 + 48] - movups xmm2, xmmword ptr [rsi + r8 + 32] - orps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 48] - orps xmm0, xmm1 - movups 
xmmword ptr [rdx + r8 + 32], xmm2 - movups xmmword ptr [rdx + r8 + 48], xmm0 - add r8, 64 - add r10, 2 - jne .LBB1_12 -# %bb.13: - test r9b, 1 - je .LBB1_15 -.LBB1_14: - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - orps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - orps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 -.LBB1_15: - cmp r11, rcx - jne .LBB1_3 -.LBB1_16: - lea rsp, [rbp - 8] - pop rbx - pop rbp - ret -.LBB1_10: - xor r8d, r8d - test r9b, 1 - jne .LBB1_14 - jmp .LBB1_15 -.Lfunc_end1: - .size bitmap_aligned_or_sse4, .Lfunc_end1-bitmap_aligned_or_sse4 - # -- End function - .globl bitmap_aligned_and_not_sse4 # -- Begin function bitmap_aligned_and_not_sse4 - .p2align 4, 0x90 - .type bitmap_aligned_and_not_sse4,@function -bitmap_aligned_and_not_sse4: # @bitmap_aligned_and_not_sse4 -# %bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB2_16 -# %bb.1: - cmp rcx, 31 - ja .LBB2_7 -# %bb.2: - xor r11d, r11d -.LBB2_3: - mov r8, r11 - not r8 - test cl, 1 - je .LBB2_5 -# %bb.4: - mov al, byte ptr [rsi + r11] - not al - and al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - or r11, 1 -.LBB2_5: - add r8, rcx - je .LBB2_16 - .p2align 4, 0x90 -.LBB2_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r11] - not al - and al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - movzx eax, byte ptr [rsi + r11 + 1] - not al - and al, byte ptr [rdi + r11 + 1] - mov byte ptr [rdx + r11 + 1], al - add r11, 2 - cmp rcx, r11 - jne .LBB2_6 - jmp .LBB2_16 -.LBB2_7: - lea r9, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r10b - lea rax, [rsi + rcx] - cmp r9, rdi - seta bl - cmp rax, rdx - seta r8b - cmp r9, rsi - seta r9b - xor r11d, r11d - test r10b, bl - jne .LBB2_3 -# %bb.8: - and r8b, r9b - jne .LBB2_3 -# %bb.9: - mov r11, rcx - and r11, -32 - lea rax, [r11 - 32] - mov r9, rax - shr r9, 
5 - add r9, 1 - test rax, rax - je .LBB2_10 -# %bb.11: - mov r10, r9 - and r10, -2 - neg r10 - xor r8d, r8d - .p2align 4, 0x90 -.LBB2_12: # =>This Inner Loop Header: Depth=1 - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - andnps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - andnps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 - movups xmm0, xmmword ptr [rdi + r8 + 32] - movups xmm1, xmmword ptr [rdi + r8 + 48] - movups xmm2, xmmword ptr [rsi + r8 + 32] - andnps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 48] - andnps xmm0, xmm1 - movups xmmword ptr [rdx + r8 + 32], xmm2 - movups xmmword ptr [rdx + r8 + 48], xmm0 - add r8, 64 - add r10, 2 - jne .LBB2_12 -# %bb.13: - test r9b, 1 - je .LBB2_15 -.LBB2_14: - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - andnps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - andnps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 -.LBB2_15: - cmp r11, rcx - jne .LBB2_3 -.LBB2_16: - lea rsp, [rbp - 8] - pop rbx - pop rbp - ret -.LBB2_10: - xor r8d, r8d - test r9b, 1 - jne .LBB2_14 - jmp .LBB2_15 -.Lfunc_end2: - .size bitmap_aligned_and_not_sse4, .Lfunc_end2-bitmap_aligned_and_not_sse4 - # -- End function - .globl bitmap_aligned_xor_sse4 # -- Begin function bitmap_aligned_xor_sse4 - .p2align 4, 0x90 - .type bitmap_aligned_xor_sse4,@function -bitmap_aligned_xor_sse4: # @bitmap_aligned_xor_sse4 -# %bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB3_16 -# %bb.1: - cmp rcx, 31 - ja .LBB3_7 -# %bb.2: - xor r11d, r11d -.LBB3_3: - mov r8, r11 - not r8 - add r8, rcx - mov r9, rcx - and r9, 3 - je .LBB3_5 - .p2align 4, 0x90 -.LBB3_4: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r11] - xor al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], 
al - add r11, 1 - add r9, -1 - jne .LBB3_4 -.LBB3_5: - cmp r8, 3 - jb .LBB3_16 - .p2align 4, 0x90 -.LBB3_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r11] - xor al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - movzx eax, byte ptr [rsi + r11 + 1] - xor al, byte ptr [rdi + r11 + 1] - mov byte ptr [rdx + r11 + 1], al - movzx eax, byte ptr [rsi + r11 + 2] - xor al, byte ptr [rdi + r11 + 2] - mov byte ptr [rdx + r11 + 2], al - movzx eax, byte ptr [rsi + r11 + 3] - xor al, byte ptr [rdi + r11 + 3] - mov byte ptr [rdx + r11 + 3], al - add r11, 4 - cmp rcx, r11 - jne .LBB3_6 - jmp .LBB3_16 -.LBB3_7: - lea r9, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r10b - lea rax, [rsi + rcx] - cmp r9, rdi - seta bl - cmp rax, rdx - seta r8b - cmp r9, rsi - seta r9b - xor r11d, r11d - test r10b, bl - jne .LBB3_3 -# %bb.8: - and r8b, r9b - jne .LBB3_3 -# %bb.9: - mov r11, rcx - and r11, -32 - lea rax, [r11 - 32] - mov r9, rax - shr r9, 5 - add r9, 1 - test rax, rax - je .LBB3_10 -# %bb.11: - mov r10, r9 - and r10, -2 - neg r10 - xor r8d, r8d - .p2align 4, 0x90 -.LBB3_12: # =>This Inner Loop Header: Depth=1 - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - xorps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - xorps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 - movups xmm0, xmmword ptr [rdi + r8 + 32] - movups xmm1, xmmword ptr [rdi + r8 + 48] - movups xmm2, xmmword ptr [rsi + r8 + 32] - xorps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 48] - xorps xmm0, xmm1 - movups xmmword ptr [rdx + r8 + 32], xmm2 - movups xmmword ptr [rdx + r8 + 48], xmm0 - add r8, 64 - add r10, 2 - jne .LBB3_12 -# %bb.13: - test r9b, 1 - je .LBB3_15 -.LBB3_14: - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - xorps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - 
xorps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 -.LBB3_15: - cmp r11, rcx - jne .LBB3_3 -.LBB3_16: - lea rsp, [rbp - 8] - pop rbx - pop rbp - ret -.LBB3_10: - xor r8d, r8d - test r9b, 1 - jne .LBB3_14 - jmp .LBB3_15 -.Lfunc_end3: - .size bitmap_aligned_xor_sse4, .Lfunc_end3-bitmap_aligned_xor_sse4 - # -- End function - .ident "Ubuntu clang version 11.1.0-6" - .section ".note.GNU-stack","",@progbits - .addrsig diff --git a/go/arrow/bitutil/bitmap_ops.go b/go/arrow/bitutil/bitmap_ops.go deleted file mode 100644 index 7db750a6dd937..0000000000000 --- a/go/arrow/bitutil/bitmap_ops.go +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package bitutil - -func alignedBitAndGo(left, right, out []byte) { - var ( - nbytes = len(out) - i = 0 - ) - if nbytes > uint64SizeBytes { - // case where we have enough bytes to operate on words - leftWords := bytesToUint64(left[i:]) - rightWords := bytesToUint64(right[i:]) - outWords := bytesToUint64(out[i:]) - - for w := range outWords { - outWords[w] = leftWords[w] & rightWords[w] - } - - i += len(outWords) * uint64SizeBytes - } - // grab any remaining bytes that were fewer than a word - for ; i < nbytes; i++ { - out[i] = left[i] & right[i] - } -} - -func alignedBitAndNotGo(left, right, out []byte) { - var ( - nbytes = len(out) - i = 0 - ) - if nbytes > uint64SizeBytes { - // case where we have enough bytes to operate on words - leftWords := bytesToUint64(left[i:]) - rightWords := bytesToUint64(right[i:]) - outWords := bytesToUint64(out[i:]) - - for w := range outWords { - outWords[w] = leftWords[w] &^ rightWords[w] - } - - i += len(outWords) * uint64SizeBytes - } - // grab any remaining bytes that were fewer than a word - for ; i < nbytes; i++ { - out[i] = left[i] &^ right[i] - } -} - -func alignedBitOrGo(left, right, out []byte) { - var ( - nbytes = len(out) - i = 0 - ) - if nbytes > uint64SizeBytes { - // case where we have enough bytes to operate on words - leftWords := bytesToUint64(left[i:]) - rightWords := bytesToUint64(right[i:]) - outWords := bytesToUint64(out[i:]) - - for w := range outWords { - outWords[w] = leftWords[w] | rightWords[w] - } - - i += len(outWords) * uint64SizeBytes - } - // grab any remaining bytes that were fewer than a word - for ; i < nbytes; i++ { - out[i] = left[i] | right[i] - } -} - -func alignedBitXorGo(left, right, out []byte) { - var ( - nbytes = len(out) - i = 0 - ) - if nbytes > uint64SizeBytes { - // case where we have enough bytes to operate on words - leftWords := bytesToUint64(left[i:]) - rightWords := bytesToUint64(right[i:]) - outWords := bytesToUint64(out[i:]) - - for w := range outWords { - outWords[w] = 
leftWords[w] ^ rightWords[w] - } - - i += len(outWords) * uint64SizeBytes - } - // grab any remaining bytes that were fewer than a word - for ; i < nbytes; i++ { - out[i] = left[i] ^ right[i] - } -} diff --git a/go/arrow/bitutil/bitmap_ops_amd64.go b/go/arrow/bitutil/bitmap_ops_amd64.go deleted file mode 100644 index ad0fd674ab9b7..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_amd64.go +++ /dev/null @@ -1,41 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build !noasm -// +build !noasm - -package bitutil - -import "golang.org/x/sys/cpu" - -func init() { - if cpu.X86.HasAVX2 { - bitAndOp.opAligned = bitmapAlignedAndAVX2 - bitOrOp.opAligned = bitmapAlignedOrAVX2 - bitAndNotOp.opAligned = bitmapAlignedAndNotAVX2 - bitXorOp.opAligned = bitmapAlignedXorAVX2 - } else if cpu.X86.HasSSE42 { - bitAndOp.opAligned = bitmapAlignedAndSSE4 - bitOrOp.opAligned = bitmapAlignedOrSSE4 - bitAndNotOp.opAligned = bitmapAlignedAndNotSSE4 - bitXorOp.opAligned = bitmapAlignedXorSSE4 - } else { - bitAndOp.opAligned = alignedBitAndGo - bitOrOp.opAligned = alignedBitOrGo - bitAndNotOp.opAligned = alignedBitAndNotGo - bitXorOp.opAligned = alignedBitXorGo - } -} diff --git a/go/arrow/bitutil/bitmap_ops_arm64.go b/go/arrow/bitutil/bitmap_ops_arm64.go deleted file mode 100644 index 28d95d84ade2d..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_arm64.go +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build !noasm -// +build !noasm - -package bitutil - -func init() { - bitAndOp.opAligned = alignedBitAndGo - bitOrOp.opAligned = alignedBitOrGo - bitAndNotOp.opAligned = alignedBitAndNotGo - bitXorOp.opAligned = alignedBitXorGo -} diff --git a/go/arrow/bitutil/bitmap_ops_avx2_amd64.go b/go/arrow/bitutil/bitmap_ops_avx2_amd64.go deleted file mode 100644 index 1c01bd0f38015..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_avx2_amd64.go +++ /dev/null @@ -1,52 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build !noasm -// +build !noasm - -package bitutil - -import ( - "unsafe" -) - -//go:noescape -func _bitmap_aligned_and_avx2(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedAndAVX2(left, right, out []byte) { - _bitmap_aligned_and_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} - -//go:noescape -func _bitmap_aligned_or_avx2(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedOrAVX2(left, right, out []byte) { - _bitmap_aligned_or_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} - -//go:noescape -func _bitmap_aligned_and_not_avx2(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedAndNotAVX2(left, right, out []byte) { - _bitmap_aligned_and_not_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} - -//go:noescape -func _bitmap_aligned_xor_avx2(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedXorAVX2(left, right, out []byte) { - _bitmap_aligned_xor_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} diff --git a/go/arrow/bitutil/bitmap_ops_avx2_amd64.s b/go/arrow/bitutil/bitmap_ops_avx2_amd64.s deleted file mode 100644 index 00172e865926d..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_avx2_amd64.s +++ /dev/null @@ -1,373 +0,0 @@ -//+build !noasm !appengine -// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT - -TEXT ·_bitmap_aligned_and_avx2(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB0_12 - LONG $0x7ff98348 // cmp rcx, 127 - JA LBB0_7 - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - JMP LBB0_3 - -LBB0_7: - LONG $0x0a0c8d4c // lea r9, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd3970f41 // seta r11b - LONG $0x0e048d48 
// lea rax, [rsi + rcx] - WORD $0x3949; BYTE $0xf9 // cmp r9, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd0970f41 // seta r8b - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - WORD $0x8441; BYTE $0xdb // test r11b, bl - JNE LBB0_3 - WORD $0x2045; BYTE $0xc8 // and r8b, r9b - JNE LBB0_3 - WORD $0x8949; BYTE $0xca // mov r10, rcx - LONG $0x80e28349 // and r10, -128 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB0_10: - LONG $0x107ca1c4; WORD $0x0604 // vmovups ymm0, yword [rsi + r8] - LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups ymm1, yword [rsi + r8 + 32] - LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups ymm2, yword [rsi + r8 + 64] - LONG $0x107ca1c4; WORD $0x065c; BYTE $0x60 // vmovups ymm3, yword [rsi + r8 + 96] - LONG $0x547ca1c4; WORD $0x0704 // vandps ymm0, ymm0, yword [rdi + r8] - LONG $0x5474a1c4; WORD $0x074c; BYTE $0x20 // vandps ymm1, ymm1, yword [rdi + r8 + 32] - LONG $0x546ca1c4; WORD $0x0754; BYTE $0x40 // vandps ymm2, ymm2, yword [rdi + r8 + 64] - LONG $0x5464a1c4; WORD $0x075c; BYTE $0x60 // vandps ymm3, ymm3, yword [rdi + r8 + 96] - LONG $0x117ca1c4; WORD $0x0204 // vmovups yword [rdx + r8], ymm0 - LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups yword [rdx + r8 + 32], ymm1 - LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups yword [rdx + r8 + 64], ymm2 - LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups yword [rdx + r8 + 96], ymm3 - LONG $0x80e88349 // sub r8, -128 - WORD $0x394d; BYTE $0xc2 // cmp r10, r8 - JNE LBB0_10 - WORD $0x3949; BYTE $0xca // cmp r10, rcx - JE LBB0_12 - -LBB0_3: - WORD $0x894d; BYTE $0xd0 // mov r8, r10 - WORD $0xf749; BYTE $0xd0 // not r8 - WORD $0x0149; BYTE $0xc8 // add r8, rcx - WORD $0x8949; BYTE $0xc9 // mov r9, rcx - LONG $0x03e18349 // and r9, 3 - JE LBB0_5 - -LBB0_4: - LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] - LONG $0x17042242 // and al, byte [rdi + 
r10] - LONG $0x12048842 // mov byte [rdx + r10], al - LONG $0x01c28349 // add r10, 1 - LONG $0xffc18349 // add r9, -1 - JNE LBB0_4 - -LBB0_5: - LONG $0x03f88349 // cmp r8, 3 - JB LBB0_12 - -LBB0_6: - LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] - LONG $0x17042242 // and al, byte [rdi + r10] - LONG $0x12048842 // mov byte [rdx + r10], al - LONG $0x44b60f42; WORD $0x0116 // movzx eax, byte [rsi + r10 + 1] - LONG $0x17442242; BYTE $0x01 // and al, byte [rdi + r10 + 1] - LONG $0x12448842; BYTE $0x01 // mov byte [rdx + r10 + 1], al - LONG $0x44b60f42; WORD $0x0216 // movzx eax, byte [rsi + r10 + 2] - LONG $0x17442242; BYTE $0x02 // and al, byte [rdi + r10 + 2] - LONG $0x12448842; BYTE $0x02 // mov byte [rdx + r10 + 2], al - LONG $0x44b60f42; WORD $0x0316 // movzx eax, byte [rsi + r10 + 3] - LONG $0x17442242; BYTE $0x03 // and al, byte [rdi + r10 + 3] - LONG $0x12448842; BYTE $0x03 // mov byte [rdx + r10 + 3], al - LONG $0x04c28349 // add r10, 4 - WORD $0x394c; BYTE $0xd1 // cmp rcx, r10 - JNE LBB0_6 - -LBB0_12: - VZEROUPPER - RET - -TEXT ·_bitmap_aligned_or_avx2(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB1_12 - LONG $0x7ff98348 // cmp rcx, 127 - JA LBB1_7 - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - JMP LBB1_3 - -LBB1_7: - LONG $0x0a0c8d4c // lea r9, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd3970f41 // seta r11b - LONG $0x0e048d48 // lea rax, [rsi + rcx] - WORD $0x3949; BYTE $0xf9 // cmp r9, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd0970f41 // seta r8b - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - WORD $0x8441; BYTE $0xdb // test r11b, bl - JNE LBB1_3 - WORD $0x2045; BYTE $0xc8 // and r8b, r9b - JNE LBB1_3 - WORD $0x8949; BYTE $0xca // mov r10, 
rcx - LONG $0x80e28349 // and r10, -128 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB1_10: - LONG $0x107ca1c4; WORD $0x0604 // vmovups ymm0, yword [rsi + r8] - LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups ymm1, yword [rsi + r8 + 32] - LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups ymm2, yword [rsi + r8 + 64] - LONG $0x107ca1c4; WORD $0x065c; BYTE $0x60 // vmovups ymm3, yword [rsi + r8 + 96] - LONG $0x567ca1c4; WORD $0x0704 // vorps ymm0, ymm0, yword [rdi + r8] - LONG $0x5674a1c4; WORD $0x074c; BYTE $0x20 // vorps ymm1, ymm1, yword [rdi + r8 + 32] - LONG $0x566ca1c4; WORD $0x0754; BYTE $0x40 // vorps ymm2, ymm2, yword [rdi + r8 + 64] - LONG $0x5664a1c4; WORD $0x075c; BYTE $0x60 // vorps ymm3, ymm3, yword [rdi + r8 + 96] - LONG $0x117ca1c4; WORD $0x0204 // vmovups yword [rdx + r8], ymm0 - LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups yword [rdx + r8 + 32], ymm1 - LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups yword [rdx + r8 + 64], ymm2 - LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups yword [rdx + r8 + 96], ymm3 - LONG $0x80e88349 // sub r8, -128 - WORD $0x394d; BYTE $0xc2 // cmp r10, r8 - JNE LBB1_10 - WORD $0x3949; BYTE $0xca // cmp r10, rcx - JE LBB1_12 - -LBB1_3: - WORD $0x894d; BYTE $0xd0 // mov r8, r10 - WORD $0xf749; BYTE $0xd0 // not r8 - WORD $0x0149; BYTE $0xc8 // add r8, rcx - WORD $0x8949; BYTE $0xc9 // mov r9, rcx - LONG $0x03e18349 // and r9, 3 - JE LBB1_5 - -LBB1_4: - LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] - LONG $0x17040a42 // or al, byte [rdi + r10] - LONG $0x12048842 // mov byte [rdx + r10], al - LONG $0x01c28349 // add r10, 1 - LONG $0xffc18349 // add r9, -1 - JNE LBB1_4 - -LBB1_5: - LONG $0x03f88349 // cmp r8, 3 - JB LBB1_12 - -LBB1_6: - LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] - LONG $0x17040a42 // or al, byte [rdi + r10] - LONG $0x12048842 // mov byte [rdx + r10], al - LONG $0x44b60f42; WORD $0x0116 // movzx eax, byte [rsi + r10 + 1] - LONG $0x17440a42; BYTE $0x01 
// or al, byte [rdi + r10 + 1] - LONG $0x12448842; BYTE $0x01 // mov byte [rdx + r10 + 1], al - LONG $0x44b60f42; WORD $0x0216 // movzx eax, byte [rsi + r10 + 2] - LONG $0x17440a42; BYTE $0x02 // or al, byte [rdi + r10 + 2] - LONG $0x12448842; BYTE $0x02 // mov byte [rdx + r10 + 2], al - LONG $0x44b60f42; WORD $0x0316 // movzx eax, byte [rsi + r10 + 3] - LONG $0x17440a42; BYTE $0x03 // or al, byte [rdi + r10 + 3] - LONG $0x12448842; BYTE $0x03 // mov byte [rdx + r10 + 3], al - LONG $0x04c28349 // add r10, 4 - WORD $0x394c; BYTE $0xd1 // cmp rcx, r10 - JNE LBB1_6 - -LBB1_12: - VZEROUPPER - RET - -TEXT ·_bitmap_aligned_and_not_avx2(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB2_12 - LONG $0x7ff98348 // cmp rcx, 127 - JA LBB2_7 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - JMP LBB2_3 - -LBB2_7: - LONG $0x0a048d4c // lea r8, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd3970f41 // seta r11b - LONG $0x0e048d48 // lea rax, [rsi + rcx] - WORD $0x3949; BYTE $0xf8 // cmp r8, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd2970f41 // seta r10b - WORD $0x3949; BYTE $0xf0 // cmp r8, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - WORD $0x8441; BYTE $0xdb // test r11b, bl - JNE LBB2_3 - WORD $0x2045; BYTE $0xca // and r10b, r9b - JNE LBB2_3 - WORD $0x8949; BYTE $0xc8 // mov r8, rcx - LONG $0x80e08349 // and r8, -128 - WORD $0xc031 // xor eax, eax - -LBB2_10: - LONG $0x0410fcc5; BYTE $0x06 // vmovups ymm0, yword [rsi + rax] - LONG $0x4c10fcc5; WORD $0x2006 // vmovups ymm1, yword [rsi + rax + 32] - LONG $0x5410fcc5; WORD $0x4006 // vmovups ymm2, yword [rsi + rax + 64] - LONG $0x5c10fcc5; WORD $0x6006 // vmovups ymm3, yword [rsi + rax + 96] - LONG $0x0455fcc5; BYTE $0x07 // vandnps ymm0, ymm0, yword [rdi + rax] - LONG 
$0x4c55f4c5; WORD $0x2007 // vandnps ymm1, ymm1, yword [rdi + rax + 32] - LONG $0x5455ecc5; WORD $0x4007 // vandnps ymm2, ymm2, yword [rdi + rax + 64] - LONG $0x5c55e4c5; WORD $0x6007 // vandnps ymm3, ymm3, yword [rdi + rax + 96] - LONG $0x0411fcc5; BYTE $0x02 // vmovups yword [rdx + rax], ymm0 - LONG $0x4c11fcc5; WORD $0x2002 // vmovups yword [rdx + rax + 32], ymm1 - LONG $0x5411fcc5; WORD $0x4002 // vmovups yword [rdx + rax + 64], ymm2 - LONG $0x5c11fcc5; WORD $0x6002 // vmovups yword [rdx + rax + 96], ymm3 - LONG $0x80e88348 // sub rax, -128 - WORD $0x3949; BYTE $0xc0 // cmp r8, rax - JNE LBB2_10 - WORD $0x3949; BYTE $0xc8 // cmp r8, rcx - JE LBB2_12 - -LBB2_3: - WORD $0x894d; BYTE $0xc1 // mov r9, r8 - WORD $0xf749; BYTE $0xd1 // not r9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB2_5 - LONG $0x06048a42 // mov al, byte [rsi + r8] - WORD $0xd0f6 // not al - LONG $0x07042242 // and al, byte [rdi + r8] - LONG $0x02048842 // mov byte [rdx + r8], al - LONG $0x01c88349 // or r8, 1 - -LBB2_5: - WORD $0x0149; BYTE $0xc9 // add r9, rcx - JE LBB2_12 - -LBB2_6: - LONG $0x04b60f42; BYTE $0x06 // movzx eax, byte [rsi + r8] - WORD $0xd0f6 // not al - LONG $0x07042242 // and al, byte [rdi + r8] - LONG $0x02048842 // mov byte [rdx + r8], al - LONG $0x44b60f42; WORD $0x0106 // movzx eax, byte [rsi + r8 + 1] - WORD $0xd0f6 // not al - LONG $0x07442242; BYTE $0x01 // and al, byte [rdi + r8 + 1] - LONG $0x02448842; BYTE $0x01 // mov byte [rdx + r8 + 1], al - LONG $0x02c08349 // add r8, 2 - WORD $0x394c; BYTE $0xc1 // cmp rcx, r8 - JNE LBB2_6 - -LBB2_12: - VZEROUPPER - RET - -TEXT ·_bitmap_aligned_xor_avx2(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB3_12 - LONG $0x7ff98348 // cmp rcx, 127 - JA LBB3_7 - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - JMP LBB3_3 - -LBB3_7: - LONG $0x0a0c8d4c // lea r9, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD 
$0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd3970f41 // seta r11b - LONG $0x0e048d48 // lea rax, [rsi + rcx] - WORD $0x3949; BYTE $0xf9 // cmp r9, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd0970f41 // seta r8b - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - WORD $0x8441; BYTE $0xdb // test r11b, bl - JNE LBB3_3 - WORD $0x2045; BYTE $0xc8 // and r8b, r9b - JNE LBB3_3 - WORD $0x8949; BYTE $0xca // mov r10, rcx - LONG $0x80e28349 // and r10, -128 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB3_10: - LONG $0x107ca1c4; WORD $0x0604 // vmovups ymm0, yword [rsi + r8] - LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups ymm1, yword [rsi + r8 + 32] - LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups ymm2, yword [rsi + r8 + 64] - LONG $0x107ca1c4; WORD $0x065c; BYTE $0x60 // vmovups ymm3, yword [rsi + r8 + 96] - LONG $0x577ca1c4; WORD $0x0704 // vxorps ymm0, ymm0, yword [rdi + r8] - LONG $0x5774a1c4; WORD $0x074c; BYTE $0x20 // vxorps ymm1, ymm1, yword [rdi + r8 + 32] - LONG $0x576ca1c4; WORD $0x0754; BYTE $0x40 // vxorps ymm2, ymm2, yword [rdi + r8 + 64] - LONG $0x5764a1c4; WORD $0x075c; BYTE $0x60 // vxorps ymm3, ymm3, yword [rdi + r8 + 96] - LONG $0x117ca1c4; WORD $0x0204 // vmovups yword [rdx + r8], ymm0 - LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups yword [rdx + r8 + 32], ymm1 - LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups yword [rdx + r8 + 64], ymm2 - LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups yword [rdx + r8 + 96], ymm3 - LONG $0x80e88349 // sub r8, -128 - WORD $0x394d; BYTE $0xc2 // cmp r10, r8 - JNE LBB3_10 - WORD $0x3949; BYTE $0xca // cmp r10, rcx - JE LBB3_12 - -LBB3_3: - WORD $0x894d; BYTE $0xd0 // mov r8, r10 - WORD $0xf749; BYTE $0xd0 // not r8 - WORD $0x0149; BYTE $0xc8 // add r8, rcx - WORD $0x8949; BYTE $0xc9 // mov r9, rcx - LONG $0x03e18349 // and r9, 3 - JE LBB3_5 - -LBB3_4: - LONG 
$0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] - LONG $0x17043242 // xor al, byte [rdi + r10] - LONG $0x12048842 // mov byte [rdx + r10], al - LONG $0x01c28349 // add r10, 1 - LONG $0xffc18349 // add r9, -1 - JNE LBB3_4 - -LBB3_5: - LONG $0x03f88349 // cmp r8, 3 - JB LBB3_12 - -LBB3_6: - LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] - LONG $0x17043242 // xor al, byte [rdi + r10] - LONG $0x12048842 // mov byte [rdx + r10], al - LONG $0x44b60f42; WORD $0x0116 // movzx eax, byte [rsi + r10 + 1] - LONG $0x17443242; BYTE $0x01 // xor al, byte [rdi + r10 + 1] - LONG $0x12448842; BYTE $0x01 // mov byte [rdx + r10 + 1], al - LONG $0x44b60f42; WORD $0x0216 // movzx eax, byte [rsi + r10 + 2] - LONG $0x17443242; BYTE $0x02 // xor al, byte [rdi + r10 + 2] - LONG $0x12448842; BYTE $0x02 // mov byte [rdx + r10 + 2], al - LONG $0x44b60f42; WORD $0x0316 // movzx eax, byte [rsi + r10 + 3] - LONG $0x17443242; BYTE $0x03 // xor al, byte [rdi + r10 + 3] - LONG $0x12448842; BYTE $0x03 // mov byte [rdx + r10 + 3], al - LONG $0x04c28349 // add r10, 4 - WORD $0x394c; BYTE $0xd1 // cmp rcx, r10 - JNE LBB3_6 - -LBB3_12: - VZEROUPPER - RET diff --git a/go/arrow/bitutil/bitmap_ops_noasm.go b/go/arrow/bitutil/bitmap_ops_noasm.go deleted file mode 100644 index e25347791fe45..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_noasm.go +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build noasm -// +build noasm - -package bitutil - -func init() { - bitAndOp.opAligned = alignedBitAndGo - bitOrOp.opAligned = alignedBitOrGo - bitAndNotOp.opAligned = alignedBitAndNotGo - bitXorOp.opAligned = alignedBitXorGo -} diff --git a/go/arrow/bitutil/bitmap_ops_ppc64le.go b/go/arrow/bitutil/bitmap_ops_ppc64le.go deleted file mode 100644 index 28d95d84ade2d..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_ppc64le.go +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build !noasm -// +build !noasm - -package bitutil - -func init() { - bitAndOp.opAligned = alignedBitAndGo - bitOrOp.opAligned = alignedBitOrGo - bitAndNotOp.opAligned = alignedBitAndNotGo - bitXorOp.opAligned = alignedBitXorGo -} diff --git a/go/arrow/bitutil/bitmap_ops_s390x.go b/go/arrow/bitutil/bitmap_ops_s390x.go deleted file mode 100644 index 28d95d84ade2d..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_s390x.go +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm -// +build !noasm - -package bitutil - -func init() { - bitAndOp.opAligned = alignedBitAndGo - bitOrOp.opAligned = alignedBitOrGo - bitAndNotOp.opAligned = alignedBitAndNotGo - bitXorOp.opAligned = alignedBitXorGo -} diff --git a/go/arrow/bitutil/bitmap_ops_sse4_amd64.go b/go/arrow/bitutil/bitmap_ops_sse4_amd64.go deleted file mode 100644 index f16bce12bbfa2..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_sse4_amd64.go +++ /dev/null @@ -1,52 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm -// +build !noasm - -package bitutil - -import ( - "unsafe" -) - -//go:noescape -func _bitmap_aligned_and_sse4(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedAndSSE4(left, right, out []byte) { - _bitmap_aligned_and_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} - -//go:noescape -func _bitmap_aligned_or_sse4(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedOrSSE4(left, right, out []byte) { - _bitmap_aligned_or_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} - -//go:noescape -func _bitmap_aligned_and_not_sse4(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedAndNotSSE4(left, right, out []byte) { - _bitmap_aligned_and_not_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} - -//go:noescape -func _bitmap_aligned_xor_sse4(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedXorSSE4(left, right, out []byte) { - _bitmap_aligned_xor_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} diff --git a/go/arrow/bitutil/bitmap_ops_sse4_amd64.s b/go/arrow/bitutil/bitmap_ops_sse4_amd64.s deleted file mode 100644 index c15e186253a36..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_sse4_amd64.s +++ 
/dev/null @@ -1,501 +0,0 @@ -//+build !noasm !appengine -// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT - -TEXT ·_bitmap_aligned_and_sse4(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB0_16 - LONG $0x1ff98348 // cmp rcx, 31 - JA LBB0_7 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - -LBB0_3: - WORD $0x894d; BYTE $0xd8 // mov r8, r11 - WORD $0xf749; BYTE $0xd0 // not r8 - WORD $0x0149; BYTE $0xc8 // add r8, rcx - WORD $0x8949; BYTE $0xc9 // mov r9, rcx - LONG $0x03e18349 // and r9, 3 - JE LBB0_5 - -LBB0_4: - LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] - LONG $0x1f042242 // and al, byte [rdi + r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x01c38349 // add r11, 1 - LONG $0xffc18349 // add r9, -1 - JNE LBB0_4 - -LBB0_5: - LONG $0x03f88349 // cmp r8, 3 - JB LBB0_16 - -LBB0_6: - LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] - LONG $0x1f042242 // and al, byte [rdi + r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1] - LONG $0x1f442242; BYTE $0x01 // and al, byte [rdi + r11 + 1] - LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al - LONG $0x44b60f42; WORD $0x021e // movzx eax, byte [rsi + r11 + 2] - LONG $0x1f442242; BYTE $0x02 // and al, byte [rdi + r11 + 2] - LONG $0x1a448842; BYTE $0x02 // mov byte [rdx + r11 + 2], al - LONG $0x44b60f42; WORD $0x031e // movzx eax, byte [rsi + r11 + 3] - LONG $0x1f442242; BYTE $0x03 // and al, byte [rdi + r11 + 3] - LONG $0x1a448842; BYTE $0x03 // mov byte [rdx + r11 + 3], al - LONG $0x04c38349 // add r11, 4 - WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 - JNE LBB0_6 - JMP LBB0_16 - -LBB0_7: - LONG $0x0a0c8d4c // lea r9, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd2970f41 // seta r10b - LONG $0x0e048d48 // lea rax, [rsi + rcx] - WORD $0x3949; BYTE 
$0xf9 // cmp r9, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd0970f41 // seta r8b - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - WORD $0x8441; BYTE $0xda // test r10b, bl - JNE LBB0_3 - WORD $0x2045; BYTE $0xc8 // and r8b, r9b - JNE LBB0_3 - WORD $0x8949; BYTE $0xcb // mov r11, rcx - LONG $0xe0e38349 // and r11, -32 - LONG $0xe0438d49 // lea rax, [r11 - 32] - WORD $0x8949; BYTE $0xc1 // mov r9, rax - LONG $0x05e9c149 // shr r9, 5 - LONG $0x01c18349 // add r9, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB0_10 - WORD $0x894d; BYTE $0xca // mov r10, r9 - LONG $0xfee28349 // and r10, -2 - WORD $0xf749; BYTE $0xda // neg r10 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB0_12: - LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 - LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32] - LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48] - LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32] - WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0 - LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48] - WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1 - LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2 - LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0 - LONG $0x40c08349 // add r8, 64 - LONG $0x02c28349 // add r10, 2 - JNE LBB0_12 - LONG $0x01c1f641 // test r9b, 1 - JE LBB0_15 - -LBB0_14: - LONG $0x04100f42; BYTE 
$0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 - -LBB0_15: - WORD $0x3949; BYTE $0xcb // cmp r11, rcx - JNE LBB0_3 - -LBB0_16: - RET - -LBB0_10: - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - LONG $0x01c1f641 // test r9b, 1 - JNE LBB0_14 - JMP LBB0_15 - -TEXT ·_bitmap_aligned_or_sse4(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB1_16 - LONG $0x1ff98348 // cmp rcx, 31 - JA LBB1_7 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - -LBB1_3: - WORD $0x894d; BYTE $0xd8 // mov r8, r11 - WORD $0xf749; BYTE $0xd0 // not r8 - WORD $0x0149; BYTE $0xc8 // add r8, rcx - WORD $0x8949; BYTE $0xc9 // mov r9, rcx - LONG $0x03e18349 // and r9, 3 - JE LBB1_5 - -LBB1_4: - LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] - LONG $0x1f040a42 // or al, byte [rdi + r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x01c38349 // add r11, 1 - LONG $0xffc18349 // add r9, -1 - JNE LBB1_4 - -LBB1_5: - LONG $0x03f88349 // cmp r8, 3 - JB LBB1_16 - -LBB1_6: - LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] - LONG $0x1f040a42 // or al, byte [rdi + r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1] - LONG $0x1f440a42; BYTE $0x01 // or al, byte [rdi + r11 + 1] - LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al - LONG $0x44b60f42; WORD $0x021e // movzx eax, byte [rsi + r11 + 2] - LONG $0x1f440a42; BYTE $0x02 // or al, byte [rdi + r11 + 2] - LONG $0x1a448842; BYTE 
$0x02 // mov byte [rdx + r11 + 2], al - LONG $0x44b60f42; WORD $0x031e // movzx eax, byte [rsi + r11 + 3] - LONG $0x1f440a42; BYTE $0x03 // or al, byte [rdi + r11 + 3] - LONG $0x1a448842; BYTE $0x03 // mov byte [rdx + r11 + 3], al - LONG $0x04c38349 // add r11, 4 - WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 - JNE LBB1_6 - JMP LBB1_16 - -LBB1_7: - LONG $0x0a0c8d4c // lea r9, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd2970f41 // seta r10b - LONG $0x0e048d48 // lea rax, [rsi + rcx] - WORD $0x3949; BYTE $0xf9 // cmp r9, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd0970f41 // seta r8b - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - WORD $0x8441; BYTE $0xda // test r10b, bl - JNE LBB1_3 - WORD $0x2045; BYTE $0xc8 // and r8b, r9b - JNE LBB1_3 - WORD $0x8949; BYTE $0xcb // mov r11, rcx - LONG $0xe0e38349 // and r11, -32 - LONG $0xe0438d49 // lea rax, [r11 - 32] - WORD $0x8949; BYTE $0xc1 // mov r9, rax - LONG $0x05e9c149 // shr r9, 5 - LONG $0x01c18349 // add r9, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB1_10 - WORD $0x894d; BYTE $0xca // mov r10, r9 - LONG $0xfee28349 // and r10, -2 - WORD $0xf749; BYTE $0xda // neg r10 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB1_12: - LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 - LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32] - LONG $0x4c100f42; WORD $0x3007 // movups 
xmm1, oword [rdi + r8 + 48] - LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32] - WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0 - LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48] - WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1 - LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2 - LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0 - LONG $0x40c08349 // add r8, 64 - LONG $0x02c28349 // add r10, 2 - JNE LBB1_12 - LONG $0x01c1f641 // test r9b, 1 - JE LBB1_15 - -LBB1_14: - LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 - -LBB1_15: - WORD $0x3949; BYTE $0xcb // cmp r11, rcx - JNE LBB1_3 - -LBB1_16: - RET - -LBB1_10: - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - LONG $0x01c1f641 // test r9b, 1 - JNE LBB1_14 - JMP LBB1_15 - -TEXT ·_bitmap_aligned_and_not_sse4(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB2_16 - LONG $0x1ff98348 // cmp rcx, 31 - JA LBB2_7 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - -LBB2_3: - WORD $0x894d; BYTE $0xd8 // mov r8, r11 - WORD $0xf749; BYTE $0xd0 // not r8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB2_5 - LONG $0x1e048a42 // mov al, byte [rsi + r11] - WORD $0xd0f6 // not al - LONG $0x1f042242 // and al, byte [rdi + r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x01cb8349 // or r11, 1 - -LBB2_5: - WORD $0x0149; BYTE $0xc8 // add r8, rcx - JE LBB2_16 - -LBB2_6: - LONG $0x04b60f42; BYTE $0x1e // movzx eax, 
byte [rsi + r11] - WORD $0xd0f6 // not al - LONG $0x1f042242 // and al, byte [rdi + r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1] - WORD $0xd0f6 // not al - LONG $0x1f442242; BYTE $0x01 // and al, byte [rdi + r11 + 1] - LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al - LONG $0x02c38349 // add r11, 2 - WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 - JNE LBB2_6 - JMP LBB2_16 - -LBB2_7: - LONG $0x0a0c8d4c // lea r9, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd2970f41 // seta r10b - LONG $0x0e048d48 // lea rax, [rsi + rcx] - WORD $0x3949; BYTE $0xf9 // cmp r9, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd0970f41 // seta r8b - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - WORD $0x8441; BYTE $0xda // test r10b, bl - JNE LBB2_3 - WORD $0x2045; BYTE $0xc8 // and r8b, r9b - JNE LBB2_3 - WORD $0x8949; BYTE $0xcb // mov r11, rcx - LONG $0xe0e38349 // and r11, -32 - LONG $0xe0438d49 // lea rax, [r11 - 32] - WORD $0x8949; BYTE $0xc1 // mov r9, rax - LONG $0x05e9c149 // shr r9, 5 - LONG $0x01c18349 // add r9, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB2_10 - WORD $0x894d; BYTE $0xca // mov r10, r9 - LONG $0xfee28349 // and r10, -2 - WORD $0xf749; BYTE $0xda // neg r10 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB2_12: - LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x550f; BYTE $0xd0 // andnps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x550f; BYTE $0xc1 // andnps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 
16], xmm0 - LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32] - LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48] - LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32] - WORD $0x550f; BYTE $0xd0 // andnps xmm2, xmm0 - LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48] - WORD $0x550f; BYTE $0xc1 // andnps xmm0, xmm1 - LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2 - LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0 - LONG $0x40c08349 // add r8, 64 - LONG $0x02c28349 // add r10, 2 - JNE LBB2_12 - LONG $0x01c1f641 // test r9b, 1 - JE LBB2_15 - -LBB2_14: - LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x550f; BYTE $0xd0 // andnps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x550f; BYTE $0xc1 // andnps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 - -LBB2_15: - WORD $0x3949; BYTE $0xcb // cmp r11, rcx - JNE LBB2_3 - -LBB2_16: - RET - -LBB2_10: - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - LONG $0x01c1f641 // test r9b, 1 - JNE LBB2_14 - JMP LBB2_15 - -TEXT ·_bitmap_aligned_xor_sse4(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB3_16 - LONG $0x1ff98348 // cmp rcx, 31 - JA LBB3_7 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - -LBB3_3: - WORD $0x894d; BYTE $0xd8 // mov r8, r11 - WORD $0xf749; BYTE $0xd0 // not r8 - WORD $0x0149; BYTE $0xc8 // add r8, rcx - WORD $0x8949; BYTE $0xc9 // mov r9, rcx - LONG $0x03e18349 // and r9, 3 - JE LBB3_5 - -LBB3_4: - LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] - LONG $0x1f043242 // xor al, byte [rdi + 
r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x01c38349 // add r11, 1 - LONG $0xffc18349 // add r9, -1 - JNE LBB3_4 - -LBB3_5: - LONG $0x03f88349 // cmp r8, 3 - JB LBB3_16 - -LBB3_6: - LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] - LONG $0x1f043242 // xor al, byte [rdi + r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1] - LONG $0x1f443242; BYTE $0x01 // xor al, byte [rdi + r11 + 1] - LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al - LONG $0x44b60f42; WORD $0x021e // movzx eax, byte [rsi + r11 + 2] - LONG $0x1f443242; BYTE $0x02 // xor al, byte [rdi + r11 + 2] - LONG $0x1a448842; BYTE $0x02 // mov byte [rdx + r11 + 2], al - LONG $0x44b60f42; WORD $0x031e // movzx eax, byte [rsi + r11 + 3] - LONG $0x1f443242; BYTE $0x03 // xor al, byte [rdi + r11 + 3] - LONG $0x1a448842; BYTE $0x03 // mov byte [rdx + r11 + 3], al - LONG $0x04c38349 // add r11, 4 - WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 - JNE LBB3_6 - JMP LBB3_16 - -LBB3_7: - LONG $0x0a0c8d4c // lea r9, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd2970f41 // seta r10b - LONG $0x0e048d48 // lea rax, [rsi + rcx] - WORD $0x3949; BYTE $0xf9 // cmp r9, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd0970f41 // seta r8b - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - WORD $0x8441; BYTE $0xda // test r10b, bl - JNE LBB3_3 - WORD $0x2045; BYTE $0xc8 // and r8b, r9b - JNE LBB3_3 - WORD $0x8949; BYTE $0xcb // mov r11, rcx - LONG $0xe0e38349 // and r11, -32 - LONG $0xe0438d49 // lea rax, [r11 - 32] - WORD $0x8949; BYTE $0xc1 // mov r9, rax - LONG $0x05e9c149 // shr r9, 5 - LONG $0x01c18349 // add r9, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB3_10 - WORD $0x894d; BYTE $0xca // mov r10, r9 - LONG $0xfee28349 // and r10, -2 - 
WORD $0xf749; BYTE $0xda // neg r10 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB3_12: - LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x570f; BYTE $0xd0 // xorps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x570f; BYTE $0xc1 // xorps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 - LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32] - LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48] - LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32] - WORD $0x570f; BYTE $0xd0 // xorps xmm2, xmm0 - LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48] - WORD $0x570f; BYTE $0xc1 // xorps xmm0, xmm1 - LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2 - LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0 - LONG $0x40c08349 // add r8, 64 - LONG $0x02c28349 // add r10, 2 - JNE LBB3_12 - LONG $0x01c1f641 // test r9b, 1 - JE LBB3_15 - -LBB3_14: - LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x570f; BYTE $0xd0 // xorps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x570f; BYTE $0xc1 // xorps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 - -LBB3_15: - WORD $0x3949; BYTE $0xcb // cmp r11, rcx - JNE LBB3_3 - -LBB3_16: - RET - -LBB3_10: - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - LONG $0x01c1f641 // test r9b, 1 - JNE LBB3_14 - JMP LBB3_15 diff --git a/go/arrow/bitutil/bitmaps.go 
b/go/arrow/bitutil/bitmaps.go deleted file mode 100644 index fb4fcd597b804..0000000000000 --- a/go/arrow/bitutil/bitmaps.go +++ /dev/null @@ -1,747 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bitutil - -import ( - "bytes" - "errors" - "math/bits" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow/endian" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -// BitmapReader is a simple bitmap reader for a byte slice. 
-type BitmapReader struct { - bitmap []byte - pos int - len int - - current byte - byteOffset int - bitOffset int -} - -// NewBitmapReader creates and returns a new bitmap reader for the given bitmap -func NewBitmapReader(bitmap []byte, offset, length int) *BitmapReader { - curbyte := byte(0) - if length > 0 && bitmap != nil { - curbyte = bitmap[offset/8] - } - return &BitmapReader{ - bitmap: bitmap, - byteOffset: offset / 8, - bitOffset: offset % 8, - current: curbyte, - len: length, - } -} - -// Set returns true if the current bit is set -func (b *BitmapReader) Set() bool { - return (b.current & (1 << b.bitOffset)) != 0 -} - -// NotSet returns true if the current bit is not set -func (b *BitmapReader) NotSet() bool { - return (b.current & (1 << b.bitOffset)) == 0 -} - -// Next advances the reader to the next bit in the bitmap. -func (b *BitmapReader) Next() { - b.bitOffset++ - b.pos++ - if b.bitOffset == 8 { - b.bitOffset = 0 - b.byteOffset++ - if b.pos < b.len { - b.current = b.bitmap[int(b.byteOffset)] - } - } -} - -// Pos returns the current bit position in the bitmap that the reader is looking at -func (b *BitmapReader) Pos() int { return b.pos } - -// Len returns the total number of bits in the bitmap -func (b *BitmapReader) Len() int { return b.len } - -// BitmapWriter is a simple writer for writing bitmaps to byte slices -type BitmapWriter struct { - buf []byte - pos int - length int - - curByte uint8 - bitMask uint8 - byteOffset int -} - -// NewBitmapWriter returns a sequential bitwise writer that preserves surrounding -// bit values as it writes. -func NewBitmapWriter(bitmap []byte, start, length int) *BitmapWriter { - ret := &BitmapWriter{ - buf: bitmap, - length: length, - byteOffset: start / 8, - bitMask: BitMask[start%8], - } - if length > 0 { - ret.curByte = bitmap[int(ret.byteOffset)] - } - return ret -} - -// Reset resets the position and view of the slice to restart writing a bitmap -// to the same byte slice. 
-func (b *BitmapWriter) Reset(start, length int) { - b.pos = 0 - b.byteOffset = start / 8 - b.bitMask = BitMask[start%8] - b.length = length - if b.length > 0 { - b.curByte = b.buf[int(b.byteOffset)] - } -} - -func (b *BitmapWriter) Pos() int { return b.pos } -func (b *BitmapWriter) Set() { b.curByte |= b.bitMask } -func (b *BitmapWriter) Clear() { b.curByte &= ^b.bitMask } - -// Next increments the writer to the next bit for writing. -func (b *BitmapWriter) Next() { - b.bitMask = b.bitMask << 1 - b.pos++ - if b.bitMask == 0 { - b.bitMask = 0x01 - b.buf[b.byteOffset] = b.curByte - b.byteOffset++ - if b.pos < b.length { - b.curByte = b.buf[int(b.byteOffset)] - } - } -} - -// AppendBools writes a series of booleans to the bitmapwriter and returns -// the number of remaining bytes left in the buffer for writing. -func (b *BitmapWriter) AppendBools(in []bool) int { - space := min(b.length-b.pos, len(in)) - if space == 0 { - return 0 - } - - bitOffset := bits.TrailingZeros32(uint32(b.bitMask)) - // location that the first byte needs to be written to for appending - appslice := b.buf[int(b.byteOffset) : b.byteOffset+int(BytesForBits(int64(bitOffset+space)))] - // update everything but curByte - appslice[0] = b.curByte - for i, b := range in[:space] { - if b { - SetBit(appslice, i+bitOffset) - } else { - ClearBit(appslice, i+bitOffset) - } - } - - b.pos += space - b.bitMask = BitMask[(bitOffset+space)%8] - b.byteOffset += (bitOffset + space) / 8 - b.curByte = appslice[len(appslice)-1] - - return space -} - -// Finish flushes the final byte out to the byteslice in case it was not already -// on a byte aligned boundary. 
-func (b *BitmapWriter) Finish() { - if b.length > 0 && (b.bitMask != 0x01 || b.pos < b.length) { - b.buf[int(b.byteOffset)] = b.curByte - } -} - -// BitmapWordReader is a reader for bitmaps that reads a word at a time (a word being an 8 byte uint64) -// and then provides functions to grab the individual trailing bytes after the last word -type BitmapWordReader struct { - bitmap []byte - offset int - nwords int - trailingBits int - trailingBytes int - curword uint64 -} - -// NewBitmapWordReader sets up a word reader, calculates the number of trailing bits and -// number of trailing bytes, along with the number of words. -func NewBitmapWordReader(bitmap []byte, offset, length int) *BitmapWordReader { - bitoffset := offset % 8 - byteOffset := offset / 8 - bm := &BitmapWordReader{ - offset: bitoffset, - bitmap: bitmap[byteOffset : byteOffset+int(BytesForBits(int64(bitoffset+length)))], - // decrement wordcount by 1 as we may touch two adjacent words in one iteration - nwords: length/int(unsafe.Sizeof(uint64(0))*8) - 1, - } - if bm.nwords < 0 { - bm.nwords = 0 - } - bm.trailingBits = length - bm.nwords*int(unsafe.Sizeof(uint64(0)))*8 - bm.trailingBytes = int(BytesForBits(int64(bm.trailingBits))) - - if bm.nwords > 0 { - bm.curword = toFromLEFunc(endian.Native.Uint64(bm.bitmap)) - } else if length > 0 { - setLSB(&bm.curword, bm.bitmap[0]) - } - return bm -} - -// NextWord returns the next full word read from the bitmap, should not be called -// if Words() is 0 as it will step outside of the bounds of the bitmap slice and panic. -// -// We don't perform the bounds checking in order to improve performance. 
-func (bm *BitmapWordReader) NextWord() uint64 { - bm.bitmap = bm.bitmap[unsafe.Sizeof(bm.curword):] - word := bm.curword - nextWord := toFromLEFunc(endian.Native.Uint64(bm.bitmap)) - if bm.offset != 0 { - // combine two adjacent words into one word - // |<------ next ----->|<---- current ---->| - // +-------------+-----+-------------+-----+ - // | --- | A | B | --- | - // +-------------+-----+-------------+-----+ - // | | offset - // v v - // +-----+-------------+ - // | A | B | - // +-----+-------------+ - // |<------ word ----->| - word >>= uint64(bm.offset) - word |= nextWord << (int64(unsafe.Sizeof(uint64(0))*8) - int64(bm.offset)) - } - bm.curword = nextWord - return word -} - -// NextTrailingByte returns the next trailing byte of the bitmap after the last word -// along with the number of valid bits in that byte. When validBits < 8, that -// is the last byte. -// -// If the bitmap ends on a byte alignment, then the last byte can also return 8 valid bits. -// Thus the TrailingBytes function should be used to know how many trailing bytes to read. 
-func (bm *BitmapWordReader) NextTrailingByte() (val byte, validBits int) { - debug.Assert(bm.trailingBits > 0, "next trailing byte called with no trailing bits") - - if bm.trailingBits <= 8 { - // last byte - validBits = bm.trailingBits - bm.trailingBits = 0 - rdr := NewBitmapReader(bm.bitmap, bm.offset, validBits) - for i := 0; i < validBits; i++ { - val >>= 1 - if rdr.Set() { - val |= 0x80 - } - rdr.Next() - } - val >>= (8 - validBits) - return - } - - bm.bitmap = bm.bitmap[1:] - nextByte := bm.bitmap[0] - val = getLSB(bm.curword) - if bm.offset != 0 { - val >>= byte(bm.offset) - val |= nextByte << (8 - bm.offset) - } - setLSB(&bm.curword, nextByte) - bm.trailingBits -= 8 - bm.trailingBytes-- - validBits = 8 - return -} - -func (bm *BitmapWordReader) Words() int { return bm.nwords } -func (bm *BitmapWordReader) TrailingBytes() int { return bm.trailingBytes } - -// BitmapWordWriter is a bitmap writer for writing a full word at a time (a word being -// a uint64). After the last full word is written, PutNextTrailingByte can be used to -// write the remaining trailing bytes. -type BitmapWordWriter struct { - bitmap []byte - offset int - len int - - bitMask uint64 - currentWord uint64 -} - -// NewBitmapWordWriter initializes a new bitmap word writer which will start writing -// into the byte slice at bit offset start, expecting to write len bits. -func NewBitmapWordWriter(bitmap []byte, start, len int) *BitmapWordWriter { - ret := &BitmapWordWriter{ - bitmap: bitmap[start/8:], - len: len, - offset: start % 8, - bitMask: (uint64(1) << uint64(start%8)) - 1, - } - - if ret.offset != 0 { - if ret.len >= int(unsafe.Sizeof(uint64(0))*8) { - ret.currentWord = toFromLEFunc(endian.Native.Uint64(ret.bitmap)) - } else if ret.len > 0 { - setLSB(&ret.currentWord, ret.bitmap[0]) - } - } - return ret -} - -// PutNextWord writes the given word to the bitmap, potentially splitting across -// two adjacent words. 
-func (bm *BitmapWordWriter) PutNextWord(word uint64) { - sz := int(unsafe.Sizeof(word)) - if bm.offset != 0 { - // split one word into two adjacent words, don't touch unused bits - // |<------ word ----->| - // +-----+-------------+ - // | A | B | - // +-----+-------------+ - // | | - // v v offset - // +-------------+-----+-------------+-----+ - // | --- | A | B | --- | - // +-------------+-----+-------------+-----+ - // |<------ next ----->|<---- current ---->| - word = (word << uint64(bm.offset)) | (word >> (int64(sz*8) - int64(bm.offset))) - next := toFromLEFunc(endian.Native.Uint64(bm.bitmap[sz:])) - bm.currentWord = (bm.currentWord & bm.bitMask) | (word &^ bm.bitMask) - next = (next &^ bm.bitMask) | (word & bm.bitMask) - endian.Native.PutUint64(bm.bitmap, toFromLEFunc(bm.currentWord)) - endian.Native.PutUint64(bm.bitmap[sz:], toFromLEFunc(next)) - bm.currentWord = next - } else { - endian.Native.PutUint64(bm.bitmap, toFromLEFunc(word)) - } - bm.bitmap = bm.bitmap[sz:] -} - -// PutNextTrailingByte writes the number of bits indicated by validBits from b to -// the bitmap. 
-func (bm *BitmapWordWriter) PutNextTrailingByte(b byte, validBits int) { - curbyte := getLSB(bm.currentWord) - if validBits == 8 { - if bm.offset != 0 { - b = (b << bm.offset) | (b >> (8 - bm.offset)) - next := bm.bitmap[1] - curbyte = (curbyte & byte(bm.bitMask)) | (b &^ byte(bm.bitMask)) - next = (next &^ byte(bm.bitMask)) | (b & byte(bm.bitMask)) - bm.bitmap[0] = curbyte - bm.bitmap[1] = next - bm.currentWord = uint64(next) - } else { - bm.bitmap[0] = b - } - bm.bitmap = bm.bitmap[1:] - } else { - debug.Assert(validBits > 0 && validBits < 8, "invalid valid bits in bitmap word writer") - debug.Assert(BytesForBits(int64(bm.offset+validBits)) <= int64(len(bm.bitmap)), "writing trailing byte outside of bounds of bitmap") - wr := NewBitmapWriter(bm.bitmap, int(bm.offset), validBits) - for i := 0; i < validBits; i++ { - if b&0x01 != 0 { - wr.Set() - } else { - wr.Clear() - } - wr.Next() - b >>= 1 - } - wr.Finish() - } -} - -type transferMode int8 - -const ( - transferCopy transferMode = iota - transferInvert -) - -func transferBitmap(mode transferMode, src []byte, srcOffset, length int, dst []byte, dstOffset int) { - if length == 0 { - // if there's nothing to write, end early. - return - } - - bitOffset := srcOffset % 8 - destBitOffset := dstOffset % 8 - - // slow path, one of the bitmaps are not byte aligned. 
- if bitOffset != 0 || destBitOffset != 0 { - rdr := NewBitmapWordReader(src, srcOffset, length) - wr := NewBitmapWordWriter(dst, dstOffset, length) - - nwords := rdr.Words() - for nwords > 0 { - nwords-- - if mode == transferInvert { - wr.PutNextWord(^rdr.NextWord()) - } else { - wr.PutNextWord(rdr.NextWord()) - } - } - nbytes := rdr.TrailingBytes() - for nbytes > 0 { - nbytes-- - bt, validBits := rdr.NextTrailingByte() - if mode == transferInvert { - bt = ^bt - } - wr.PutNextTrailingByte(bt, validBits) - } - return - } - - // fast path, both are starting with byte-aligned bitmaps - nbytes := int(BytesForBits(int64(length))) - - // shift by its byte offset - src = src[srcOffset/8:] - dst = dst[dstOffset/8:] - - // Take care of the trailing bits in the last byte - // E.g., if trailing_bits = 5, last byte should be - // - low 3 bits: new bits from last byte of data buffer - // - high 5 bits: old bits from last byte of dest buffer - trailingBits := nbytes*8 - length - trailMask := byte(uint(1)<<(8-trailingBits)) - 1 - var lastData byte - if mode == transferInvert { - for i, b := range src[:nbytes-1] { - dst[i] = ^b - } - lastData = ^src[nbytes-1] - } else { - copy(dst, src[:nbytes-1]) - lastData = src[nbytes-1] - } - - dst[nbytes-1] &= ^trailMask - dst[nbytes-1] |= lastData & trailMask -} - -// CopyBitmap copies the bitmap indicated by src, starting at bit offset srcOffset, -// and copying length bits into dst, starting at bit offset dstOffset. -func CopyBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) { - transferBitmap(transferCopy, src, srcOffset, length, dst, dstOffset) -} - -// InvertBitmap copies a bit range of a bitmap, inverting it as it copies -// over into the destination. 
-func InvertBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) { - transferBitmap(transferInvert, src, srcOffset, length, dst, dstOffset) -} - -type bitOp struct { - opWord func(uint64, uint64) uint64 - opByte func(byte, byte) byte - opAligned func(l, r, o []byte) -} - -var ( - bitAndOp = bitOp{ - opWord: func(l, r uint64) uint64 { return l & r }, - opByte: func(l, r byte) byte { return l & r }, - } - bitOrOp = bitOp{ - opWord: func(l, r uint64) uint64 { return l | r }, - opByte: func(l, r byte) byte { return l | r }, - } - bitAndNotOp = bitOp{ - opWord: func(l, r uint64) uint64 { return l &^ r }, - opByte: func(l, r byte) byte { return l &^ r }, - } - bitXorOp = bitOp{ - opWord: func(l, r uint64) uint64 { return l ^ r }, - opByte: func(l, r byte) byte { return l ^ r }, - } -) - -func alignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { - debug.Assert(lOffset%8 == rOffset%8, "aligned bitmap op called with unaligned offsets") - debug.Assert(lOffset%8 == outOffset%8, "aligned bitmap op called with unaligned output offset") - - nbytes := BytesForBits(length + lOffset%8) - left = left[lOffset/8:] - right = right[rOffset/8:] - out = out[outOffset/8:] - endMask := (lOffset + length%8) - switch nbytes { - case 0: - return - case 1: // everything within a single byte - // (length+lOffset%8) <= 8 - mask := PrecedingBitmask[lOffset%8] - if endMask != 0 { - mask |= TrailingBitmask[(lOffset+length)%8] - } - out[0] = (out[0] & mask) | (op.opByte(left[0], right[0]) &^ mask) - case 2: // don't send zero length to opAligned - firstByteMask := PrecedingBitmask[lOffset%8] - out[0] = (out[0] & firstByteMask) | (op.opByte(left[0], right[0]) &^ firstByteMask) - lastByteMask := byte(0) - if endMask != 0 { - lastByteMask = TrailingBitmask[(lOffset+length)%8] - } - out[1] = (out[1] & lastByteMask) | (op.opByte(left[1], right[1]) &^ lastByteMask) - default: - firstByteMask := PrecedingBitmask[lOffset%8] - 
out[0] = (out[0] & firstByteMask) | (op.opByte(left[0], right[0]) &^ firstByteMask) - - op.opAligned(left[1:nbytes-1], right[1:nbytes-1], out[1:nbytes-1]) - - lastByteMask := byte(0) - if endMask != 0 { - lastByteMask = TrailingBitmask[(lOffset+length)%8] - } - out[nbytes-1] = (out[nbytes-1] & lastByteMask) | (op.opByte(left[nbytes-1], right[nbytes-1]) &^ lastByteMask) - } -} - -func unalignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { - leftRdr := NewBitmapWordReader(left, int(lOffset), int(length)) - rightRdr := NewBitmapWordReader(right, int(rOffset), int(length)) - writer := NewBitmapWordWriter(out, int(outOffset), int(length)) - - for nwords := leftRdr.Words(); nwords > 0; nwords-- { - writer.PutNextWord(op.opWord(leftRdr.NextWord(), rightRdr.NextWord())) - } - for nbytes := leftRdr.TrailingBytes(); nbytes > 0; nbytes-- { - leftByte, leftValid := leftRdr.NextTrailingByte() - rightByte, rightValid := rightRdr.NextTrailingByte() - debug.Assert(leftValid == rightValid, "unexpected mismatch of valid bits") - writer.PutNextTrailingByte(op.opByte(leftByte, rightByte), leftValid) - } -} - -func BitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset, length int64) { - if (outOffset%8 == lOffset%8) && (outOffset%8 == rOffset%8) { - // fastcase! 
- alignedBitmapOp(op, left, right, lOffset, rOffset, out, outOffset, length) - } else { - unalignedBitmapOp(op, left, right, lOffset, rOffset, out, outOffset, length) - } -} - -func BitmapOpAlloc(mem memory.Allocator, op bitOp, left, right []byte, lOffset, rOffset int64, length int64, outOffset int64) *memory.Buffer { - bits := length + outOffset - buf := memory.NewResizableBuffer(mem) - buf.Resize(int(BytesForBits(bits))) - BitmapOp(op, left, right, lOffset, rOffset, buf.Bytes(), outOffset, length) - return buf -} - -func BitmapAnd(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { - BitmapOp(bitAndOp, left, right, lOffset, rOffset, out, outOffset, length) -} - -func BitmapOr(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { - BitmapOp(bitOrOp, left, right, lOffset, rOffset, out, outOffset, length) -} - -func BitmapAndAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer { - return BitmapOpAlloc(mem, bitAndOp, left, right, lOffset, rOffset, length, outOffset) -} - -func BitmapOrAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer { - return BitmapOpAlloc(mem, bitOrOp, left, right, lOffset, rOffset, length, outOffset) -} - -func BitmapAndNot(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { - BitmapOp(bitAndNotOp, left, right, lOffset, rOffset, out, outOffset, length) -} - -func BitmapAndNotAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer { - return BitmapOpAlloc(mem, bitAndNotOp, left, right, lOffset, rOffset, length, outOffset) -} - -func BitmapXor(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { - BitmapOp(bitXorOp, left, right, lOffset, rOffset, out, outOffset, length) -} - -func BitmapXorAlloc(mem memory.Allocator, left, right 
[]byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer { - return BitmapOpAlloc(mem, bitXorOp, left, right, lOffset, rOffset, length, outOffset) -} - -func BitmapEquals(left, right []byte, lOffset, rOffset int64, length int64) bool { - if lOffset%8 == 0 && rOffset%8 == 0 { - // byte aligned, fast path, can use bytes.Equal (memcmp) - byteLen := length / 8 - lStart := lOffset / 8 - rStart := rOffset / 8 - if !bytes.Equal(left[lStart:lStart+byteLen], right[rStart:rStart+byteLen]) { - return false - } - - // check trailing bits - for i := (length / 8) * 8; i < length; i++ { - if BitIsSet(left, int(lOffset+i)) != BitIsSet(right, int(rOffset+i)) { - return false - } - } - return true - } - - lrdr := NewBitmapWordReader(left, int(lOffset), int(length)) - rrdr := NewBitmapWordReader(right, int(rOffset), int(length)) - - nwords := lrdr.Words() - for nwords > 0 { - nwords-- - if lrdr.NextWord() != rrdr.NextWord() { - return false - } - } - - nbytes := lrdr.TrailingBytes() - for nbytes > 0 { - nbytes-- - lbt, _ := lrdr.NextTrailingByte() - rbt, _ := rrdr.NextTrailingByte() - if lbt != rbt { - return false - } - } - return true -} - -// OptionalBitIndexer is a convenience wrapper for getting bits from -// a bitmap which may or may not be nil. 
-type OptionalBitIndexer struct { - Bitmap []byte - Offset int -} - -func (b *OptionalBitIndexer) GetBit(i int) bool { - return b.Bitmap == nil || BitIsSet(b.Bitmap, b.Offset+i) -} - -type Bitmap struct { - Data []byte - Offset, Len int64 -} - -func bitLength(bitmaps []Bitmap) (int64, error) { - for _, b := range bitmaps[1:] { - if b.Len != bitmaps[0].Len { - return -1, errors.New("bitmaps must be same length") - } - } - return bitmaps[0].Len, nil -} - -func runVisitWordsAndWriteLoop(bitLen int64, rdrs []*BitmapWordReader, wrs []*BitmapWordWriter, visitor func(in, out []uint64)) { - const bitWidth int64 = int64(uint64SizeBits) - - visited := make([]uint64, len(rdrs)) - output := make([]uint64, len(wrs)) - - // every reader will have same number of words, since they are same - // length'ed. This will be inefficient in some cases. When there's - // offsets beyond the Word boundary, every word would have to be - // created from 2 adjoining words - nwords := int64(rdrs[0].Words()) - bitLen -= nwords * bitWidth - for nwords > 0 { - nwords-- - for i := range visited { - visited[i] = rdrs[i].NextWord() - } - visitor(visited, output) - for i := range output { - wrs[i].PutNextWord(output[i]) - } - } - - // every reader will have the same number of trailing bytes, because - // we already confirmed they have the same length. Because - // offsets beyond the Word boundary can cause adjoining words, the - // tailing portion could be more than one word remaining full/partial - // words to write. 
- if bitLen == 0 { - return - } - - // convert the word visitor to a bytevisitor - byteVisitor := func(in, out []byte) { - for i, w := range in { - visited[i] = uint64(w) - } - visitor(visited, output) - for i, w := range output { - out[i] = byte(w) - } - } - - visitedBytes := make([]byte, len(rdrs)) - outputBytes := make([]byte, len(wrs)) - nbytes := rdrs[0].trailingBytes - for nbytes > 0 { - nbytes-- - memory.Set(visitedBytes, 0) - memory.Set(outputBytes, 0) - - var validBits int - for i := range rdrs { - visitedBytes[i], validBits = rdrs[i].NextTrailingByte() - } - byteVisitor(visitedBytes, outputBytes) - for i, w := range outputBytes { - wrs[i].PutNextTrailingByte(w, validBits) - } - } -} - -// VisitWordsAndWrite visits words of bits from each input bitmap and -// collects outputs to a slice of output Bitmaps. -// -// All bitmaps must have identical lengths. The first bit in a visited -// bitmap may be offset within the first visited word, but words will -// otherwise contain densely packed bits loaded from the bitmap. That -// offset within the first word is returned. -// -// NOTE: this function is efficient on 3+ sufficiently large bitmaps. -// It also has a large prolog/epilog overhead and should be used -// carefully in other cases. For 2 or fewer bitmaps, and/or smaller -// bitmaps, try BitmapReader and or other utilities. 
-func VisitWordsAndWrite(args []Bitmap, out []Bitmap, visitor func(in, out []uint64)) error { - bitLen, err := bitLength(args) - if err != nil { - return err - } - - rdrs, wrs := make([]*BitmapWordReader, len(args)), make([]*BitmapWordWriter, len(out)) - for i, in := range args { - rdrs[i] = NewBitmapWordReader(in.Data, int(in.Offset), int(in.Len)) - } - for i, o := range out { - wrs[i] = NewBitmapWordWriter(o.Data, int(o.Offset), int(o.Len)) - } - runVisitWordsAndWriteLoop(bitLen, rdrs, wrs, visitor) - return nil -} diff --git a/go/arrow/bitutil/bitmaps_test.go b/go/arrow/bitutil/bitmaps_test.go deleted file mode 100644 index 726bfa050cc4b..0000000000000 --- a/go/arrow/bitutil/bitmaps_test.go +++ /dev/null @@ -1,580 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package bitutil_test - -import ( - "fmt" - "math/rand" - "strconv" - "testing" - - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/suite" -) - -func bitmapFromSlice(vals []int, bitOffset int) []byte { - out := make([]byte, int(bitutil.BytesForBits(int64(len(vals)+bitOffset)))) - writer := bitutil.NewBitmapWriter(out, bitOffset, len(vals)) - for _, val := range vals { - if val == 1 { - writer.Set() - } else { - writer.Clear() - } - writer.Next() - } - writer.Finish() - - return out -} - -func assertReaderVals(t *testing.T, reader *bitutil.BitmapReader, vals []bool) { - for _, v := range vals { - if v { - assert.True(t, reader.Set()) - assert.False(t, reader.NotSet()) - } else { - assert.True(t, reader.NotSet()) - assert.False(t, reader.Set()) - } - reader.Next() - } -} - -func TestNormalOperation(t *testing.T) { - for _, offset := range []int{0, 1, 3, 5, 7, 8, 12, 13, 21, 38, 75, 120} { - buf := bitmapFromSlice([]int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}, offset) - - reader := bitutil.NewBitmapReader(buf, offset, 14) - assertReaderVals(t, reader, []bool{false, true, true, true, false, false, false, true, false, true, false, true, false, true}) - } -} - -func TestDoesNotReadOutOfBounds(t *testing.T) { - var bitmap [16]byte - const length = 128 - - reader := bitutil.NewBitmapReader(bitmap[:], 0, length) - assert.EqualValues(t, length, reader.Len()) - assert.NotPanics(t, func() { - for i := 0; i < length; i++ { - assert.True(t, reader.NotSet()) - reader.Next() - } - }) - assert.EqualValues(t, length, reader.Pos()) - - reader = bitutil.NewBitmapReader(bitmap[:], 5, length-5) - assert.EqualValues(t, length-5, reader.Len()) - assert.NotPanics(t, func() { - for i := 0; i < length-5; i++ { - assert.True(t, reader.NotSet()) - reader.Next() - } - }) - assert.EqualValues(t, length-5, reader.Pos()) - - assert.NotPanics(t, func() { - reader = 
bitutil.NewBitmapReader(nil, 0, 0) - }) -} - -func writeToWriter(vals []int, wr *bitutil.BitmapWriter) { - for _, v := range vals { - if v != 0 { - wr.Set() - } else { - wr.Clear() - } - wr.Next() - } - wr.Finish() -} - -func TestBitmapWriter(t *testing.T) { - for _, fillByte := range []byte{0x00, 0xFF} { - { - bitmap := []byte{fillByte, fillByte, fillByte, fillByte} - wr := bitutil.NewBitmapWriter(bitmap, 0, 12) - writeToWriter([]int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}, wr) - // {0b00110110, 0b....1010, ........, ........} - assert.Equal(t, []byte{0x36, (0x0A | (fillByte & 0xF0)), fillByte, fillByte}, bitmap) - } - { - bitmap := []byte{fillByte, fillByte, fillByte, fillByte} - wr := bitutil.NewBitmapWriter(bitmap, 0, 12) - wr.AppendBools([]bool{false, true, true, false, true, true, false, false, false, true, false, true}) - assert.Equal(t, []byte{0x36, (0x0A | (fillByte & 0xF0)), fillByte, fillByte}, bitmap) - } - { - bitmap := []byte{fillByte, fillByte, fillByte, fillByte} - wr := bitutil.NewBitmapWriter(bitmap, 3, 12) - writeToWriter([]int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}, wr) - // {0b10110..., 0b.1010001, ........, ........} - assert.Equal(t, []byte{0xb0 | (fillByte & 0x07), 0x51 | (fillByte & 0x80), fillByte, fillByte}, bitmap) - } - { - bitmap := []byte{fillByte, fillByte, fillByte, fillByte} - wr := bitutil.NewBitmapWriter(bitmap, 3, 12) - wr.AppendBools([]bool{false, true, true, false}) - wr.AppendBools([]bool{true, true, false, false}) - wr.AppendBools([]bool{false, true, false, true}) - assert.Equal(t, []byte{0xb0 | (fillByte & 0x07), 0x51 | (fillByte & 0x80), fillByte, fillByte}, bitmap) - } - { - bitmap := []byte{fillByte, fillByte, fillByte, fillByte} - wr := bitutil.NewBitmapWriter(bitmap, 20, 12) - writeToWriter([]int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}, wr) - // {........, ........, 0b0110...., 0b10100011} - assert.Equal(t, []byte{fillByte, fillByte, 0x60 | (fillByte & 0x0f), 0xa3}, bitmap) - } - } -} - -func TestBitmapReader(t *testing.T) { - 
assertReaderVals := func(vals []int, rdr *bitutil.BitmapReader) { - for _, v := range vals { - if v != 0 { - assert.True(t, rdr.Set()) - assert.False(t, rdr.NotSet()) - } else { - assert.False(t, rdr.Set()) - assert.True(t, rdr.NotSet()) - } - rdr.Next() - } - } - - vals := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1} - - for _, offset := range []int{0, 1, 3, 5, 7, 8, 12, 13, 21, 38, 75, 120} { - bm := make([]byte, bitutil.BytesForBits(int64(len(vals)+offset))) - wr := bitutil.NewBitmapWriter(bm, offset, len(vals)) - writeToWriter(vals, wr) - - rdr := bitutil.NewBitmapReader(bm, offset, 14) - assertReaderVals(vals, rdr) - } -} - -func TestCopyBitmap(t *testing.T) { - const bufsize = 1000 - lengths := []int{bufsize*8 - 4, bufsize * 8} - offsets := []int{0, 12, 16, 32, 37, 63, 64, 128} - - buffer := make([]byte, bufsize) - - // random bytes - r := rand.New(rand.NewSource(0)) - r.Read(buffer) - - // add 16 byte padding - otherBuffer := make([]byte, bufsize+32) - r.Read(otherBuffer) - - for _, nbits := range lengths { - for _, offset := range offsets { - for _, destOffset := range offsets { - t.Run(fmt.Sprintf("bits %d off %d dst %d", nbits, offset, destOffset), func(t *testing.T) { - copyLen := nbits - offset - - bmCopy := make([]byte, len(otherBuffer)) - copy(bmCopy, otherBuffer) - - bitutil.CopyBitmap(buffer, offset, copyLen, bmCopy, destOffset) - - for i := 0; i < int(destOffset); i++ { - assert.Equalf(t, bitutil.BitIsSet(otherBuffer, i), bitutil.BitIsSet(bmCopy, i), "bit index: %d", i) - } - for i := 0; i < int(copyLen); i++ { - assert.Equalf(t, bitutil.BitIsSet(buffer, i+int(offset)), bitutil.BitIsSet(bmCopy, i+int(destOffset)), "bit index: %d", i) - } - for i := int(destOffset + copyLen); i < len(otherBuffer); i++ { - assert.Equalf(t, bitutil.BitIsSet(otherBuffer, i), bitutil.BitIsSet(bmCopy, i), "bit index: %d", i) - } - }) - } - } - } -} - -func benchmarkCopyBitmapN(b *testing.B, offsetSrc, offsetDest, n int) { - nbits := n * 8 - // random bytes - r := 
rand.New(rand.NewSource(0)) - src := make([]byte, n) - r.Read(src) - - length := nbits - offsetSrc - - dest := make([]byte, bitutil.BytesForBits(int64(length+offsetDest))) - - b.ResetTimer() - b.SetBytes(int64(n)) - for i := 0; i < b.N; i++ { - bitutil.CopyBitmap(src, offsetSrc, length, dest, offsetDest) - } -} - -// Fast path which is just a memcopy -func BenchmarkCopyBitmapWithoutOffset(b *testing.B) { - for _, sz := range []int{32, 128, 1000, 1024} { - b.Run(strconv.Itoa(sz), func(b *testing.B) { - benchmarkCopyBitmapN(b, 0, 0, sz) - }) - } -} - -// slow path where the source buffer is not byte aligned -func BenchmarkCopyBitmapWithOffset(b *testing.B) { - for _, sz := range []int{32, 128, 1000, 1024} { - b.Run(strconv.Itoa(sz), func(b *testing.B) { - benchmarkCopyBitmapN(b, 4, 0, sz) - }) - } -} - -// slow path where both source and dest are not byte aligned -func BenchmarkCopyBitmapWithOffsetBoth(b *testing.B) { - for _, sz := range []int{32, 128, 1000, 1024} { - b.Run(strconv.Itoa(sz), func(b *testing.B) { - benchmarkCopyBitmapN(b, 3, 7, sz) - }) - } -} - -const bufferSize = 1024 * 8 - -// a naive bitmap reader for a baseline - -type NaiveBitmapReader struct { - bitmap []byte - pos int -} - -func (n *NaiveBitmapReader) IsSet() bool { return bitutil.BitIsSet(n.bitmap, n.pos) } -func (n *NaiveBitmapReader) IsNotSet() bool { return !n.IsSet() } -func (n *NaiveBitmapReader) Next() { n.pos++ } - -// naive bitmap writer for a baseline - -type NaiveBitmapWriter struct { - bitmap []byte - pos int -} - -func (n *NaiveBitmapWriter) Set() { - byteOffset := n.pos / 8 - bitOffset := n.pos % 8 - bitSetMask := uint8(1 << bitOffset) - n.bitmap[byteOffset] |= bitSetMask -} - -func (n *NaiveBitmapWriter) Clear() { - byteOffset := n.pos / 8 - bitOffset := n.pos % 8 - bitClearMask := uint8(0xFF ^ (1 << bitOffset)) - n.bitmap[byteOffset] &= bitClearMask -} - -func (n *NaiveBitmapWriter) Next() { n.pos++ } -func (n *NaiveBitmapWriter) Finish() {} - -func randomBuffer(nbytes int64) 
[]byte { - buf := make([]byte, nbytes) - r := rand.New(rand.NewSource(0)) - r.Read(buf) - return buf -} - -func BenchmarkBitmapReader(b *testing.B) { - buf := randomBuffer(bufferSize) - nbits := bufferSize * 8 - - b.Run("naive baseline", func(b *testing.B) { - b.SetBytes(2 * bufferSize) - for i := 0; i < b.N; i++ { - { - total := 0 - rdr := NaiveBitmapReader{buf, 0} - for j := 0; j < nbits; j++ { - if rdr.IsSet() { - total++ - } - rdr.Next() - } - } - { - total := 0 - rdr := NaiveBitmapReader{buf, 0} - for j := 0; j < nbits; j++ { - if rdr.IsSet() { - total++ - } - rdr.Next() - } - } - } - }) - b.Run("bitmap reader", func(b *testing.B) { - b.SetBytes(2 * bufferSize) - for i := 0; i < b.N; i++ { - { - total := 0 - rdr := bitutil.NewBitmapReader(buf, 0, nbits) - for j := 0; j < nbits; j++ { - if rdr.Set() { - total++ - } - rdr.Next() - } - } - { - total := 0 - rdr := bitutil.NewBitmapReader(buf, 0, nbits) - for j := 0; j < nbits; j++ { - if rdr.Set() { - total++ - } - rdr.Next() - } - } - } - }) -} - -type ( - noAllocFn func(left, right []byte, lOffset, rOffset int64, out []byte, outOffset, length int64) - allocFn func(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer - bitmapOp struct { - noAlloc noAllocFn - alloc allocFn - } -) - -type BitmapOpSuite struct { - suite.Suite -} - -func (s *BitmapOpSuite) testAligned(op bitmapOp, leftBits, rightBits []int, resultBits []bool) { - var ( - left, right []byte - out *memory.Buffer - length int64 - ) - for _, lOffset := range []int64{0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536} { - s.Run(fmt.Sprintf("left offset %d", lOffset), func() { - left = bitmapFromSlice(leftBits, int(lOffset)) - length = int64(len(leftBits)) - for _, rOffset := range []int64{lOffset, lOffset + 8, lOffset + 40} { - s.Run(fmt.Sprintf("right offset %d", rOffset), func() { - right = bitmapFromSlice(rightBits, int(rOffset)) - for _, outOffset := range []int64{lOffset, lOffset + 16, lOffset + 24} { - 
s.Run(fmt.Sprintf("out offset %d", outOffset), func() { - s.Run("zero-length", func() { - out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, 0, outOffset) - s.EqualValues(bitutil.BytesForBits(outOffset), out.Len()) - expected := make([]byte, out.Len()) - if out.Len() > 0 { - s.Equal(expected, out.Bytes()) - } else { - s.Nil(out.Bytes()) - } - - memory.Set(out.Bytes(), 0xFF) - op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, 0) - if out.Len() > 0 { - memory.Set(expected, 0xFF) - s.Equal(expected, out.Bytes()) - } else { - s.Nil(out.Bytes()) - } - out.Release() - }) - - out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, length, outOffset) - defer out.Release() - rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) - assertReaderVals(s.T(), rdr, resultBits) - - memory.Set(out.Bytes(), 0x00) - op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, length) - rdr = bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) - assertReaderVals(s.T(), rdr, resultBits) - }) - } - }) - } - }) - } -} - -func (s *BitmapOpSuite) testUnaligned(op bitmapOp, leftBits, rightBits []int, resultBits []bool) { - var ( - left, right []byte - out *memory.Buffer - length int64 - offsets = []int64{0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536} - ) - - for _, lOffset := range offsets { - s.Run(fmt.Sprintf("left offset %d", lOffset), func() { - left = bitmapFromSlice(leftBits, int(lOffset)) - length = int64(len(leftBits)) - for _, rOffset := range offsets { - s.Run(fmt.Sprintf("right offset %d", rOffset), func() { - right = bitmapFromSlice(rightBits, int(rOffset)) - for _, outOffset := range offsets { - s.Run(fmt.Sprintf("out offset %d", outOffset), func() { - s.Run("zero-length", func() { - out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, 0, outOffset) - s.EqualValues(bitutil.BytesForBits(outOffset), out.Len()) - expected := make([]byte, out.Len()) - if out.Len() > 0 { - 
s.Equal(expected, out.Bytes()) - } else { - s.Nil(out.Bytes()) - } - - memory.Set(out.Bytes(), 0xFF) - op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, 0) - if out.Len() > 0 { - memory.Set(expected, 0xFF) - s.Equal(expected, out.Bytes()) - } else { - s.Nil(out.Bytes()) - } - out.Release() - }) - s.Run("alloc", func() { - out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, length, outOffset) - rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) - assertReaderVals(s.T(), rdr, resultBits) - }) - s.Run("noalloc", func() { - memory.Set(out.Bytes(), 0x00) - op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, length) - rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) - assertReaderVals(s.T(), rdr, resultBits) - }) - }) - } - }) - } - }) - } -} - -func (s *BitmapOpSuite) TestBitmapAnd() { - op := bitmapOp{ - noAlloc: bitutil.BitmapAnd, - alloc: bitutil.BitmapAndAlloc, - } - - leftBits := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1} - rightBits := []int{0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0} - resultBits := []bool{false, false, true, false, false, false, false, false, false, true, false, false, false, false} - - s.Run("aligned", func() { - s.testAligned(op, leftBits, rightBits, resultBits) - }) - s.Run("unaligned", func() { - s.testUnaligned(op, leftBits, rightBits, resultBits) - }) -} - -func (s *BitmapOpSuite) TestBitmapOr() { - op := bitmapOp{ - noAlloc: bitutil.BitmapOr, - alloc: bitutil.BitmapOrAlloc, - } - - leftBits := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1} - rightBits := []int{0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0} - resultBits := []bool{false, true, true, true, true, true, false, true, true, true, true, true, true, true} - - s.Run("aligned", func() { - s.testAligned(op, leftBits, rightBits, resultBits) - }) - s.Run("unaligned", func() { - s.testUnaligned(op, leftBits, rightBits, resultBits) - }) -} - -func TestBitmapOps(t *testing.T) { - suite.Run(t, 
new(BitmapOpSuite)) -} - -func TestSmallBitmapOp(t *testing.T) { - // 0b01111111 0b11001111 - left := [2]byte{127, 207} - // 0b11111110 0b01111111 - right := [2]byte{254, 127} - // 0b01111110 0b01001111 - results := [2]byte{126, 79} - - var out [2]byte - bitutil.BitmapAnd(left[:], right[:], 0, 0, out[:], 0, 8) - assert.Equal(t, results[:1], out[:1]) - - bitutil.BitmapAnd(left[:], right[:], 0, 0, out[:], 0, 16) - assert.Equal(t, results, out) -} - -func createRandomBuffer(mem memory.Allocator, src *rand.Rand, nbytes int) []byte { - buf := mem.Allocate(nbytes) - src.Read(buf) - return buf -} - -func benchBitOpImpl(b *testing.B, nBytes, offset int, op noAllocFn) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - r := rand.New(rand.NewSource(0)) - - buf1 := createRandomBuffer(mem, r, nBytes) - buf2 := createRandomBuffer(mem, r, nBytes) - buf3 := createRandomBuffer(mem, r, nBytes) - b.Cleanup(func() { - mem.Free(buf1) - mem.Free(buf2) - mem.Free(buf3) - }) - - numBits := nBytes*8 - offset - b.ResetTimer() - b.SetBytes(bitutil.BytesForBits(int64(numBits)) * 2) - for i := 0; i < b.N; i++ { - op(buf1, buf2, 0, int64(offset), buf3, 0, int64(numBits)) - } -} - -func BenchmarkBitmapAnd(b *testing.B) { - sizes := []int{bufferSize * 4, bufferSize * 16} - offsets := []int{0, 1, 2} - - for _, s := range sizes { - b.Run(fmt.Sprintf("nbytes=%d", s), func(b *testing.B) { - for _, o := range offsets { - b.Run(fmt.Sprintf("%d", o), func(b *testing.B) { - benchBitOpImpl(b, s, o, bitutil.BitmapAnd) - }) - } - }) - } -} diff --git a/go/arrow/bitutil/bitutil.go b/go/arrow/bitutil/bitutil.go deleted file mode 100644 index c4b633c73aa40..0000000000000 --- a/go/arrow/bitutil/bitutil.go +++ /dev/null @@ -1,186 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bitutil - -import ( - "math" - "math/bits" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow/memory" -) - -var ( - BitMask = [8]byte{1, 2, 4, 8, 16, 32, 64, 128} - FlippedBitMask = [8]byte{254, 253, 251, 247, 239, 223, 191, 127} -) - -// IsMultipleOf8 returns whether v is a multiple of 8. -func IsMultipleOf8(v int64) bool { return v&7 == 0 } - -// IsMultipleOf64 returns whether v is a multiple of 64 -func IsMultipleOf64(v int64) bool { return v&63 == 0 } - -func BytesForBits(bits int64) int64 { return (bits + 7) >> 3 } - -// NextPowerOf2 rounds x to the next power of two. -func NextPowerOf2(x int) int { return 1 << uint(bits.Len(uint(x))) } - -// CeilByte rounds size to the next multiple of 8. -func CeilByte(size int) int { return (size + 7) &^ 7 } - -// CeilByte64 rounds size to the next multiple of 8. -func CeilByte64(size int64) int64 { return (size + 7) &^ 7 } - -// BitIsSet returns true if the bit at index i in buf is set (1). -func BitIsSet(buf []byte, i int) bool { return (buf[uint(i)/8] & BitMask[byte(i)%8]) != 0 } - -// BitIsNotSet returns true if the bit at index i in buf is not set (0). -func BitIsNotSet(buf []byte, i int) bool { return (buf[uint(i)/8] & BitMask[byte(i)%8]) == 0 } - -// SetBit sets the bit at index i in buf to 1. -func SetBit(buf []byte, i int) { buf[uint(i)/8] |= BitMask[byte(i)%8] } - -// ClearBit sets the bit at index i in buf to 0. 
-func ClearBit(buf []byte, i int) { buf[uint(i)/8] &= FlippedBitMask[byte(i)%8] } - -// SetBitTo sets the bit at index i in buf to val. -func SetBitTo(buf []byte, i int, val bool) { - if val { - SetBit(buf, i) - } else { - ClearBit(buf, i) - } -} - -// CountSetBits counts the number of 1's in buf up to n bits. -func CountSetBits(buf []byte, offset, n int) int { - if offset > 0 { - return countSetBitsWithOffset(buf, offset, n) - } - - count := 0 - - uint64Bytes := n / uint64SizeBits * 8 - for _, v := range bytesToUint64(buf[:uint64Bytes]) { - count += bits.OnesCount64(v) - } - - for _, v := range buf[uint64Bytes : n/8] { - count += bits.OnesCount8(v) - } - - // tail bits - for i := n &^ 0x7; i < n; i++ { - if BitIsSet(buf, i) { - count++ - } - } - - return count -} - -func countSetBitsWithOffset(buf []byte, offset, n int) int { - count := 0 - - beg := offset - begU8 := roundUp(beg, uint64SizeBits) - - init := min(n, begU8-beg) - for i := offset; i < beg+init; i++ { - if BitIsSet(buf, i) { - count++ - } - } - - begU64 := BytesForBits(int64(beg + init)) - return count + CountSetBits(buf[begU64:], 0, n-init) -} - -func roundUp(v, f int) int { - return (v + (f - 1)) / f * f -} - -func min(a, b int) int { - if a < b { - return a - } - return b -} - -const ( - uint64SizeBytes = int(unsafe.Sizeof(uint64(0))) - uint64SizeBits = uint64SizeBytes * 8 -) - -var ( - // PrecedingBitmask is a convenience set of values as bitmasks for checking - // prefix bits of a byte - PrecedingBitmask = [8]byte{0, 1, 3, 7, 15, 31, 63, 127} - // TrailingBitmask is the bitwise complement version of kPrecedingBitmask - TrailingBitmask = [8]byte{255, 254, 252, 248, 240, 224, 192, 128} -) - -// SetBitsTo is a convenience function to quickly set or unset all the bits -// in a bitmap starting at startOffset for length bits. 
-func SetBitsTo(bits []byte, startOffset, length int64, areSet bool) { - if length == 0 { - return - } - - beg := startOffset - end := startOffset + length - var fill uint8 = 0 - if areSet { - fill = math.MaxUint8 - } - - byteBeg := beg / 8 - byteEnd := end/8 + 1 - - // don't modify bits before the startOffset by using this mask - firstByteMask := PrecedingBitmask[beg%8] - // don't modify bits past the length by using this mask - lastByteMask := TrailingBitmask[end%8] - - if byteEnd == byteBeg+1 { - // set bits within a single byte - onlyByteMask := firstByteMask - if end%8 != 0 { - onlyByteMask = firstByteMask | lastByteMask - } - - bits[byteBeg] &= onlyByteMask - bits[byteBeg] |= fill &^ onlyByteMask - return - } - - // set/clear trailing bits of first byte - bits[byteBeg] &= firstByteMask - bits[byteBeg] |= fill &^ firstByteMask - - if byteEnd-byteBeg > 2 { - memory.Set(bits[byteBeg+1:byteEnd-1], fill) - } - - if end%8 == 0 { - return - } - - bits[byteEnd-1] &= lastByteMask - bits[byteEnd-1] |= fill &^ lastByteMask -} diff --git a/go/arrow/bitutil/bitutil_bytes.go b/go/arrow/bitutil/bitutil_bytes.go deleted file mode 100644 index 09dd5cbc67d39..0000000000000 --- a/go/arrow/bitutil/bitutil_bytes.go +++ /dev/null @@ -1,37 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.20 || tinygo - -package bitutil - -import ( - "unsafe" -) - -func bytesToUint64(b []byte) []uint64 { - if len(b) < uint64SizeBytes { - return nil - } - - ptr := unsafe.SliceData(b) - if ptr == nil { - return nil - } - - return unsafe.Slice((*uint64)(unsafe.Pointer(ptr)), - len(b)/uint64SizeBytes) -} diff --git a/go/arrow/bitutil/bitutil_test.go b/go/arrow/bitutil/bitutil_test.go deleted file mode 100644 index c03bf5268a5ff..0000000000000 --- a/go/arrow/bitutil/bitutil_test.go +++ /dev/null @@ -1,320 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package bitutil_test - -import ( - "fmt" - "math/rand" - "testing" - - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" - "github.com/stretchr/testify/assert" -) - -func TestIsMultipleOf8(t *testing.T) { - for _, tc := range []struct { - v int64 - want bool - }{ - {-16, true}, - {-9, false}, - {-8, true}, - {-7, false}, - {-4, false}, - {-1, false}, - {-0, true}, - {0, true}, - {1, false}, - {4, false}, - {7, false}, - {8, true}, - {9, false}, - {16, true}, - } { - t.Run(fmt.Sprintf("v=%d", tc.v), func(t *testing.T) { - got := bitutil.IsMultipleOf8(tc.v) - if got != tc.want { - t.Fatalf("IsMultipleOf8(%d): got=%v, want=%v", tc.v, got, tc.want) - } - }) - } -} - -func TestCeilByte(t *testing.T) { - tests := []struct { - name string - in, exp int - }{ - {"zero", 0, 0}, - {"five", 5, 8}, - {"sixteen", 16, 16}, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - got := bitutil.CeilByte(test.in) - assert.Equal(t, test.exp, got) - }) - } -} - -func TestBitIsSet(t *testing.T) { - buf := make([]byte, 2) - buf[0] = 0xa1 - buf[1] = 0xc2 - exp := []bool{true, false, false, false, false, true, false, true, false, true, false, false, false, false, true, true} - var got []bool - for i := 0; i < 0x10; i++ { - got = append(got, bitutil.BitIsSet(buf, i)) - } - assert.Equal(t, exp, got) -} - -func TestBitIsNotSet(t *testing.T) { - buf := make([]byte, 2) - buf[0] = 0xa1 - buf[1] = 0xc2 - exp := []bool{false, true, true, true, true, false, true, false, true, false, true, true, true, true, false, false} - var got []bool - for i := 0; i < 0x10; i++ { - got = append(got, bitutil.BitIsNotSet(buf, i)) - } - assert.Equal(t, exp, got) -} - -func TestClearBit(t *testing.T) { - buf := make([]byte, 2) - buf[0] = 0xff - buf[1] = 0xff - for i, v := range []bool{false, true, true, true, true, false, true, false, true, false, true, true, true, true, false, false} { - if v { - bitutil.ClearBit(buf, i) - } - } - 
assert.Equal(t, []byte{0xa1, 0xc2}, buf) -} - -func TestSetBit(t *testing.T) { - buf := make([]byte, 2) - for i, v := range []bool{true, false, false, false, false, true, false, true, false, true, false, false, false, false, true, true} { - if v { - bitutil.SetBit(buf, i) - } - } - assert.Equal(t, []byte{0xa1, 0xc2}, buf) -} - -func TestSetBitTo(t *testing.T) { - buf := make([]byte, 2) - for i, v := range []bool{true, false, false, false, false, true, false, true, false, true, false, false, false, false, true, true} { - bitutil.SetBitTo(buf, i, v) - } - assert.Equal(t, []byte{0xa1, 0xc2}, buf) -} - -func TestCountSetBits(t *testing.T) { - tests := []struct { - name string - buf []byte - off int - n int - exp int - }{ - {"some 03 bits", bbits(0x11000000), 0, 3, 2}, - {"some 11 bits", bbits(0x11000011, 0x01000000), 0, 11, 5}, - {"some 72 bits", bbits(0x11001010, 0x11110000, 0x00001111, 0x11000011, 0x11001010, 0x11110000, 0x00001111, 0x11000011, 0x10001001), 0, 9 * 8, 35}, - {"all 08 bits", bbits(0x11111110), 0, 8, 7}, - {"all 03 bits", bbits(0x11100001), 0, 3, 3}, - {"all 11 bits", bbits(0x11111111, 0x11111111), 0, 11, 11}, - {"all 72 bits", bbits(0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111), 0, 9 * 8, 72}, - {"none 03 bits", bbits(0x00000001), 0, 3, 0}, - {"none 11 bits", bbits(0x00000000, 0x00000000), 0, 11, 0}, - {"none 72 bits", bbits(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), 0, 9 * 8, 0}, - - {"some 03 bits - offset+1", bbits(0x11000000), 1, 3, 1}, - {"some 03 bits - offset+2", bbits(0x11000000), 2, 3, 0}, - {"some 11 bits - offset+1", bbits(0x11000011, 0x01000000, 0x00000000), 1, 11, 4}, - {"some 11 bits - offset+2", bbits(0x11000011, 0x01000000, 0x00000000), 2, 11, 3}, - {"some 11 bits - offset+3", bbits(0x11000011, 0x01000000, 0x00000000), 3, 11, 3}, - {"some 11 bits - offset+6", bbits(0x11000011, 0x01000000, 0x00000000), 6, 11, 
3}, - {"some 11 bits - offset+7", bbits(0x11000011, 0x01000000, 0x00000000), 7, 11, 2}, - {"some 11 bits - offset+8", bbits(0x11000011, 0x01000000, 0x00000000), 8, 11, 1}, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - got := bitutil.CountSetBits(test.buf, test.off, test.n) - assert.Equal(t, test.exp, got) - }) - } -} - -func TestCountSetBitsOffset(t *testing.T) { - slowCountSetBits := func(buf []byte, offset, n int) int { - count := 0 - for i := offset; i < offset+n; i++ { - if bitutil.BitIsSet(buf, i) { - count++ - } - } - return count - } - - const ( - bufSize = 1000 - nbits = bufSize * 8 - ) - - offsets := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 32, 37, 63, 64, 128, nbits - 30, nbits - 64} - - buf := make([]byte, bufSize) - - rng := rand.New(rand.NewSource(0)) - _, err := rng.Read(buf) - if err != nil { - t.Fatal(err) - } - - for i, offset := range offsets { - want := slowCountSetBits(buf, offset, nbits-offset) - got := bitutil.CountSetBits(buf, offset, nbits-offset) - if got != want { - t.Errorf("offset[%2d/%2d]=%5d. 
got=%5d, want=%5d", i+1, len(offsets), offset, got, want) - } - } -} - -func TestSetBitsTo(t *testing.T) { - for _, fillByte := range []byte{0x00, 0xFF} { - { - // set within a byte - bm := []byte{fillByte, fillByte, fillByte, fillByte} - bitutil.SetBitsTo(bm, 2, 2, true) - bitutil.SetBitsTo(bm, 4, 2, false) - assert.Equal(t, []byte{(fillByte &^ 0x3C) | 0xC}, bm[:1]) - } - { - // test straddling a single byte boundary - bm := []byte{fillByte, fillByte, fillByte, fillByte} - bitutil.SetBitsTo(bm, 4, 7, true) - bitutil.SetBitsTo(bm, 11, 7, false) - assert.Equal(t, []byte{(fillByte & 0xF) | 0xF0, 0x7, fillByte &^ 0x3}, bm[:3]) - } - { - // test byte aligned end - bm := []byte{fillByte, fillByte, fillByte, fillByte} - bitutil.SetBitsTo(bm, 4, 4, true) - bitutil.SetBitsTo(bm, 8, 8, false) - assert.Equal(t, []byte{(fillByte & 0xF) | 0xF0, 0x00, fillByte}, bm[:3]) - } - { - // test byte aligned end, multiple bytes - bm := []byte{fillByte, fillByte, fillByte, fillByte} - bitutil.SetBitsTo(bm, 0, 24, false) - falseByte := byte(0) - assert.Equal(t, []byte{falseByte, falseByte, falseByte, fillByte}, bm) - } - } -} - -func bbits(v ...int32) []byte { - return tools.IntsToBitsLSB(v...) 
-} - -func BenchmarkBitIsSet(b *testing.B) { - buf := make([]byte, 32) - b.ResetTimer() - for i := 0; i < b.N; i++ { - bitutil.BitIsSet(buf, (i%32)&0x1a) - } -} - -func BenchmarkSetBit(b *testing.B) { - buf := make([]byte, 32) - b.ResetTimer() - for i := 0; i < b.N; i++ { - bitutil.SetBit(buf, (i%32)&0x1a) - } -} - -func BenchmarkSetBitTo(b *testing.B) { - vals := []bool{true, false, false, false, false, true, false, true, false, true, false, false, false, false, true, true} - buf := make([]byte, 32) - b.ResetTimer() - for i := 0; i < b.N; i++ { - bitutil.SetBitTo(buf, i%32, vals[i%len(vals)]) - } -} - -var ( - intval int -) - -func benchmarkCountSetBitsN(b *testing.B, offset, n int) { - nn := n/8 + 1 - buf := make([]byte, nn) - //src := [4]byte{0x1f, 0xaa, 0xba, 0x11} - src := [4]byte{0x01, 0x01, 0x01, 0x01} - for i := 0; i < nn; i++ { - buf[i] = src[i&0x3] - } - b.ResetTimer() - var res int - for i := 0; i < b.N; i++ { - res = bitutil.CountSetBits(buf, offset, n-offset) - } - intval = res -} - -func BenchmarkCountSetBits_3(b *testing.B) { - benchmarkCountSetBitsN(b, 0, 3) -} - -func BenchmarkCountSetBits_32(b *testing.B) { - benchmarkCountSetBitsN(b, 0, 32) -} - -func BenchmarkCountSetBits_128(b *testing.B) { - benchmarkCountSetBitsN(b, 0, 128) -} - -func BenchmarkCountSetBits_1000(b *testing.B) { - benchmarkCountSetBitsN(b, 0, 1000) -} - -func BenchmarkCountSetBits_1024(b *testing.B) { - benchmarkCountSetBitsN(b, 0, 1024) -} - -func BenchmarkCountSetBitsOffset_3(b *testing.B) { - benchmarkCountSetBitsN(b, 1, 3) -} - -func BenchmarkCountSetBitsOffset_32(b *testing.B) { - benchmarkCountSetBitsN(b, 1, 32) -} - -func BenchmarkCountSetBitsOffset_128(b *testing.B) { - benchmarkCountSetBitsN(b, 1, 128) -} - -func BenchmarkCountSetBitsOffset_1000(b *testing.B) { - benchmarkCountSetBitsN(b, 1, 1000) -} - -func BenchmarkCountSetBitsOffset_1024(b *testing.B) { - benchmarkCountSetBitsN(b, 1, 1024) -} diff --git a/go/arrow/bitutil/endian_default.go 
b/go/arrow/bitutil/endian_default.go deleted file mode 100644 index ecbbaa70d04b6..0000000000000 --- a/go/arrow/bitutil/endian_default.go +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !s390x -// +build !s390x - -package bitutil - -import ( - "unsafe" -) - -var toFromLEFunc = func(in uint64) uint64 { return in } - -func getLSB(v uint64) byte { - return (*[8]byte)(unsafe.Pointer(&v))[0] -} - -func setLSB(v *uint64, b byte) { - (*[8]byte)(unsafe.Pointer(v))[0] = b -} diff --git a/go/arrow/bitutil/endian_s390x.go b/go/arrow/bitutil/endian_s390x.go deleted file mode 100644 index e99605f5848fa..0000000000000 --- a/go/arrow/bitutil/endian_s390x.go +++ /dev/null @@ -1,32 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bitutil - -import ( - "math/bits" - "unsafe" -) - -var toFromLEFunc = bits.ReverseBytes64 - -func getLSB(v uint64) byte { - return (*[8]byte)(unsafe.Pointer(&v))[7] -} - -func setLSB(v *uint64, b byte) { - (*[8]byte)(unsafe.Pointer(v))[7] = b -} diff --git a/go/arrow/cdata/arrow/c/abi.h b/go/arrow/cdata/arrow/c/abi.h deleted file mode 100644 index d58417e6fbcf2..0000000000000 --- a/go/arrow/cdata/arrow/c/abi.h +++ /dev/null @@ -1,111 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef ARROW_C_DATA_INTERFACE -#define ARROW_C_DATA_INTERFACE - -#define ARROW_FLAG_DICTIONARY_ORDERED 1 -#define ARROW_FLAG_NULLABLE 2 -#define ARROW_FLAG_MAP_KEYS_SORTED 4 - -struct ArrowSchema { - // Array type description - const char* format; - const char* name; - const char* metadata; - int64_t flags; - int64_t n_children; - struct ArrowSchema** children; - struct ArrowSchema* dictionary; - - // Release callback - void (*release)(struct ArrowSchema*); - // Opaque producer-specific data - void* private_data; -}; - -struct ArrowArray { - // Array data description - int64_t length; - int64_t null_count; - int64_t offset; - int64_t n_buffers; - int64_t n_children; - const void** buffers; - struct ArrowArray** children; - struct ArrowArray* dictionary; - - // Release callback - void (*release)(struct ArrowArray*); - // Opaque producer-specific data - void* private_data; -}; - -#endif // ARROW_C_DATA_INTERFACE - -#ifndef ARROW_C_STREAM_INTERFACE -#define ARROW_C_STREAM_INTERFACE - -struct ArrowArrayStream { - // Callback to get the stream type - // (will be the same for all arrays in the stream). - // - // Return value: 0 if successful, an `errno`-compatible error code otherwise. - // - // If successful, the ArrowSchema must be released independently from the stream. - int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); - - // Callback to get the next array - // (if no error and the array is released, the stream has ended) - // - // Return value: 0 if successful, an `errno`-compatible error code otherwise. - // - // If successful, the ArrowArray must be released independently from the stream. - int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); - - // Callback to get optional detailed error information. - // This must only be called if the last stream operation failed - // with a non-0 return code. 
- // - // Return value: pointer to a null-terminated character array describing - // the last error, or NULL if no description is available. - // - // The returned pointer is only valid until the next operation on this stream - // (including release). - const char* (*get_last_error)(struct ArrowArrayStream*); - - // Release callback: release the stream's own resources. - // Note that arrays returned by `get_next` must be individually released. - void (*release)(struct ArrowArrayStream*); - - // Opaque producer-specific data - void* private_data; -}; - -#endif // ARROW_C_STREAM_INTERFACE - -#ifdef __cplusplus -} -#endif diff --git a/go/arrow/cdata/arrow/c/helpers.h b/go/arrow/cdata/arrow/c/helpers.h deleted file mode 100644 index 6581403b57c46..0000000000000 --- a/go/arrow/cdata/arrow/c/helpers.h +++ /dev/null @@ -1,117 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include -#include - -#include "arrow/c/abi.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/// Query whether the C schema is released -static inline int ArrowSchemaIsReleased(const struct ArrowSchema* schema) { - return schema->release == NULL; -} - -/// Mark the C schema released (for use in release callbacks) -static inline void ArrowSchemaMarkReleased(struct ArrowSchema* schema) { - schema->release = NULL; -} - -/// Move the C schema from `src` to `dest` -/// -/// Note `dest` must *not* point to a valid schema already, otherwise there -/// will be a memory leak. -static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dest) { - assert(dest != src); - assert(!ArrowSchemaIsReleased(src)); - memcpy(dest, src, sizeof(struct ArrowSchema)); - ArrowSchemaMarkReleased(src); -} - -/// Release the C schema, if necessary, by calling its release callback -static inline void ArrowSchemaRelease(struct ArrowSchema* schema) { - if (!ArrowSchemaIsReleased(schema)) { - schema->release(schema); - assert(ArrowSchemaIsReleased(schema)); - } -} - -/// Query whether the C array is released -static inline int ArrowArrayIsReleased(const struct ArrowArray* array) { - return array->release == NULL; -} - -/// Mark the C array released (for use in release callbacks) -static inline void ArrowArrayMarkReleased(struct ArrowArray* array) { array->release = NULL; } - -/// Move the C array from `src` to `dest` -/// -/// Note `dest` must *not* point to a valid array already, otherwise there -/// will be a memory leak. 
-static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) { - assert(dest != src); - assert(!ArrowArrayIsReleased(src)); - memcpy(dest, src, sizeof(struct ArrowArray)); - ArrowArrayMarkReleased(src); -} - -/// Release the C array, if necessary, by calling its release callback -static inline void ArrowArrayRelease(struct ArrowArray* array) { - if (!ArrowArrayIsReleased(array)) { - array->release(array); - assert(ArrowArrayIsReleased(array)); - } -} - -/// Query whether the C array stream is released -static inline int ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) { - return stream->release == NULL; -} - -/// Mark the C array stream released (for use in release callbacks) -static inline void ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) { - stream->release = NULL; -} - -/// Move the C array stream from `src` to `dest` -/// -/// Note `dest` must *not* point to a valid stream already, otherwise there -/// will be a memory leak. -static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, - struct ArrowArrayStream* dest) { - assert(dest != src); - assert(!ArrowArrayStreamIsReleased(src)); - memcpy(dest, src, sizeof(struct ArrowArrayStream)); - ArrowArrayStreamMarkReleased(src); -} - -/// Release the C array stream, if necessary, by calling its release callback -static inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) { - if (!ArrowArrayStreamIsReleased(stream)) { - stream->release(stream); - assert(ArrowArrayStreamIsReleased(stream)); - } -} - -#ifdef __cplusplus -} -#endif diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go deleted file mode 100644 index 0562eaed0fb7a..0000000000000 --- a/go/arrow/cdata/cdata.go +++ /dev/null @@ -1,1028 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build cgo -// +build cgo - -package cdata - -// implement handling of the Arrow C Data Interface. At least from a consuming side. - -// #include "arrow/c/abi.h" -// #include "arrow/c/helpers.h" -// #include -// int stream_get_schema(struct ArrowArrayStream* st, struct ArrowSchema* out) { return st->get_schema(st, out); } -// int stream_get_next(struct ArrowArrayStream* st, struct ArrowArray* out) { return st->get_next(st, out); } -// const char* stream_get_last_error(struct ArrowArrayStream* st) { return st->get_last_error(st); } -// struct ArrowArray* get_arr() { -// struct ArrowArray* out = (struct ArrowArray*)(malloc(sizeof(struct ArrowArray))); -// memset(out, 0, sizeof(struct ArrowArray)); -// return out; -// } -// struct ArrowArrayStream* get_stream() { -// struct ArrowArrayStream* out = (struct ArrowArrayStream*)malloc(sizeof(struct ArrowArrayStream)); -// memset(out, 0, sizeof(struct ArrowArrayStream)); -// return out; -// } -// -import "C" - -import ( - "errors" - "fmt" - "io" - "runtime" - "strconv" - "strings" - "syscall" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "golang.org/x/xerrors" -) - -type ( - // CArrowSchema is the C Data Interface for ArrowSchemas defined in abi.h - CArrowSchema = C.struct_ArrowSchema - // 
CArrowArray is the C Data Interface object for Arrow Arrays as defined in abi.h - CArrowArray = C.struct_ArrowArray - // CArrowArrayStream is the C Stream Interface object for handling streams of record batches. - CArrowArrayStream = C.struct_ArrowArrayStream -) - -// Map from the defined strings to their corresponding arrow.DataType interface -// object instances, for types that don't require params. -var formatToSimpleType = map[string]arrow.DataType{ - "n": arrow.Null, - "b": arrow.FixedWidthTypes.Boolean, - "c": arrow.PrimitiveTypes.Int8, - "C": arrow.PrimitiveTypes.Uint8, - "s": arrow.PrimitiveTypes.Int16, - "S": arrow.PrimitiveTypes.Uint16, - "i": arrow.PrimitiveTypes.Int32, - "I": arrow.PrimitiveTypes.Uint32, - "l": arrow.PrimitiveTypes.Int64, - "L": arrow.PrimitiveTypes.Uint64, - "e": arrow.FixedWidthTypes.Float16, - "f": arrow.PrimitiveTypes.Float32, - "g": arrow.PrimitiveTypes.Float64, - "z": arrow.BinaryTypes.Binary, - "Z": arrow.BinaryTypes.LargeBinary, - "u": arrow.BinaryTypes.String, - "U": arrow.BinaryTypes.LargeString, - "vz": arrow.BinaryTypes.BinaryView, - "vu": arrow.BinaryTypes.StringView, - "tdD": arrow.FixedWidthTypes.Date32, - "tdm": arrow.FixedWidthTypes.Date64, - "tts": arrow.FixedWidthTypes.Time32s, - "ttm": arrow.FixedWidthTypes.Time32ms, - "ttu": arrow.FixedWidthTypes.Time64us, - "ttn": arrow.FixedWidthTypes.Time64ns, - "tDs": arrow.FixedWidthTypes.Duration_s, - "tDm": arrow.FixedWidthTypes.Duration_ms, - "tDu": arrow.FixedWidthTypes.Duration_us, - "tDn": arrow.FixedWidthTypes.Duration_ns, - "tiM": arrow.FixedWidthTypes.MonthInterval, - "tiD": arrow.FixedWidthTypes.DayTimeInterval, - "tin": arrow.FixedWidthTypes.MonthDayNanoInterval, -} - -// decode metadata from C which is encoded as -// -// [int32] -> number of metadata pairs -// for 0..n -// [int32] -> number of bytes in key -// [n bytes] -> key value -// [int32] -> number of bytes in value -// [n bytes] -> value -func decodeCMetadata(md *C.char) arrow.Metadata { - if md == nil { - 
return arrow.Metadata{} - } - - // don't copy the bytes, just reference them directly - const maxlen = 0x7fffffff - data := (*[maxlen]byte)(unsafe.Pointer(md))[:] - - readint32 := func() int32 { - v := *(*int32)(unsafe.Pointer(&data[0])) - data = data[arrow.Int32SizeBytes:] - return v - } - - readstr := func() string { - l := readint32() - s := string(data[:l]) - data = data[l:] - return s - } - - npairs := readint32() - if npairs == 0 { - return arrow.Metadata{} - } - - keys := make([]string, npairs) - vals := make([]string, npairs) - - for i := int32(0); i < npairs; i++ { - keys[i] = readstr() - vals[i] = readstr() - } - - return arrow.NewMetadata(keys, vals) -} - -// convert a C.ArrowSchema to an arrow.Field to maintain metadata with the schema -func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { - // always release, even on error - defer C.ArrowSchemaRelease(schema) - - var childFields []arrow.Field - if schema.n_children > 0 { - // call ourselves recursively if there are children. - // set up a slice to reference safely - schemaChildren := unsafe.Slice(schema.children, schema.n_children) - childFields = make([]arrow.Field, schema.n_children) - for i, c := range schemaChildren { - childFields[i], err = importSchema((*CArrowSchema)(c)) - if err != nil { - return - } - } - } - - // copy the schema name from the c-string - ret.Name = C.GoString(schema.name) - ret.Nullable = (schema.flags & C.ARROW_FLAG_NULLABLE) != 0 - ret.Metadata = decodeCMetadata(schema.metadata) - - // copies the c-string here, but it's very small - f := C.GoString(schema.format) - // handle our non-parameterized simple types. 
- dt, ok := formatToSimpleType[f] - if ok { - ret.Type = dt - - if schema.dictionary != nil { - valueField, err := importSchema(schema.dictionary) - if err != nil { - return ret, err - } - - ret.Type = &arrow.DictionaryType{ - IndexType: ret.Type, - ValueType: valueField.Type, - Ordered: schema.dictionary.flags&C.ARROW_FLAG_DICTIONARY_ORDERED != 0} - } - - return - } - - // handle types with params via colon - typs := strings.Split(f, ":") - defaulttz := "" - switch typs[0] { - case "tss": - tz := typs[1] - if len(typs[1]) == 0 { - tz = defaulttz - } - dt = &arrow.TimestampType{Unit: arrow.Second, TimeZone: tz} - case "tsm": - tz := typs[1] - if len(typs[1]) == 0 { - tz = defaulttz - } - dt = &arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: tz} - case "tsu": - tz := typs[1] - if len(typs[1]) == 0 { - tz = defaulttz - } - dt = &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: tz} - case "tsn": - tz := typs[1] - if len(typs[1]) == 0 { - tz = defaulttz - } - dt = &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: tz} - case "w": // fixed size binary is "w:##" where ## is the byteWidth - byteWidth, err := strconv.Atoi(typs[1]) - if err != nil { - return ret, err - } - dt = &arrow.FixedSizeBinaryType{ByteWidth: byteWidth} - case "d": // decimal types are d:,[,] size is assumed 128 if left out - props := typs[1] - propList := strings.Split(props, ",") - bitwidth := 128 - var precision, scale int - - if len(propList) < 2 || len(propList) > 3 { - return ret, xerrors.Errorf("invalid decimal spec '%s': wrong number of properties", f) - } else if len(propList) == 3 { - bitwidth, err = strconv.Atoi(propList[2]) - if err != nil { - return ret, xerrors.Errorf("could not parse decimal bitwidth in '%s': %s", f, err.Error()) - } - } - - precision, err = strconv.Atoi(propList[0]) - if err != nil { - return ret, xerrors.Errorf("could not parse decimal precision in '%s': %s", f, err.Error()) - } - - scale, err = strconv.Atoi(propList[1]) - if err != nil { - return 
ret, xerrors.Errorf("could not parse decimal scale in '%s': %s", f, err.Error()) - } - - if bitwidth == 128 { - dt = &arrow.Decimal128Type{Precision: int32(precision), Scale: int32(scale)} - } else if bitwidth == 256 { - dt = &arrow.Decimal256Type{Precision: int32(precision), Scale: int32(scale)} - } else { - return ret, xerrors.Errorf("only decimal128 and decimal256 are supported, got '%s'", f) - } - } - - if f[0] == '+' { // types with children - switch f[1] { - case 'l': // list - dt = arrow.ListOfField(childFields[0]) - case 'L': // large list - dt = arrow.LargeListOfField(childFields[0]) - case 'v': // list view/large list view - if f[2] == 'l' { - dt = arrow.ListViewOfField(childFields[0]) - } else if f[2] == 'L' { - dt = arrow.LargeListViewOfField(childFields[0]) - } - case 'w': // fixed size list is w:# where # is the list size. - listSize, err := strconv.Atoi(strings.Split(f, ":")[1]) - if err != nil { - return ret, err - } - - dt = arrow.FixedSizeListOfField(int32(listSize), childFields[0]) - case 's': // struct - dt = arrow.StructOf(childFields...) - case 'r': // run-end encoded - if len(childFields) != 2 { - return ret, fmt.Errorf("%w: run-end encoded arrays must have 2 children", arrow.ErrInvalid) - } - dt = arrow.RunEndEncodedOf(childFields[0].Type, childFields[1].Type) - case 'm': // map type is basically a list of structs. 
- st := childFields[0].Type.(*arrow.StructType) - dt = arrow.MapOf(st.Field(0).Type, st.Field(1).Type) - dt.(*arrow.MapType).KeysSorted = (schema.flags & C.ARROW_FLAG_MAP_KEYS_SORTED) != 0 - case 'u': // union - var mode arrow.UnionMode - switch f[2] { - case 'd': - mode = arrow.DenseMode - case 's': - mode = arrow.SparseMode - default: - err = fmt.Errorf("%w: invalid union type", arrow.ErrInvalid) - return - } - - codes := strings.Split(strings.Split(f, ":")[1], ",") - typeCodes := make([]arrow.UnionTypeCode, 0, len(codes)) - for _, i := range codes { - v, e := strconv.ParseInt(i, 10, 8) - if e != nil { - err = fmt.Errorf("%w: invalid type code: %s", arrow.ErrInvalid, e) - return - } - if v < 0 { - err = fmt.Errorf("%w: negative type code in union: format string %s", arrow.ErrInvalid, f) - return - } - typeCodes = append(typeCodes, arrow.UnionTypeCode(v)) - } - - if len(childFields) != len(typeCodes) { - err = fmt.Errorf("%w: ArrowArray struct number of children incompatible with format string", arrow.ErrInvalid) - return - } - - dt = arrow.UnionOf(mode, childFields, typeCodes) - } - } - - if dt == nil { - // if we didn't find a type, then it's something we haven't implemented. - err = xerrors.New("unimplemented type") - } else { - ret.Type = dt - } - - return -} - -// importer to keep track when importing C ArrowArray objects. -type cimporter struct { - dt arrow.DataType - arr *CArrowArray - data arrow.ArrayData - parent *cimporter - children []cimporter - cbuffers []*C.void - - alloc *importAllocator -} - -func (imp *cimporter) importChild(parent *cimporter, src *CArrowArray) error { - imp.parent, imp.arr, imp.alloc = parent, src, parent.alloc - return imp.doImport() -} - -// import any child arrays for lists, structs, and so on. 
-func (imp *cimporter) doImportChildren() error { - children := unsafe.Slice(imp.arr.children, imp.arr.n_children) - - if len(children) > 0 { - imp.children = make([]cimporter, len(children)) - } - - // handle the cases - switch imp.dt.ID() { - case arrow.LIST: // only one child to import - imp.children[0].dt = imp.dt.(*arrow.ListType).Elem() - if err := imp.children[0].importChild(imp, children[0]); err != nil { - return err - } - case arrow.LARGE_LIST: // only one child to import - imp.children[0].dt = imp.dt.(*arrow.LargeListType).Elem() - if err := imp.children[0].importChild(imp, children[0]); err != nil { - return err - } - case arrow.LIST_VIEW: // only one child to import - imp.children[0].dt = imp.dt.(*arrow.ListViewType).Elem() - if err := imp.children[0].importChild(imp, children[0]); err != nil { - return err - } - case arrow.LARGE_LIST_VIEW: // only one child to import - imp.children[0].dt = imp.dt.(*arrow.LargeListViewType).Elem() - if err := imp.children[0].importChild(imp, children[0]); err != nil { - return err - } - case arrow.FIXED_SIZE_LIST: // only one child to import - imp.children[0].dt = imp.dt.(*arrow.FixedSizeListType).Elem() - if err := imp.children[0].importChild(imp, children[0]); err != nil { - return err - } - case arrow.STRUCT: // import all the children - st := imp.dt.(*arrow.StructType) - for i, c := range children { - imp.children[i].dt = st.Field(i).Type - imp.children[i].importChild(imp, c) - } - case arrow.RUN_END_ENCODED: // import run-ends and values - st := imp.dt.(*arrow.RunEndEncodedType) - imp.children[0].dt = st.RunEnds() - if err := imp.children[0].importChild(imp, children[0]); err != nil { - return err - } - imp.children[1].dt = st.Encoded() - if err := imp.children[1].importChild(imp, children[1]); err != nil { - return err - } - case arrow.MAP: // only one child to import, it's a struct array - imp.children[0].dt = imp.dt.(*arrow.MapType).Elem() - if err := imp.children[0].importChild(imp, children[0]); err != nil { 
- return err - } - case arrow.DENSE_UNION: - dt := imp.dt.(*arrow.DenseUnionType) - for i, c := range children { - imp.children[i].dt = dt.Fields()[i].Type - imp.children[i].importChild(imp, c) - } - case arrow.SPARSE_UNION: - dt := imp.dt.(*arrow.SparseUnionType) - for i, c := range children { - imp.children[i].dt = dt.Fields()[i].Type - imp.children[i].importChild(imp, c) - } - } - - return nil -} - -func (imp *cimporter) initarr() { - imp.arr = C.get_arr() - if imp.alloc == nil { - imp.alloc = &importAllocator{arr: imp.arr} - } -} - -func (imp *cimporter) doImportArr(src *CArrowArray) error { - imp.arr = C.get_arr() - C.ArrowArrayMove(src, imp.arr) - if imp.alloc == nil { - imp.alloc = &importAllocator{arr: imp.arr} - } - - // we tie the releasing of the array to when the buffers are - // cleaned up, so if there are no buffers that we've imported - // such as for a null array or a nested array with no bitmap - // and only null columns, then we can release the CArrowArray - // struct immediately after import, since we have no imported - // memory that we have to track the lifetime of. - defer func() { - if imp.alloc.bufCount == 0 { - C.ArrowArrayRelease(imp.arr) - C.free(unsafe.Pointer(imp.arr)) - } - }() - - return imp.doImport() -} - -// import is called recursively as needed for importing an array and its children -// in order to generate array.Data objects -func (imp *cimporter) doImport() error { - // move the array from the src object passed in to the one referenced by - // this importer. That way we can set up a finalizer on the created - // arrow.ArrayData object so we clean up our Array's memory when garbage collected. - defer func(arr *CArrowArray) { - // this should only occur in the case of an error happening - // during import, at which point we need to clean up the - // ArrowArray struct we allocated. 
- if imp.data == nil { - C.free(unsafe.Pointer(arr)) - } - }(imp.arr) - - // import any children - if err := imp.doImportChildren(); err != nil { - return err - } - - for _, c := range imp.children { - if c.data != nil { - defer c.data.Release() - } - } - - if imp.arr.n_buffers > 0 { - // get a view of the buffers, zero-copy. we're just looking at the pointers - imp.cbuffers = unsafe.Slice((**C.void)(unsafe.Pointer(imp.arr.buffers)), imp.arr.n_buffers) - } - - // handle each of our type cases - switch dt := imp.dt.(type) { - case *arrow.NullType: - if err := imp.checkNoChildren(); err != nil { - return err - } - - imp.data = array.NewData(dt, int(imp.arr.length), nil, nil, int(imp.arr.null_count), int(imp.arr.offset)) - case arrow.FixedWidthDataType: - return imp.importFixedSizePrimitive() - case *arrow.StringType: - return imp.importStringLike(int64(arrow.Int32SizeBytes)) - case *arrow.BinaryType: - return imp.importStringLike(int64(arrow.Int32SizeBytes)) - case *arrow.LargeStringType: - return imp.importStringLike(int64(arrow.Int64SizeBytes)) - case *arrow.LargeBinaryType: - return imp.importStringLike(int64(arrow.Int64SizeBytes)) - case *arrow.StringViewType: - return imp.importBinaryViewLike() - case *arrow.BinaryViewType: - return imp.importBinaryViewLike() - case *arrow.ListType: - return imp.importListLike() - case *arrow.LargeListType: - return imp.importListLike() - case *arrow.ListViewType: - return imp.importListViewLike() - case *arrow.LargeListViewType: - return imp.importListViewLike() - case *arrow.MapType: - return imp.importListLike() - case *arrow.FixedSizeListType: - if err := imp.checkNumChildren(1); err != nil { - return err - } - - if err := imp.checkNumBuffers(1); err != nil { - return err - } - - nulls, err := imp.importNullBitmap(0) - if err != nil { - return err - } - if nulls != nil { - defer nulls.Release() - } - - imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{nulls}, []arrow.ArrayData{imp.children[0].data}, 
int(imp.arr.null_count), int(imp.arr.offset)) - case *arrow.StructType: - if err := imp.checkNumBuffers(1); err != nil { - return err - } - - nulls, err := imp.importNullBitmap(0) - if err != nil { - return err - } - if nulls != nil { - defer nulls.Release() - } - - children := make([]arrow.ArrayData, len(imp.children)) - for i := range imp.children { - children[i] = imp.children[i].data - } - - imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{nulls}, children, int(imp.arr.null_count), int(imp.arr.offset)) - case *arrow.RunEndEncodedType: - if err := imp.checkNumBuffers(0); err != nil { - return err - } - - if len(imp.children) != 2 { - return fmt.Errorf("%w: run-end encoded array should have 2 children", arrow.ErrInvalid) - } - - children := []arrow.ArrayData{imp.children[0].data, imp.children[1].data} - imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{}, children, int(imp.arr.null_count), int(imp.arr.offset)) - case *arrow.DenseUnionType: - if err := imp.checkNoNulls(); err != nil { - return err - } - - bufs := []*memory.Buffer{nil, nil, nil} - var err error - if imp.arr.n_buffers == 3 { - // legacy format exported by older arrow c++ versions - if bufs[1], err = imp.importFixedSizeBuffer(1, 1); err != nil { - return err - } - defer bufs[1].Release() - if bufs[2], err = imp.importFixedSizeBuffer(2, int64(arrow.Int32SizeBytes)); err != nil { - return err - } - defer bufs[2].Release() - } else { - if err := imp.checkNumBuffers(2); err != nil { - return err - } - - if bufs[1], err = imp.importFixedSizeBuffer(0, 1); err != nil { - return err - } - defer bufs[1].Release() - if bufs[2], err = imp.importFixedSizeBuffer(1, int64(arrow.Int32SizeBytes)); err != nil { - return err - } - defer bufs[2].Release() - } - - children := make([]arrow.ArrayData, len(imp.children)) - for i := range imp.children { - children[i] = imp.children[i].data - } - imp.data = array.NewData(dt, int(imp.arr.length), bufs, children, 0, int(imp.arr.offset)) - 
case *arrow.SparseUnionType: - if err := imp.checkNoNulls(); err != nil { - return err - } - - var buf *memory.Buffer - var err error - if imp.arr.n_buffers == 2 { - // legacy format exported by older Arrow C++ versions - if buf, err = imp.importFixedSizeBuffer(1, 1); err != nil { - return err - } - defer buf.Release() - } else { - if err := imp.checkNumBuffers(1); err != nil { - return err - } - - if buf, err = imp.importFixedSizeBuffer(0, 1); err != nil { - return err - } - defer buf.Release() - } - - children := make([]arrow.ArrayData, len(imp.children)) - for i := range imp.children { - children[i] = imp.children[i].data - } - imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{nil, buf}, children, 0, int(imp.arr.offset)) - default: - return fmt.Errorf("unimplemented type %s", dt) - } - - return nil -} - -func (imp *cimporter) importStringLike(offsetByteWidth int64) (err error) { - if err = imp.checkNoChildren(); err != nil { - return - } - - if err = imp.checkNumBuffers(3); err != nil { - return - } - - var ( - nulls, offsets, values *memory.Buffer - ) - if nulls, err = imp.importNullBitmap(0); err != nil { - return - } - if nulls != nil { - defer nulls.Release() - } - - if offsets, err = imp.importOffsetsBuffer(1, offsetByteWidth); err != nil { - return - } - defer offsets.Release() - - var nvals int64 - switch offsetByteWidth { - case 4: - typedOffsets := arrow.Int32Traits.CastFromBytes(offsets.Bytes()) - nvals = int64(typedOffsets[imp.arr.offset+imp.arr.length]) - case 8: - typedOffsets := arrow.Int64Traits.CastFromBytes(offsets.Bytes()) - nvals = typedOffsets[imp.arr.offset+imp.arr.length] - } - if values, err = imp.importVariableValuesBuffer(2, 1, nvals); err != nil { - return - } - defer values.Release() - - imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets, values}, nil, int(imp.arr.null_count), int(imp.arr.offset)) - return -} - -func (imp *cimporter) importBinaryViewLike() (err error) { - if err = 
imp.checkNoChildren(); err != nil { - return - } - - buffers := make([]*memory.Buffer, len(imp.cbuffers)-1) - defer memory.ReleaseBuffers(buffers) - - if buffers[0], err = imp.importNullBitmap(0); err != nil { - return - } - - if buffers[1], err = imp.importFixedSizeBuffer(1, int64(arrow.ViewHeaderSizeBytes)); err != nil { - return - } - - dataBufferSizes := unsafe.Slice((*int64)(unsafe.Pointer(imp.cbuffers[len(buffers)])), len(buffers)-2) - for i, size := range dataBufferSizes { - if buffers[i+2], err = imp.importVariableValuesBuffer(i+2, 1, size); err != nil { - return - } - } - - imp.data = array.NewData(imp.dt, int(imp.arr.length), buffers, nil, int(imp.arr.null_count), int(imp.arr.offset)) - return -} - -func (imp *cimporter) importListLike() (err error) { - if err = imp.checkNumChildren(1); err != nil { - return err - } - - if err = imp.checkNumBuffers(2); err != nil { - return err - } - - var nulls, offsets *memory.Buffer - if nulls, err = imp.importNullBitmap(0); err != nil { - return - } - if nulls != nil { - defer nulls.Release() - } - - offsetSize := imp.dt.Layout().Buffers[1].ByteWidth - if offsets, err = imp.importOffsetsBuffer(1, int64(offsetSize)); err != nil { - return - } - if offsets != nil { - defer offsets.Release() - } - - imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets}, []arrow.ArrayData{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) - return -} - -func (imp *cimporter) importListViewLike() (err error) { - offsetSize := int64(imp.dt.Layout().Buffers[1].ByteWidth) - - if err = imp.checkNumChildren(1); err != nil { - return err - } - - if err = imp.checkNumBuffers(3); err != nil { - return err - } - - var nulls, offsets, sizes *memory.Buffer - if nulls, err = imp.importNullBitmap(0); err != nil { - return - } - if nulls != nil { - defer nulls.Release() - } - - if offsets, err = imp.importFixedSizeBuffer(1, offsetSize); err != nil { - return - } - if offsets != nil { - defer 
offsets.Release() - } - - if sizes, err = imp.importFixedSizeBuffer(2, offsetSize); err != nil { - return - } - if sizes != nil { - defer sizes.Release() - } - - imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets, sizes}, []arrow.ArrayData{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) - return -} - -func (imp *cimporter) importFixedSizePrimitive() error { - if err := imp.checkNoChildren(); err != nil { - return err - } - - if err := imp.checkNumBuffers(2); err != nil { - return err - } - - nulls, err := imp.importNullBitmap(0) - if err != nil { - return err - } - - var values *memory.Buffer - - fw := imp.dt.(arrow.FixedWidthDataType) - if bitutil.IsMultipleOf8(int64(fw.BitWidth())) { - values, err = imp.importFixedSizeBuffer(1, bitutil.BytesForBits(int64(fw.BitWidth()))) - } else { - if fw.BitWidth() != 1 { - return xerrors.New("invalid bitwidth") - } - values, err = imp.importBitsBuffer(1) - } - - if err != nil { - return err - } - - var dict *array.Data - if dt, ok := imp.dt.(*arrow.DictionaryType); ok { - dictImp := &cimporter{dt: dt.ValueType} - if err := dictImp.importChild(imp, imp.arr.dictionary); err != nil { - return err - } - defer dictImp.data.Release() - - dict = dictImp.data.(*array.Data) - } - - if nulls != nil { - defer nulls.Release() - } - if values != nil { - defer values.Release() - } - - imp.data = array.NewDataWithDictionary(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, values}, int(imp.arr.null_count), int(imp.arr.offset), dict) - return nil -} - -func (imp *cimporter) checkNoChildren() error { return imp.checkNumChildren(0) } - -func (imp *cimporter) checkNoNulls() error { - if imp.arr.null_count != 0 { - return fmt.Errorf("%w: unexpected non-zero null count for imported type %s", arrow.ErrInvalid, imp.dt) - } - return nil -} - -func (imp *cimporter) checkNumChildren(n int64) error { - if int64(imp.arr.n_children) != n { - return fmt.Errorf("expected %d children, for imported 
type %s, ArrowArray has %d", n, imp.dt, imp.arr.n_children) - } - return nil -} - -func (imp *cimporter) checkNumBuffers(n int64) error { - if int64(imp.arr.n_buffers) != n { - return fmt.Errorf("expected %d buffers for imported type %s, ArrowArray has %d", n, imp.dt, imp.arr.n_buffers) - } - return nil -} - -func (imp *cimporter) importBuffer(bufferID int, sz int64) (*memory.Buffer, error) { - // this is not a copy, we're just having a slice which points at the data - // it's still owned by the C.ArrowArray object and its backing C++ object. - if imp.cbuffers[bufferID] == nil { - if sz != 0 { - return nil, errors.New("invalid buffer") - } - return memory.NewBufferBytes([]byte{}), nil - } - data := unsafe.Slice((*byte)(unsafe.Pointer(imp.cbuffers[bufferID])), sz) - imp.alloc.addBuffer() - return memory.NewBufferWithAllocator(data, imp.alloc), nil -} - -func (imp *cimporter) importBitsBuffer(bufferID int) (*memory.Buffer, error) { - bufsize := bitutil.BytesForBits(int64(imp.arr.length) + int64(imp.arr.offset)) - return imp.importBuffer(bufferID, bufsize) -} - -func (imp *cimporter) importNullBitmap(bufferID int) (*memory.Buffer, error) { - if imp.arr.null_count > 0 && imp.cbuffers[bufferID] == nil { - return nil, fmt.Errorf("arrowarray struct has null bitmap buffer, but non-zero null_count %d", imp.arr.null_count) - } - - if imp.arr.null_count == 0 && imp.cbuffers[bufferID] == nil { - return nil, nil - } - - return imp.importBitsBuffer(bufferID) -} - -func (imp *cimporter) importFixedSizeBuffer(bufferID int, byteWidth int64) (*memory.Buffer, error) { - bufsize := byteWidth * int64(imp.arr.length+imp.arr.offset) - return imp.importBuffer(bufferID, bufsize) -} - -func (imp *cimporter) importOffsetsBuffer(bufferID int, offsetsize int64) (*memory.Buffer, error) { - bufsize := offsetsize * int64((imp.arr.length + imp.arr.offset + 1)) - return imp.importBuffer(bufferID, bufsize) -} - -func (imp *cimporter) importVariableValuesBuffer(bufferID int, byteWidth, nvals int64) 
(*memory.Buffer, error) { - bufsize := byteWidth * nvals - return imp.importBuffer(bufferID, int64(bufsize)) -} - -func importCArrayAsType(arr *CArrowArray, dt arrow.DataType) (imp *cimporter, err error) { - imp = &cimporter{dt: dt} - err = imp.doImportArr(arr) - return -} - -func initReader(rdr *nativeCRecordBatchReader, stream *CArrowArrayStream) error { - rdr.stream = C.get_stream() - C.ArrowArrayStreamMove(stream, rdr.stream) - rdr.arr = C.get_arr() - runtime.SetFinalizer(rdr, func(r *nativeCRecordBatchReader) { - if r.cur != nil { - r.cur.Release() - } - C.ArrowArrayStreamRelease(r.stream) - C.ArrowArrayRelease(r.arr) - C.free(unsafe.Pointer(r.stream)) - C.free(unsafe.Pointer(r.arr)) - }) - - var sc CArrowSchema - errno := C.stream_get_schema(rdr.stream, &sc) - if errno != 0 { - return rdr.getError(int(errno)) - } - defer C.ArrowSchemaRelease(&sc) - s, err := ImportCArrowSchema((*CArrowSchema)(&sc)) - if err != nil { - return err - } - rdr.schema = s - - return nil -} - -// Record Batch reader that conforms to arrio.Reader for the ArrowArrayStream interface -type nativeCRecordBatchReader struct { - stream *CArrowArrayStream - arr *CArrowArray - schema *arrow.Schema - - cur arrow.Record - err error -} - -// No need to implement retain and release here as we used runtime.SetFinalizer when constructing -// the reader to free up the ArrowArrayStream memory when the garbage collector cleans it up. 
-func (n *nativeCRecordBatchReader) Retain() {} -func (n *nativeCRecordBatchReader) Release() {} - -func (n *nativeCRecordBatchReader) Err() error { return n.err } -func (n *nativeCRecordBatchReader) Record() arrow.Record { return n.cur } - -func (n *nativeCRecordBatchReader) Next() bool { - err := n.next() - switch { - case err == nil: - return true - case err == io.EOF: - return false - } - n.err = err - return false -} - -func (n *nativeCRecordBatchReader) next() error { - if n.schema == nil { - var sc CArrowSchema - errno := C.stream_get_schema(n.stream, &sc) - if errno != 0 { - return n.getError(int(errno)) - } - defer C.ArrowSchemaRelease(&sc) - s, err := ImportCArrowSchema((*CArrowSchema)(&sc)) - if err != nil { - return err - } - - n.schema = s - } - - if n.cur != nil { - n.cur.Release() - n.cur = nil - } - - errno := C.stream_get_next(n.stream, n.arr) - if errno != 0 { - return n.getError(int(errno)) - } - - if C.ArrowArrayIsReleased(n.arr) == 1 { - return io.EOF - } - - rec, err := ImportCRecordBatchWithSchema(n.arr, n.schema) - if err != nil { - return err - } - - n.cur = rec - return nil -} - -func (n *nativeCRecordBatchReader) Schema() *arrow.Schema { - return n.schema -} - -func (n *nativeCRecordBatchReader) getError(errno int) error { - return fmt.Errorf("%w: %s", syscall.Errno(errno), C.GoString(C.stream_get_last_error(n.stream))) -} - -func (n *nativeCRecordBatchReader) Read() (arrow.Record, error) { - if err := n.next(); err != nil { - n.err = err - return nil, err - } - return n.cur, nil -} - -func releaseArr(arr *CArrowArray) { - C.ArrowArrayRelease(arr) -} - -func releaseSchema(schema *CArrowSchema) { - C.ArrowSchemaRelease(schema) -} diff --git a/go/arrow/cdata/cdata_allocate.go b/go/arrow/cdata/cdata_allocate.go deleted file mode 100644 index da0bd957de1df..0000000000000 --- a/go/arrow/cdata/cdata_allocate.go +++ /dev/null @@ -1,57 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license 
agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.20 || tinygo - -package cdata - -// #include -// #include "arrow/c/abi.h" -import "C" - -import ( - "unsafe" -) - -func allocateArrowSchemaArr(n int) (out []CArrowSchema) { - return unsafe.Slice((*CArrowSchema)(C.calloc(C.size_t(n), - C.sizeof_struct_ArrowSchema)), n) -} - -func allocateArrowSchemaPtrArr(n int) (out []*CArrowSchema) { - return unsafe.Slice((**CArrowSchema)(C.calloc(C.size_t(n), - C.size_t(unsafe.Sizeof((*CArrowSchema)(nil))))), n) -} - -func allocateArrowArrayArr(n int) (out []CArrowArray) { - return unsafe.Slice((*CArrowArray)(C.calloc(C.size_t(n), - C.sizeof_struct_ArrowArray)), n) -} - -func allocateArrowArrayPtrArr(n int) (out []*CArrowArray) { - return unsafe.Slice((**CArrowArray)(C.calloc(C.size_t(n), - C.size_t(unsafe.Sizeof((*CArrowArray)(nil))))), n) -} - -func allocateBufferPtrArr(n int) (out []*C.void) { - return unsafe.Slice((**C.void)(C.calloc(C.size_t(n), - C.size_t(unsafe.Sizeof((*C.void)(nil))))), n) -} - -func allocateBufferSizeArr(n int) (out []C.int64_t) { - return unsafe.Slice((*C.int64_t)(C.calloc(C.size_t(n), - C.sizeof_int64_t)), n) -} diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go deleted file mode 100644 index 59775926d7ef8..0000000000000 --- a/go/arrow/cdata/cdata_exports.go 
+++ /dev/null @@ -1,480 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package cdata - -// #include -// #include -// #include -// #include "arrow/c/abi.h" -// #include "arrow/c/helpers.h" -// -// extern void releaseExportedSchema(struct ArrowSchema* schema); -// extern void releaseExportedArray(struct ArrowArray* array); -// -// const uint8_t kGoCdataZeroRegion[8] = {0}; -// -// void goReleaseArray(struct ArrowArray* array) { -// releaseExportedArray(array); -// } -// void goReleaseSchema(struct ArrowSchema* schema) { -// releaseExportedSchema(schema); -// } -import "C" - -import ( - "bytes" - "encoding/binary" - "fmt" - "runtime/cgo" - "strconv" - "strings" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/endian" - "github.com/apache/arrow/go/v18/arrow/internal" - "github.com/apache/arrow/go/v18/arrow/ipc" -) - -func encodeCMetadata(keys, values []string) []byte { - if len(keys) != len(values) { - panic("unequal metadata key/values length") - } - npairs := int32(len(keys)) - - var b bytes.Buffer - totalSize := 4 - for i := range keys { - totalSize += 8 + len(keys[i]) + len(values[i]) - } - b.Grow(totalSize) - - 
b.Write((*[4]byte)(unsafe.Pointer(&npairs))[:]) - for i := range keys { - binary.Write(&b, endian.Native, int32(len(keys[i]))) - b.WriteString(keys[i]) - binary.Write(&b, endian.Native, int32(len(values[i]))) - b.WriteString(values[i]) - } - return b.Bytes() -} - -type schemaExporter struct { - format, name string - - extraMeta arrow.Metadata - metadata []byte - flags int64 - children []schemaExporter - dict *schemaExporter -} - -func (exp *schemaExporter) handleExtension(dt arrow.DataType) arrow.DataType { - if dt.ID() != arrow.EXTENSION { - return dt - } - - ext := dt.(arrow.ExtensionType) - exp.extraMeta = arrow.NewMetadata([]string{ipc.ExtensionTypeKeyName, ipc.ExtensionMetadataKeyName}, []string{ext.ExtensionName(), ext.Serialize()}) - return ext.StorageType() -} - -func (exp *schemaExporter) exportMeta(m *arrow.Metadata) { - var ( - finalKeys []string - finalValues []string - ) - - if m == nil { - if exp.extraMeta.Len() > 0 { - finalKeys = exp.extraMeta.Keys() - finalValues = exp.extraMeta.Values() - } - exp.metadata = encodeCMetadata(finalKeys, finalValues) - return - } - - finalKeys = m.Keys() - finalValues = m.Values() - - if exp.extraMeta.Len() > 0 { - for i, k := range exp.extraMeta.Keys() { - if m.FindKey(k) != -1 { - continue - } - finalKeys = append(finalKeys, k) - finalValues = append(finalValues, exp.extraMeta.Values()[i]) - } - } - exp.metadata = encodeCMetadata(finalKeys, finalValues) -} - -func (exp *schemaExporter) exportFormat(dt arrow.DataType) string { - switch dt := dt.(type) { - case *arrow.NullType: - return "n" - case *arrow.BooleanType: - return "b" - case *arrow.Int8Type: - return "c" - case *arrow.Uint8Type: - return "C" - case *arrow.Int16Type: - return "s" - case *arrow.Uint16Type: - return "S" - case *arrow.Int32Type: - return "i" - case *arrow.Uint32Type: - return "I" - case *arrow.Int64Type: - return "l" - case *arrow.Uint64Type: - return "L" - case *arrow.Float16Type: - return "e" - case *arrow.Float32Type: - return "f" - case 
*arrow.Float64Type: - return "g" - case *arrow.FixedSizeBinaryType: - return fmt.Sprintf("w:%d", dt.ByteWidth) - case *arrow.Decimal128Type: - return fmt.Sprintf("d:%d,%d", dt.Precision, dt.Scale) - case *arrow.Decimal256Type: - return fmt.Sprintf("d:%d,%d,256", dt.Precision, dt.Scale) - case *arrow.BinaryType: - return "z" - case *arrow.LargeBinaryType: - return "Z" - case *arrow.StringType: - return "u" - case *arrow.LargeStringType: - return "U" - case *arrow.BinaryViewType: - return "vz" - case *arrow.StringViewType: - return "vu" - case *arrow.Date32Type: - return "tdD" - case *arrow.Date64Type: - return "tdm" - case *arrow.Time32Type: - switch dt.Unit { - case arrow.Second: - return "tts" - case arrow.Millisecond: - return "ttm" - default: - panic(fmt.Sprintf("invalid time unit for time32: %s", dt.Unit)) - } - case *arrow.Time64Type: - switch dt.Unit { - case arrow.Microsecond: - return "ttu" - case arrow.Nanosecond: - return "ttn" - default: - panic(fmt.Sprintf("invalid time unit for time64: %s", dt.Unit)) - } - case *arrow.TimestampType: - var b strings.Builder - switch dt.Unit { - case arrow.Second: - b.WriteString("tss:") - case arrow.Millisecond: - b.WriteString("tsm:") - case arrow.Microsecond: - b.WriteString("tsu:") - case arrow.Nanosecond: - b.WriteString("tsn:") - default: - panic(fmt.Sprintf("invalid time unit for timestamp: %s", dt.Unit)) - } - b.WriteString(dt.TimeZone) - return b.String() - case *arrow.DurationType: - switch dt.Unit { - case arrow.Second: - return "tDs" - case arrow.Millisecond: - return "tDm" - case arrow.Microsecond: - return "tDu" - case arrow.Nanosecond: - return "tDn" - default: - panic(fmt.Sprintf("invalid time unit for duration: %s", dt.Unit)) - } - case *arrow.MonthIntervalType: - return "tiM" - case *arrow.DayTimeIntervalType: - return "tiD" - case *arrow.MonthDayNanoIntervalType: - return "tin" - case *arrow.ListType: - return "+l" - case *arrow.LargeListType: - return "+L" - case *arrow.ListViewType: - return "+vl" - 
case *arrow.LargeListViewType: - return "+vL" - case *arrow.FixedSizeListType: - return fmt.Sprintf("+w:%d", dt.Len()) - case *arrow.StructType: - return "+s" - case *arrow.RunEndEncodedType: - return "+r" - case *arrow.MapType: - if dt.KeysSorted { - exp.flags |= C.ARROW_FLAG_MAP_KEYS_SORTED - } - return "+m" - case *arrow.DictionaryType: - if dt.Ordered { - exp.flags |= C.ARROW_FLAG_DICTIONARY_ORDERED - } - return exp.exportFormat(dt.IndexType) - case arrow.UnionType: - var b strings.Builder - if dt.Mode() == arrow.SparseMode { - b.WriteString("+us:") - } else { - b.WriteString("+ud:") - } - for i, c := range dt.TypeCodes() { - if i != 0 { - b.WriteByte(',') - } - b.WriteString(strconv.Itoa(int(c))) - } - return b.String() - } - panic("unsupported data type for export") -} - -func (exp *schemaExporter) export(field arrow.Field) { - exp.name = field.Name - exp.format = exp.exportFormat(exp.handleExtension(field.Type)) - if field.Nullable { - exp.flags |= C.ARROW_FLAG_NULLABLE - } - - switch dt := field.Type.(type) { - case *arrow.DictionaryType: - exp.dict = new(schemaExporter) - exp.dict.export(arrow.Field{Type: dt.ValueType}) - case arrow.NestedType: - exp.children = make([]schemaExporter, dt.NumFields()) - for i, f := range dt.Fields() { - exp.children[i].export(f) - } - } - - exp.exportMeta(&field.Metadata) -} - -func (exp *schemaExporter) finish(out *CArrowSchema) { - out.dictionary = nil - if exp.dict != nil { - out.dictionary = (*CArrowSchema)(C.calloc(C.sizeof_struct_ArrowSchema, C.size_t(1))) - exp.dict.finish(out.dictionary) - } - out.name = C.CString(exp.name) - out.format = C.CString(exp.format) - out.metadata = (*C.char)(C.CBytes(exp.metadata)) - out.flags = C.int64_t(exp.flags) - out.n_children = C.int64_t(len(exp.children)) - - if len(exp.children) > 0 { - children := allocateArrowSchemaArr(len(exp.children)) - childPtrs := allocateArrowSchemaPtrArr(len(exp.children)) - - for i, c := range exp.children { - c.finish(&children[i]) - childPtrs[i] = 
&children[i] - } - - out.children = (**CArrowSchema)(unsafe.Pointer(&childPtrs[0])) - } else { - out.children = nil - } - - out.release = (*[0]byte)(C.goReleaseSchema) -} - -func exportField(field arrow.Field, out *CArrowSchema) { - var exp schemaExporter - exp.export(field) - exp.finish(out) -} - -func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { - if outSchema != nil { - exportField(arrow.Field{Type: arr.DataType()}, outSchema) - } - - buffers := arr.Data().Buffers() - // Some types don't have validity bitmaps, but we keep them shifted - // to make processing easier in other contexts. This means that - // we have to adjust when exporting. - has_validity_bitmap := internal.DefaultHasValidityBitmap(arr.DataType().ID()) - if len(buffers) > 0 && !has_validity_bitmap { - buffers = buffers[1:] - } - nbuffers := len(buffers) - - has_buffer_sizes_buffer := internal.HasBufferSizesBuffer(arr.DataType().ID()) - if has_buffer_sizes_buffer { - nbuffers++ - } - - out.dictionary = nil - out.null_count = C.int64_t(arr.NullN()) - out.length = C.int64_t(arr.Len()) - out.offset = C.int64_t(arr.Data().Offset()) - out.n_buffers = C.int64_t(nbuffers) - out.buffers = nil - - if nbuffers > 0 { - cBufs := allocateBufferPtrArr(nbuffers) - for i, buf := range buffers { - if buf == nil || buf.Len() == 0 { - if i > 0 || !has_validity_bitmap { - // apache/arrow#33936: export a dummy buffer to be friendly to - // implementations that don't import NULL properly - cBufs[i] = (*C.void)(unsafe.Pointer(&C.kGoCdataZeroRegion)) - } else { - // null pointer permitted for the validity bitmap - // (assuming null count is 0) - cBufs[i] = nil - } - continue - } - - cBufs[i] = (*C.void)(unsafe.Pointer(&buf.Bytes()[0])) - } - - if has_buffer_sizes_buffer { - sizes := allocateBufferSizeArr(len(buffers[2:])) - for i, buf := range buffers[2:] { - sizes[i] = C.int64_t(buf.Len()) - } - if len(sizes) > 0 { - cBufs[nbuffers-1] = (*C.void)(unsafe.Pointer(&sizes[0])) - } - } - 
out.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cBufs[0])) - } - - arr.Data().Retain() - h := cgo.NewHandle(arr.Data()) - out.private_data = createHandle(h) - out.release = (*[0]byte)(C.goReleaseArray) - switch arr := arr.(type) { - case array.ListLike: - out.n_children = 1 - childPtrs := allocateArrowArrayPtrArr(1) - children := allocateArrowArrayArr(1) - exportArray(arr.ListValues(), &children[0], nil) - childPtrs[0] = &children[0] - out.children = (**CArrowArray)(unsafe.Pointer(&childPtrs[0])) - case *array.Struct: - out.n_children = C.int64_t(arr.NumField()) - childPtrs := allocateArrowArrayPtrArr(arr.NumField()) - children := allocateArrowArrayArr(arr.NumField()) - for i := 0; i < arr.NumField(); i++ { - exportArray(arr.Field(i), &children[i], nil) - childPtrs[i] = &children[i] - } - out.children = (**CArrowArray)(unsafe.Pointer(&childPtrs[0])) - case *array.RunEndEncoded: - out.n_children = 2 - childPtrs := allocateArrowArrayPtrArr(2) - children := allocateArrowArrayArr(2) - exportArray(arr.RunEndsArr(), &children[0], nil) - exportArray(arr.Values(), &children[1], nil) - childPtrs[0], childPtrs[1] = &children[0], &children[1] - out.children = (**CArrowArray)(unsafe.Pointer(&childPtrs[0])) - case *array.Dictionary: - out.dictionary = (*CArrowArray)(C.calloc(C.sizeof_struct_ArrowArray, C.size_t(1))) - exportArray(arr.Dictionary(), out.dictionary, nil) - case array.Union: - out.n_children = C.int64_t(arr.NumFields()) - childPtrs := allocateArrowArrayPtrArr(arr.NumFields()) - children := allocateArrowArrayArr(arr.NumFields()) - for i := 0; i < arr.NumFields(); i++ { - exportArray(arr.Field(i), &children[i], nil) - childPtrs[i] = &children[i] - } - out.children = (**CArrowArray)(unsafe.Pointer(&childPtrs[0])) - default: - out.n_children = 0 - out.children = nil - } -} - -type cRecordReader struct { - rdr array.RecordReader - err *C.char -} - -func (rr cRecordReader) getSchema(out *CArrowSchema) int { - schema := rr.rdr.Schema() - if schema == nil { - return 
rr.maybeError() - } - ExportArrowSchema(schema, out) - return 0 -} - -func (rr cRecordReader) next(out *CArrowArray) int { - if rr.rdr.Next() { - ExportArrowRecordBatch(rr.rdr.Record(), out, nil) - return 0 - } - C.ArrowArrayMarkReleased(out) - return rr.maybeError() -} - -func (rr cRecordReader) maybeError() int { - err := rr.rdr.Err() - if err != nil { - return C.EIO - } - return 0 -} - -func (rr cRecordReader) getLastError() *C.char { - err := rr.rdr.Err() - if err != nil { - if rr.err != nil { - C.free(unsafe.Pointer(rr.err)) - } - rr.err = C.CString(err.Error()) - } - return rr.err -} - -func (rr cRecordReader) release() { - if rr.err != nil { - C.free(unsafe.Pointer(rr.err)) - } - rr.rdr.Release() -} diff --git a/go/arrow/cdata/cdata_fulltest.c b/go/arrow/cdata/cdata_fulltest.c deleted file mode 100644 index 4291cfff865b5..0000000000000 --- a/go/arrow/cdata/cdata_fulltest.c +++ /dev/null @@ -1,494 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// +build cgo -// +build test - -#include -#include -#include -#include -#include -#include -#include "arrow/c/abi.h" -#include "arrow/c/helpers.h" -#include "utils.h" - -int is_little_endian() -{ - unsigned int x = 1; - char *c = (char*) &x; - return (int)*c; -} - -static const int64_t kDefaultFlags = ARROW_FLAG_NULLABLE; - -extern void releaseTestArr(struct ArrowArray* array); -void goReleaseTestArray(struct ArrowArray* array) { - releaseTestArr(array); -} - -static void release_int32_type(struct ArrowSchema* schema) { - // mark released - schema->release = NULL; -} - -void export_int32_type(struct ArrowSchema* schema) { - const char* encoded_metadata; - if (is_little_endian() == 1) { - encoded_metadata = kEncodedMeta1LE; - } else { - encoded_metadata = kEncodedMeta1BE; - } - *schema = (struct ArrowSchema) { - // Type description - .format = "i", - .name = "", - .metadata = encoded_metadata, - .flags = 0, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // bookkeeping - .release = &release_int32_type, - }; -} - -static bool test1_released = false; - -int test1_is_released() { return test1_released; } - -static void release_int32_array(struct ArrowArray* array) { - assert(array->n_buffers == 2); - // free the buffers and buffers array - free((void *) array->buffers[1]); - free(array->buffers); - // mark released - array->release = NULL; - test1_released = true; -} - -void export_int32_array(const int32_t* data, int64_t nitems, struct ArrowArray* array) { - // initialize primitive fields - *array = (struct ArrowArray) { - .length = nitems, - .offset = 0, - .null_count = 0, - .n_buffers = 2, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // bookkeeping - .release = &release_int32_array - }; - - // allocate list of buffers - array->buffers = (const void**)malloc(sizeof(void*) * array->n_buffers); - assert(array->buffers != NULL); - array->buffers[0] = NULL; // no nulls, null bitmap can be omitted - array->buffers[1] = data; -} - - 
-static void release_primitive(struct ArrowSchema* schema) { - free((void *)schema->format); - schema->release = NULL; -} - -static void release_nested_internal(struct ArrowSchema* schema, - int is_dynamic) { - assert(!ArrowSchemaIsReleased(schema)); - for (int i = 0; i < schema->n_children; ++i) { - ArrowSchemaRelease(schema->children[i]); - free(schema->children[i]); - } - if (is_dynamic) { - free((void*)schema->format); - free((void*)schema->name); - } - ArrowSchemaMarkReleased(schema); -} - -static void release_nested_static(struct ArrowSchema* schema) { - release_nested_internal(schema, /*is_dynamic=*/0); -} - -static void release_nested_dynamic(struct ArrowSchema* schema) { - release_nested_internal(schema, /*is_dynamic=*/1); -} - -static void release_nested_dynamic_toplevel(struct ArrowSchema* schema) { - assert(!ArrowSchemaIsReleased(schema)); - for (int i = 0; i < schema->n_children; ++i) { - ArrowSchemaRelease(schema->children[i]); - free(schema->children[i]); - } - free((void*)schema->format); - if (strlen(schema->name) > 0) { - free((void*)schema->name); - } - ArrowSchemaMarkReleased(schema); -} - -void test_primitive(struct ArrowSchema* schema, const char* fmt) { - *schema = (struct ArrowSchema) { - // Type description - .format = fmt, - .name = "", - .metadata = NULL, - .flags = 0, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // bookkeeping - .release = &release_primitive, - }; -} - -// Since test_lists et al. allocate an entirely array of ArrowSchema pointers, -// need to expose a function to free it. 
-void free_malloced_schemas(struct ArrowSchema** schemas) { - free(schemas); -} - -struct ArrowSchema** test_lists(const char** fmts, const char** names, const int* nullflags, const int n) { - struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); - for (int i = 0; i < n; ++i) { - schemas[i] = malloc(sizeof(struct ArrowSchema)); - *schemas[i] = (struct ArrowSchema) { - .format = fmts[i], - .name = names[i], - .metadata = NULL, - .flags = 0, - .children = NULL, - .n_children = 0, - .dictionary = NULL, - .release = &release_nested_dynamic, - }; - if (i != 0) { - schemas[i-1]->n_children = 1; - schemas[i-1]->children = &schemas[i]; - schemas[i]->flags = nullflags[i-1]; - } - } - return schemas; -} - -struct ArrowSchema** fill_structs(const char** fmts, const char** names, int64_t* flags, const int n) { - struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); - for (int i = 0; i < n; ++i) { - schemas[i] = malloc(sizeof(struct ArrowSchema)); - *schemas[i] = (struct ArrowSchema) { - .format = fmts[i], - .name = names[i], - .metadata = NULL, - .flags = flags[i], - .children = NULL, - .n_children = 0, - .dictionary = NULL, - .release = &release_nested_dynamic, - }; - } - - schemas[0]->children = &schemas[1]; - schemas[0]->n_children = n-1; - return schemas; -} - -struct ArrowSchema** test_struct(const char** fmts, const char** names, int64_t* flags, const int n) { - struct ArrowSchema** schemas = fill_structs(fmts, names, flags, n); - - if (is_little_endian() == 1) { - schemas[n-1]->metadata = kEncodedMeta2LE; - } else { - schemas[n-1]->metadata = kEncodedMeta2BE; - } - - return schemas; -} - -struct ArrowSchema** test_schema(const char** fmts, const char** names, int64_t* flags, const int n) { - struct ArrowSchema** schemas = fill_structs(fmts, names, flags, n); - - if (is_little_endian() == 1) { - schemas[0]->metadata = kEncodedMeta2LE; - schemas[n-1]->metadata = kEncodedMeta1LE; - } else { - schemas[0]->metadata = kEncodedMeta2BE; - 
schemas[n-1]->metadata = kEncodedMeta1BE; - } - return schemas; -} - -struct ArrowSchema** test_map(const char** fmts, const char** names, int64_t* flags, const int n) { - struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); - for (int i = 0; i < n; ++i) { - schemas[i] = malloc(sizeof(struct ArrowSchema)); - *schemas[i] = (struct ArrowSchema) { - .format = fmts[i], - .name = names[i], - .metadata = NULL, - .flags = flags[i], - .children = NULL, - .n_children = 0, - .dictionary = NULL, - .release = &release_nested_dynamic, - }; - } - - schemas[0]->n_children = 1; - schemas[0]->children = &schemas[1]; - schemas[1]->n_children = n-2; - schemas[1]->children = &schemas[2]; - - return schemas; -} - -struct ArrowSchema** test_union(const char** fmts, const char** names, int64_t* flags, const int n) { - struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); - for (int i = 0; i < n; ++i) { - schemas[i] = malloc(sizeof(struct ArrowSchema)); - *schemas[i] = (struct ArrowSchema) { - .format = fmts[i], - .name = names[i], - .metadata = NULL, - .flags = flags[i], - .children = NULL, - .n_children = 0, - .dictionary = NULL, - .release = &release_nested_dynamic, - }; - } - - schemas[0]->n_children = n-1; - schemas[0]->children = &schemas[1]; - return schemas; -} - -struct streamcounter { - int n; - int max; -}; - -static int stream_schema(struct ArrowArrayStream* st, struct ArrowSchema* out) { - out->children = malloc(sizeof(struct ArrowSchema*)*2); - out->n_children = 2; - - out->children[0] = malloc(sizeof(struct ArrowSchema)); - *out->children[0] = (struct ArrowSchema) { - .format = "i", - .name = "a", - .metadata = NULL, - .flags = ARROW_FLAG_NULLABLE, - .children = NULL, - .n_children = 0, - .dictionary = NULL, - .release = &release_nested_static, - }; - - out->children[1] = malloc(sizeof(struct ArrowSchema)); - *out->children[1] = (struct ArrowSchema) { - .format = "u", - .name = "b", - .metadata = NULL, - .flags = ARROW_FLAG_NULLABLE, - 
.children = NULL, - .n_children = 0, - .dictionary = NULL, - .release = &release_nested_static, - }; - - out->format = "+s"; - out->release = &release_nested_static; - - return 0; -} - -static void release_stream(struct ArrowArrayStream* st) { - free(st->private_data); - ArrowArrayStreamMarkReleased(st); -} - -static void release_the_array(struct ArrowArray* out) { - for (int i = 0; i < out->n_children; ++i) { - ArrowArrayRelease(out->children[i]); - } - free((void*)out->children); - free(out->buffers); - out->release = NULL; -} - -void export_int32_array(const int32_t*, int64_t, struct ArrowArray*); - -static void release_str_array(struct ArrowArray* array) { - assert(array->n_buffers == 3); - free((void*) array->buffers[1]); - free((void*) array->buffers[2]); - free(array->buffers); - array->release = NULL; -} - -void export_str_array(const char* data, const int32_t* offsets, int64_t nitems, struct ArrowArray* out) { - *out = (struct ArrowArray) { - .length = nitems, - .offset = 0, - .null_count = 0, - .n_buffers = 3, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // bookkeeping - .release = &release_str_array - }; - - out->buffers = (const void**)malloc(sizeof(void*) * out->n_buffers); - assert(out->buffers != NULL); - out->buffers[0] = NULL; - out->buffers[1] = offsets; - out->buffers[2] = data; -} - -static int next_record(struct ArrowArrayStream* st, struct ArrowArray* out) { - struct streamcounter* cnter = (struct streamcounter*)(st->private_data); - if (cnter->n == cnter->max) { - ArrowArrayMarkReleased(out); - return 0; - } - - cnter->n++; - - *out = (struct ArrowArray) { - .offset = 0, - .dictionary = NULL, - .length = 3, - .null_count = 0, - .buffers = (const void**)malloc(sizeof(void*)), - .n_children = 2, - .n_buffers = 1, - .release = &release_the_array - }; - - out->buffers[0] = NULL; - out->children = (struct ArrowArray**)malloc(sizeof(struct ArrowArray*)*2); - int32_t* intdata = malloc(sizeof(int32_t)*3); - for (int i = 0; i < 3; 
++i) { - intdata[i] = cnter->n * (i+1); - } - - out->children[0] = malloc(sizeof(struct ArrowArray)); - export_int32_array(intdata, 3, out->children[0]); - out->children[1] = malloc(sizeof(struct ArrowArray)); - char* strdata = strdup("foobarbaz"); - int32_t* offsets = malloc(sizeof(int32_t)*4); - offsets[0] = 0; - offsets[1] = 3; - offsets[2] = 6; - offsets[3] = 9; - export_str_array(strdata, offsets, 3, out->children[1]); - - return 0; -} - -void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out) { - struct streamcounter* cnt = malloc(sizeof(struct streamcounter)); - cnt->max = n_batches; - cnt->n = 0; - - out->get_next = &next_record; - out->get_schema = &stream_schema; - out->release = &release_stream; - out->private_data = cnt; -} - -int test_exported_stream(struct ArrowArrayStream* stream) { - while (1) { - struct ArrowArray array; - memset(&array, 0, sizeof(array)); - // Garbage - implementation should not try to call it, though! - array.release = (void*)0xDEADBEEF; - int rc = stream->get_next(stream, &array); - if (rc != 0) return rc; - - if (array.release == NULL) { - stream->release(stream); - break; - } - } - return 0; -} - -struct FallibleStream { - // empty structs are a GNU extension - int dummy; -}; - -const char* FallibleGetLastError(struct ArrowArrayStream* stream) { - return "Expected error message"; -} - -int FallibleGetSchema(struct ArrowArrayStream* stream, struct ArrowSchema* schema) { - return EINVAL; -} - -int FallibleGetNext(struct ArrowArrayStream* stream, struct ArrowArray* array) { - return EINVAL; -} - -void FallibleRelease(struct ArrowArrayStream* stream) { - memset(stream, 0, sizeof(*stream)); -} - -static struct FallibleStream kFallibleStream; - -void test_stream_schema_fallible(struct ArrowArrayStream* stream) { - stream->get_last_error = FallibleGetLastError; - stream->get_schema = FallibleGetSchema; - stream->get_next = FallibleGetNext; - stream->private_data = &kFallibleStream; - stream->release = 
FallibleRelease; -} - -int confuse_go_gc(struct ArrowArrayStream* stream, unsigned int seed) { - struct ArrowSchema schema; - // Try to confuse the Go GC by putting what looks like a Go pointer here. -#ifdef _WIN32 - // Thread-safe on Windows with the multithread CRT -#define DORAND rand() -#else -#define DORAND rand_r(&seed) -#endif - schema.name = (char*)(0xc000000000L + (DORAND % 0x2000)); - schema.format = (char*)(0xc000000000L + (DORAND % 0x2000)); - int rc = stream->get_schema(stream, &schema); - if (rc != 0) return rc; - schema.release(&schema); - - while (1) { - struct ArrowArray array; - array.release = (void*)(0xc000000000L + (DORAND % 0x2000)); - array.private_data = (void*)(0xc000000000L + (DORAND % 0x2000)); - int rc = stream->get_next(stream, &array); - if (rc != 0) return rc; - - if (array.release == NULL) { - stream->release(stream); - break; - } - array.release(&array); - } - return 0; -#undef DORAND -} diff --git a/go/arrow/cdata/cdata_test.go b/go/arrow/cdata/cdata_test.go deleted file mode 100644 index 3563aeb5f0f1e..0000000000000 --- a/go/arrow/cdata/cdata_test.go +++ /dev/null @@ -1,1027 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build cgo && test -// +build cgo,test - -// use test tag so that we only run these tests when the "test" tag is present -// so that the .c and other framework infrastructure is only compiled in during -// testing, and the .c files and symbols are not present in release builds. - -package cdata - -import ( - "encoding/json" - "errors" - "fmt" - "io" - "runtime" - "runtime/cgo" - "sync" - "testing" - "time" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/internal/arrdata" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/memory/mallocator" - "github.com/stretchr/testify/assert" -) - -func TestSchemaExport(t *testing.T) { - sc := exportInt32TypeSchema() - f, err := importSchema(&sc) - assert.NoError(t, err) - - keys, _ := getMetadataKeys() - vals, _ := getMetadataValues() - - assert.Equal(t, arrow.PrimitiveTypes.Int32, f.Type) - assert.Equal(t, keys, f.Metadata.Keys()) - assert.Equal(t, vals, f.Metadata.Values()) - - // schema was released when importing - assert.True(t, schemaIsReleased(&sc)) -} - -func TestSimpleArrayExport(t *testing.T) { - assert.False(t, test1IsReleased()) - - testarr := exportInt32Array() - arr, err := ImportCArrayWithType(testarr, arrow.PrimitiveTypes.Int32) - assert.NoError(t, err) - - assert.False(t, test1IsReleased()) - assert.True(t, isReleased(testarr)) - - arr.Release() - runtime.GC() - assert.Eventually(t, test1IsReleased, 1*time.Second, 10*time.Millisecond) -} - -func TestSimpleArrayAndSchema(t *testing.T) { - sc := exportInt32TypeSchema() - testarr := exportInt32Array() - - // grab address of the buffer we stuck into the ArrowArray object - buflist := (*[2]unsafe.Pointer)(unsafe.Pointer(testarr.buffers)) - origvals := (*[10]int32)(unsafe.Pointer(buflist[1])) - - fld, arr, err := ImportCArray(testarr, &sc) - assert.NoError(t, err) - assert.Equal(t, 
arrow.PrimitiveTypes.Int32, fld.Type) - assert.EqualValues(t, 10, arr.Len()) - - // verify that the address is the same of the first integer for the - // slice that is being used by the arrow.Array and the original buffer - vals := arr.(*array.Int32).Int32Values() - assert.Same(t, &vals[0], &origvals[0]) - - // and that the values are correct - for i, v := range vals { - assert.Equal(t, int32(i+1), v) - } -} - -func TestPrimitiveSchemas(t *testing.T) { - tests := []struct { - typ arrow.DataType - fmt string - }{ - {arrow.PrimitiveTypes.Int8, "c"}, - {arrow.PrimitiveTypes.Int16, "s"}, - {arrow.PrimitiveTypes.Int32, "i"}, - {arrow.PrimitiveTypes.Int64, "l"}, - {arrow.PrimitiveTypes.Uint8, "C"}, - {arrow.PrimitiveTypes.Uint16, "S"}, - {arrow.PrimitiveTypes.Uint32, "I"}, - {arrow.PrimitiveTypes.Uint64, "L"}, - {arrow.FixedWidthTypes.Boolean, "b"}, - {arrow.Null, "n"}, - {arrow.FixedWidthTypes.Float16, "e"}, - {arrow.PrimitiveTypes.Float32, "f"}, - {arrow.PrimitiveTypes.Float64, "g"}, - {&arrow.FixedSizeBinaryType{ByteWidth: 3}, "w:3"}, - {arrow.BinaryTypes.Binary, "z"}, - {arrow.BinaryTypes.LargeBinary, "Z"}, - {arrow.BinaryTypes.String, "u"}, - {arrow.BinaryTypes.LargeString, "U"}, - {&arrow.Decimal128Type{Precision: 16, Scale: 4}, "d:16,4"}, - {&arrow.Decimal128Type{Precision: 15, Scale: 0}, "d:15,0"}, - {&arrow.Decimal128Type{Precision: 15, Scale: -4}, "d:15,-4"}, - {&arrow.Decimal256Type{Precision: 15, Scale: -4}, "d:15,-4,256"}, - } - - for _, tt := range tests { - t.Run(tt.typ.Name(), func(t *testing.T) { - sc := testPrimitive(tt.fmt) - - f, err := ImportCArrowField(&sc) - assert.NoError(t, err) - - assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - - assert.True(t, schemaIsReleased(&sc)) - }) - } -} - -func TestDecimalSchemaErrors(t *testing.T) { - tests := []struct { - fmt string - errorMessage string - }{ - {"d:", "invalid decimal spec 'd:': wrong number of properties"}, - {"d:1", "invalid decimal spec 'd:1': wrong number of properties"}, - {"d:1,2,3,4", 
"invalid decimal spec 'd:1,2,3,4': wrong number of properties"}, - {"d:a,2,3", "could not parse decimal precision in 'd:a,2,3':"}, - {"d:1,a,3", "could not parse decimal scale in 'd:1,a,3':"}, - {"d:1,2,a", "could not parse decimal bitwidth in 'd:1,2,a':"}, - {"d:1,2,384", "only decimal128 and decimal256 are supported, got 'd:1,2,384'"}, - } - - for _, tt := range tests { - t.Run(tt.fmt, func(t *testing.T) { - sc := testPrimitive(tt.fmt) - - _, err := ImportCArrowField(&sc) - assert.Error(t, err) - assert.Contains(t, err.Error(), tt.errorMessage) - }) - } -} - -func TestImportTemporalSchema(t *testing.T) { - tests := []struct { - typ arrow.DataType - fmt string - }{ - {arrow.FixedWidthTypes.Date32, "tdD"}, - {arrow.FixedWidthTypes.Date64, "tdm"}, - {arrow.FixedWidthTypes.Time32s, "tts"}, - {arrow.FixedWidthTypes.Time32ms, "ttm"}, - {arrow.FixedWidthTypes.Time64us, "ttu"}, - {arrow.FixedWidthTypes.Time64ns, "ttn"}, - {arrow.FixedWidthTypes.Duration_s, "tDs"}, - {arrow.FixedWidthTypes.Duration_ms, "tDm"}, - {arrow.FixedWidthTypes.Duration_us, "tDu"}, - {arrow.FixedWidthTypes.Duration_ns, "tDn"}, - {arrow.FixedWidthTypes.MonthInterval, "tiM"}, - {arrow.FixedWidthTypes.DayTimeInterval, "tiD"}, - {arrow.FixedWidthTypes.MonthDayNanoInterval, "tin"}, - {arrow.FixedWidthTypes.Timestamp_s, "tss:UTC"}, - {&arrow.TimestampType{Unit: arrow.Second}, "tss:"}, - {&arrow.TimestampType{Unit: arrow.Second, TimeZone: "Europe/Paris"}, "tss:Europe/Paris"}, - {arrow.FixedWidthTypes.Timestamp_ms, "tsm:UTC"}, - {&arrow.TimestampType{Unit: arrow.Millisecond}, "tsm:"}, - {&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "Europe/Paris"}, "tsm:Europe/Paris"}, - {arrow.FixedWidthTypes.Timestamp_us, "tsu:UTC"}, - {&arrow.TimestampType{Unit: arrow.Microsecond}, "tsu:"}, - {&arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: "Europe/Paris"}, "tsu:Europe/Paris"}, - {arrow.FixedWidthTypes.Timestamp_ns, "tsn:UTC"}, - {&arrow.TimestampType{Unit: arrow.Nanosecond}, "tsn:"}, - 
{&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "Europe/Paris"}, "tsn:Europe/Paris"}, - } - - for _, tt := range tests { - t.Run(tt.typ.Name(), func(t *testing.T) { - sc := testPrimitive(tt.fmt) - - f, err := ImportCArrowField(&sc) - assert.NoError(t, err) - - assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - - assert.True(t, schemaIsReleased(&sc)) - }) - } -} - -func TestListSchemas(t *testing.T) { - tests := []struct { - typ arrow.DataType - fmts []string - names []string - isnull []bool - }{ - {arrow.ListOf(arrow.PrimitiveTypes.Int8), []string{"+l", "c"}, []string{"", "item"}, []bool{true}}, - {arrow.FixedSizeListOfNonNullable(2, arrow.PrimitiveTypes.Int64), []string{"+w:2", "l"}, []string{"", "item"}, []bool{false}}, - {arrow.ListOfNonNullable(arrow.ListOf(arrow.PrimitiveTypes.Int32)), []string{"+l", "+l", "i"}, []string{"", "item", "item"}, []bool{false, true}}, - } - - for _, tt := range tests { - t.Run(tt.typ.Name(), func(t *testing.T) { - sc := testNested(tt.fmts, tt.names, tt.isnull) - defer freeMallocedSchemas(sc) - - top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] - f, err := ImportCArrowField(top) - assert.NoError(t, err) - - assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - - assert.True(t, schemaIsReleased(top)) - }) - } -} - -func TestStructSchemas(t *testing.T) { - tests := []struct { - typ arrow.DataType - fmts []string - names []string - flags []int64 - }{ - {arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "b", Type: arrow.BinaryTypes.String, Nullable: true, Metadata: metadata2}, - ), []string{"+s", "c", "u"}, []string{"", "a", "b"}, []int64{flagIsNullable, flagIsNullable, flagIsNullable}}, - } - - for _, tt := range tests { - t.Run(tt.typ.Name(), func(t *testing.T) { - sc := testStruct(tt.fmts, tt.names, tt.flags) - defer freeMallocedSchemas(sc) - - top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] - f, err := ImportCArrowField(top) - assert.NoError(t, err) - - 
assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - - assert.True(t, schemaIsReleased(top)) - }) - } -} - -func TestMapSchemas(t *testing.T) { - tests := []struct { - typ *arrow.MapType - keysSorted bool - fmts []string - names []string - flags []int64 - }{ - {arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), false, []string{"+m", "+s", "c", "u"}, []string{"", "entries", "key", "value"}, []int64{flagIsNullable, 0, 0, flagIsNullable}}, - {arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), true, []string{"+m", "+s", "c", "u"}, []string{"", "entries", "key", "value"}, []int64{flagIsNullable | flagMapKeysSorted, 0, 0, flagIsNullable}}, - } - - for _, tt := range tests { - t.Run(tt.typ.Name(), func(t *testing.T) { - sc := testMap(tt.fmts, tt.names, tt.flags) - defer freeMallocedSchemas(sc) - - top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] - f, err := ImportCArrowField(top) - assert.NoError(t, err) - - tt.typ.KeysSorted = tt.keysSorted - assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - - assert.True(t, schemaIsReleased(top)) - }) - } -} - -func TestSchema(t *testing.T) { - // schema is exported as an equivalent struct type (+ top-level metadata) - sc := arrow.NewSchema([]arrow.Field{ - {Name: "nulls", Type: arrow.Null, Nullable: false}, - {Name: "values", Type: arrow.PrimitiveTypes.Int64, Nullable: true, Metadata: metadata1}, - }, &metadata2) - - cst := testSchema([]string{"+s", "n", "l"}, []string{"", "nulls", "values"}, []int64{0, 0, flagIsNullable}) - defer freeMallocedSchemas(cst) - - top := (*[1]*CArrowSchema)(unsafe.Pointer(cst))[0] - out, err := ImportCArrowSchema(top) - assert.NoError(t, err) - - assert.True(t, sc.Equal(out)) - assert.True(t, sc.Metadata().Equal(out.Metadata())) - - assert.True(t, schemaIsReleased(top)) -} - -func createTestInt8Arr() arrow.Array { - bld := array.NewInt8Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]int8{1, 2, 0, -3}, []bool{true, true, false, true}) - return 
bld.NewInt8Array() -} - -func createTestInt16Arr() arrow.Array { - bld := array.NewInt16Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]int16{1, 2, -3}, []bool{true, true, true}) - return bld.NewInt16Array() -} - -func createTestInt32Arr() arrow.Array { - bld := array.NewInt32Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]int32{1, 2, 0, -3}, []bool{true, true, false, true}) - return bld.NewInt32Array() -} - -func createTestInt64Arr() arrow.Array { - bld := array.NewInt64Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]int64{1, 2, -3}, []bool{true, true, true}) - return bld.NewInt64Array() -} - -func createTestUint8Arr() arrow.Array { - bld := array.NewUint8Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]uint8{1, 2, 0, 3}, []bool{true, true, false, true}) - return bld.NewUint8Array() -} - -func createTestUint16Arr() arrow.Array { - bld := array.NewUint16Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]uint16{1, 2, 3}, []bool{true, true, true}) - return bld.NewUint16Array() -} - -func createTestUint32Arr() arrow.Array { - bld := array.NewUint32Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]uint32{1, 2, 0, 3}, []bool{true, true, false, true}) - return bld.NewUint32Array() -} - -func createTestUint64Arr() arrow.Array { - bld := array.NewUint64Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]uint64{1, 2, 3}, []bool{true, true, true}) - return bld.NewUint64Array() -} - -func createTestBoolArr() arrow.Array { - bld := array.NewBooleanBuilder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]bool{true, false, false}, []bool{true, true, false}) - return bld.NewBooleanArray() -} - -func createTestNullArr() arrow.Array { - return array.NewNull(2) -} - -func createTestFloat32Arr() arrow.Array { - bld := array.NewFloat32Builder(memory.DefaultAllocator) 
- defer bld.Release() - - bld.AppendValues([]float32{1.5, 0}, []bool{true, false}) - return bld.NewFloat32Array() -} - -func createTestFloat64Arr() arrow.Array { - bld := array.NewFloat64Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]float64{1.5, 0}, []bool{true, false}) - return bld.NewFloat64Array() -} - -func createTestFSBArr() arrow.Array { - bld := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, &arrow.FixedSizeBinaryType{ByteWidth: 3}) - defer bld.Release() - - bld.AppendValues([][]byte{[]byte("foo"), []byte("bar"), nil}, []bool{true, true, false}) - return bld.NewFixedSizeBinaryArray() -} - -func createTestBinaryArr() arrow.Array { - bld := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary) - defer bld.Release() - - bld.AppendValues([][]byte{[]byte("foo"), []byte("bar"), nil}, []bool{true, true, false}) - return bld.NewBinaryArray() -} - -func createTestStrArr() arrow.Array { - bld := array.NewStringBuilder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]string{"foo", "bar", ""}, []bool{true, true, false}) - return bld.NewStringArray() -} - -func createTestLargeBinaryArr() arrow.Array { - bld := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.LargeBinary) - defer bld.Release() - - bld.AppendValues([][]byte{[]byte("foo"), []byte("bar"), nil}, []bool{true, true, false}) - return bld.NewLargeBinaryArray() -} - -func createTestLargeStrArr() arrow.Array { - bld := array.NewLargeStringBuilder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]string{"foo", "bar", ""}, []bool{true, true, false}) - return bld.NewLargeStringArray() -} - -func createTestDecimalArr() arrow.Array { - bld := array.NewDecimal128Builder(memory.DefaultAllocator, &arrow.Decimal128Type{Precision: 16, Scale: 4}) - defer bld.Release() - - bld.AppendValues([]decimal128.Num{decimal128.FromU64(12345670), decimal128.FromU64(0)}, []bool{true, false}) - return 
bld.NewDecimal128Array() -} - -func TestPrimitiveArrs(t *testing.T) { - tests := []struct { - name string - fn func() arrow.Array - }{ - {"int8", createTestInt8Arr}, - {"uint8", createTestUint8Arr}, - {"int16", createTestInt16Arr}, - {"uint16", createTestUint16Arr}, - {"int32", createTestInt32Arr}, - {"uint32", createTestUint32Arr}, - {"int64", createTestInt64Arr}, - {"uint64", createTestUint64Arr}, - {"bool", createTestBoolArr}, - {"null", createTestNullArr}, - {"float32", createTestFloat32Arr}, - {"float64", createTestFloat64Arr}, - {"fixed size binary", createTestFSBArr}, - {"binary", createTestBinaryArr}, - {"utf8", createTestStrArr}, - {"largebinary", createTestLargeBinaryArr}, - {"largeutf8", createTestLargeStrArr}, - {"decimal128", createTestDecimalArr}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - arr := tt.fn() - defer arr.Release() - - mem := mallocator.NewMallocator() - defer mem.AssertSize(t, 0) - - carr := createCArr(arr, mem) - defer freeTestMallocatorArr(carr, mem) - - imported, err := ImportCArrayWithType(carr, arr.DataType()) - assert.NoError(t, err) - assert.True(t, array.Equal(arr, imported)) - assert.True(t, isReleased(carr)) - - imported.Release() - }) - } -} - -func TestPrimitiveSliced(t *testing.T) { - arr := createTestInt16Arr() - defer arr.Release() - - sl := array.NewSlice(arr, 1, 2) - defer sl.Release() - - mem := mallocator.NewMallocator() - defer mem.AssertSize(t, 0) - - carr := createCArr(sl, mem) - defer freeTestMallocatorArr(carr, mem) - - imported, err := ImportCArrayWithType(carr, arr.DataType()) - assert.NoError(t, err) - assert.True(t, array.Equal(sl, imported)) - assert.True(t, array.SliceEqual(arr, 1, 2, imported, 0, int64(imported.Len()))) - assert.True(t, isReleased(carr)) - - imported.Release() -} - -func createTestListArr() arrow.Array { - bld := array.NewListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) - defer bld.Release() - - vb := bld.ValueBuilder().(*array.Int8Builder) - - 
bld.Append(true) - vb.AppendValues([]int8{1, 2}, []bool{true, true}) - - bld.Append(true) - vb.AppendValues([]int8{3, 0}, []bool{true, false}) - - bld.AppendNull() - - return bld.NewArray() -} - -func createTestLargeListArr() arrow.Array { - bld := array.NewLargeListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) - defer bld.Release() - - vb := bld.ValueBuilder().(*array.Int8Builder) - - bld.Append(true) - vb.AppendValues([]int8{1, 2}, []bool{true, true}) - - bld.Append(true) - vb.AppendValues([]int8{3, 0}, []bool{true, false}) - - bld.AppendNull() - - return bld.NewArray() -} - -func createTestFixedSizeList() arrow.Array { - bld := array.NewFixedSizeListBuilder(memory.DefaultAllocator, 2, arrow.PrimitiveTypes.Int64) - defer bld.Release() - - vb := bld.ValueBuilder().(*array.Int64Builder) - - bld.Append(true) - vb.AppendValues([]int64{1, 2}, []bool{true, true}) - - bld.Append(true) - vb.AppendValues([]int64{3, 0}, []bool{true, false}) - - bld.AppendNull() - return bld.NewArray() -} - -func createTestStructArr() arrow.Array { - bld := array.NewStructBuilder(memory.DefaultAllocator, arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "b", Type: arrow.BinaryTypes.String, Nullable: true}, - )) - defer bld.Release() - - f1bld := bld.FieldBuilder(0).(*array.Int8Builder) - f2bld := bld.FieldBuilder(1).(*array.StringBuilder) - - bld.Append(true) - f1bld.Append(1) - f2bld.Append("foo") - - bld.Append(true) - f1bld.Append(2) - f2bld.AppendNull() - - return bld.NewArray() -} - -func createTestRunEndsArr() arrow.Array { - bld := array.NewRunEndEncodedBuilder(memory.DefaultAllocator, - arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int8) - defer bld.Release() - - if err := json.Unmarshal([]byte(`[1, 2, 2, 3, null, null, null, 4]`), bld); err != nil { - panic(err) - } - - return bld.NewArray() -} - -func createTestMapArr() arrow.Array { - bld := array.NewMapBuilder(memory.DefaultAllocator, 
arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String, false) - defer bld.Release() - - kb := bld.KeyBuilder().(*array.Int8Builder) - vb := bld.ItemBuilder().(*array.StringBuilder) - - bld.Append(true) - kb.Append(1) - vb.Append("foo") - kb.Append(2) - vb.AppendNull() - - bld.Append(true) - kb.Append(3) - vb.Append("bar") - - return bld.NewArray() -} - -func createTestSparseUnion() arrow.Array { - return createTestUnionArr(arrow.SparseMode) -} - -func createTestDenseUnion() arrow.Array { - return createTestUnionArr(arrow.DenseMode) -} - -func createTestUnionArr(mode arrow.UnionMode) arrow.Array { - fields := []arrow.Field{ - arrow.Field{Name: "u0", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - arrow.Field{Name: "u1", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}, - } - typeCodes := []arrow.UnionTypeCode{5, 10} - bld := array.NewBuilder(memory.DefaultAllocator, arrow.UnionOf(mode, fields, typeCodes)).(array.UnionBuilder) - defer bld.Release() - - u0Bld := bld.Child(0).(*array.Int32Builder) - u1Bld := bld.Child(1).(*array.Uint8Builder) - - bld.Append(5) - if mode == arrow.SparseMode { - u1Bld.AppendNull() - } - u0Bld.Append(128) - bld.Append(5) - if mode == arrow.SparseMode { - u1Bld.AppendNull() - } - u0Bld.Append(256) - bld.Append(10) - if mode == arrow.SparseMode { - u0Bld.AppendNull() - } - u1Bld.Append(127) - bld.Append(10) - if mode == arrow.SparseMode { - u0Bld.AppendNull() - } - u1Bld.Append(25) - - return bld.NewArray() -} - -func TestNestedArrays(t *testing.T) { - tests := []struct { - name string - fn func() arrow.Array - }{ - {"list", createTestListArr}, - {"large list", createTestLargeListArr}, - {"fixed size list", createTestFixedSizeList}, - {"struct", createTestStructArr}, - {"map", createTestMapArr}, - {"sparse union", createTestSparseUnion}, - {"dense union", createTestDenseUnion}, - {"run-end encoded", createTestRunEndsArr}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - arr := tt.fn() - defer arr.Release() - - 
mem := mallocator.NewMallocator() - defer mem.AssertSize(t, 0) - - carr := createCArr(arr, mem) - defer freeTestMallocatorArr(carr, mem) - - imported, err := ImportCArrayWithType(carr, arr.DataType()) - assert.NoError(t, err) - assert.True(t, array.Equal(arr, imported)) - assert.True(t, isReleased(carr)) - - imported.Release() - }) - } -} - -func TestRecordBatch(t *testing.T) { - mem := mallocator.NewMallocator() - defer mem.AssertSize(t, 0) - - arr := createTestStructArr() - defer arr.Release() - - carr := createCArr(arr, mem) - defer freeTestMallocatorArr(carr, mem) - - sc := testStruct([]string{"+s", "c", "u"}, []string{"", "a", "b"}, []int64{0, flagIsNullable, flagIsNullable}) - defer freeMallocedSchemas(sc) - - top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] - rb, err := ImportCRecordBatch(carr, top) - assert.NoError(t, err) - defer rb.Release() - - assert.EqualValues(t, 2, rb.NumCols()) - rbschema := rb.Schema() - assert.Equal(t, "a", rbschema.Field(0).Name) - assert.Equal(t, "b", rbschema.Field(1).Name) - - rec := array.NewRecord(rbschema, []arrow.Array{arr.(*array.Struct).Field(0), arr.(*array.Struct).Field(1)}, -1) - defer rec.Release() - - assert.True(t, array.RecordEqual(rb, rec)) -} - -func TestRecordReaderStream(t *testing.T) { - stream := arrayStreamTest() - defer releaseStream(stream) - - rdr := ImportCArrayStream(stream, nil) - i := 0 - for { - rec, err := rdr.Read() - if err != nil { - if errors.Is(err, io.EOF) { - break - } - assert.NoError(t, err) - } - - assert.EqualValues(t, 2, rec.NumCols()) - assert.Equal(t, "a", rec.ColumnName(0)) - assert.Equal(t, "b", rec.ColumnName(1)) - i++ - for j := 0; j < int(rec.NumRows()); j++ { - assert.Equal(t, int32((j+1)*i), rec.Column(0).(*array.Int32).Value(j)) - } - assert.Equal(t, "foo", rec.Column(1).(*array.String).Value(0)) - assert.Equal(t, "bar", rec.Column(1).(*array.String).Value(1)) - assert.Equal(t, "baz", rec.Column(1).(*array.String).Value(2)) - } -} - -func TestExportRecordReaderStream(t 
*testing.T) { - reclist := arrdata.Records["primitives"] - rdr, _ := array.NewRecordReader(reclist[0].Schema(), reclist) - - out := createTestStreamObj() - ExportRecordReader(rdr, out) - - assert.NotNil(t, out.get_schema) - assert.NotNil(t, out.get_next) - assert.NotNil(t, out.get_last_error) - assert.NotNil(t, out.release) - assert.NotNil(t, out.private_data) - - h := *(*cgo.Handle)(out.private_data) - assert.Same(t, rdr, h.Value().(cRecordReader).rdr) - - importedRdr := ImportCArrayStream(out, nil) - i := 0 - for { - rec, err := importedRdr.Read() - if err != nil { - if errors.Is(err, io.EOF) { - break - } - assert.NoError(t, err) - } - - assert.Truef(t, array.RecordEqual(reclist[i], rec), "expected: %s\ngot: %s", reclist[i], rec) - i++ - } - assert.EqualValues(t, len(reclist), i) -} - -func TestExportRecordReaderStreamLifetime(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema([]arrow.Field{ - {Name: "strings", Type: arrow.BinaryTypes.String, Nullable: false}, - }, nil) - - bldr := array.NewBuilder(mem, &arrow.StringType{}) - defer bldr.Release() - - arr := bldr.NewArray() - defer arr.Release() - - rec := array.NewRecord(schema, []arrow.Array{arr}, 0) - defer rec.Release() - - rdr, _ := array.NewRecordReader(schema, []arrow.Record{rec}) - defer rdr.Release() - - out := createTestStreamObj() - ExportRecordReader(rdr, out) - - // C Stream is holding on to memory - assert.NotEqual(t, 0, mem.CurrentAlloc()) - releaseStream(out) -} - -func TestEmptyListExport(t *testing.T) { - bldr := array.NewBuilder(memory.DefaultAllocator, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)) - defer bldr.Release() - - arr := bldr.NewArray() - defer arr.Release() - - var out CArrowArray - ExportArrowArray(arr, &out, nil) - - assert.Zero(t, out.length) - assert.Zero(t, out.null_count) - assert.Zero(t, out.offset) - assert.EqualValues(t, 2, out.n_buffers) - assert.NotNil(t, out.buffers) - 
assert.EqualValues(t, 1, out.n_children) - assert.NotNil(t, out.children) -} - -func TestEmptyDictExport(t *testing.T) { - bldr := array.NewBuilder(memory.DefaultAllocator, &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String, Ordered: true}) - defer bldr.Release() - - arr := bldr.NewArray() - defer arr.Release() - - var out CArrowArray - var sc CArrowSchema - ExportArrowArray(arr, &out, &sc) - - assert.EqualValues(t, 'c', *sc.format) - assert.NotZero(t, sc.flags&1) - assert.Zero(t, sc.n_children) - assert.NotNil(t, sc.dictionary) - assert.EqualValues(t, 'u', *sc.dictionary.format) - - assert.Zero(t, out.length) - assert.Zero(t, out.null_count) - assert.Zero(t, out.offset) - assert.EqualValues(t, 2, out.n_buffers) - assert.Zero(t, out.n_children) - assert.Nil(t, out.children) - assert.NotNil(t, out.dictionary) - - assert.Zero(t, out.dictionary.length) - assert.Zero(t, out.dictionary.null_count) - assert.Zero(t, out.dictionary.offset) - assert.EqualValues(t, 3, out.dictionary.n_buffers) - assert.Zero(t, out.dictionary.n_children) - assert.Nil(t, out.dictionary.children) - assert.Nil(t, out.dictionary.dictionary) -} - -func TestEmptyStringExport(t *testing.T) { - // apache/arrow#33936: regression test - bldr := array.NewBuilder(memory.DefaultAllocator, &arrow.StringType{}) - defer bldr.Release() - - arr := bldr.NewArray() - defer arr.Release() - - var out CArrowArray - var sc CArrowSchema - ExportArrowArray(arr, &out, &sc) - - assert.EqualValues(t, 'u', *sc.format) - assert.Zero(t, sc.n_children) - assert.Nil(t, sc.dictionary) - - assert.EqualValues(t, 3, out.n_buffers) - buffers := (*[3]unsafe.Pointer)(unsafe.Pointer(out.buffers)) - assert.EqualValues(t, unsafe.Pointer(nil), buffers[0]) - assert.NotEqualValues(t, unsafe.Pointer(nil), buffers[1]) - assert.NotEqualValues(t, unsafe.Pointer(nil), buffers[2]) -} - -func TestEmptyUnionExport(t *testing.T) { - // apache/arrow#33936: regression test - bldr := 
array.NewBuilder(memory.DefaultAllocator, arrow.SparseUnionOf([]arrow.Field{ - {Name: "child", Type: &arrow.Int64Type{}}, - }, []arrow.UnionTypeCode{0})) - defer bldr.Release() - - arr := bldr.NewArray() - defer arr.Release() - - var out CArrowArray - var sc CArrowSchema - ExportArrowArray(arr, &out, &sc) - - assert.EqualValues(t, 1, sc.n_children) - assert.Nil(t, sc.dictionary) - - assert.EqualValues(t, 1, out.n_buffers) - buffers := (*[1]unsafe.Pointer)(unsafe.Pointer(out.buffers)) - assert.NotEqualValues(t, unsafe.Pointer(nil), buffers[0]) -} - -func TestRecordReaderExport(t *testing.T) { - // Regression test for apache/arrow#33767 - reclist := arrdata.Records["primitives"] - rdr, _ := array.NewRecordReader(reclist[0].Schema(), reclist) - - if err := exportedStreamTest(rdr); err != nil { - t.Fatalf("Failed to test exported stream: %#v", err) - } -} - -type failingReader struct { - opCount int -} - -func (r *failingReader) Retain() {} -func (r *failingReader) Release() {} -func (r *failingReader) Schema() *arrow.Schema { - r.opCount -= 1 - if r.opCount == 0 { - return nil - } - return arrdata.Records["primitives"][0].Schema() -} -func (r *failingReader) Next() bool { - r.opCount -= 1 - return r.opCount > 0 -} -func (r *failingReader) Record() arrow.Record { - arrdata.Records["primitives"][0].Retain() - return arrdata.Records["primitives"][0] -} -func (r *failingReader) Err() error { - if r.opCount == 0 { - return fmt.Errorf("Expected error message") - } - return nil -} - -func TestRecordReaderError(t *testing.T) { - // Regression test for apache/arrow#33789 - err := roundTripStreamTest(&failingReader{opCount: 1}) - if err == nil { - t.Fatalf("Expected error but got none") - } - assert.Contains(t, err.Error(), "Expected error message") - - err = roundTripStreamTest(&failingReader{opCount: 2}) - if err == nil { - t.Fatalf("Expected error but got none") - } - assert.Contains(t, err.Error(), "Expected error message") - - err = 
roundTripStreamTest(&failingReader{opCount: 3}) - if err == nil { - t.Fatalf("Expected error but got none") - } - assert.Contains(t, err.Error(), "Expected error message") -} - -func TestRecordReaderImportError(t *testing.T) { - // Regression test for apache/arrow#35974 - - err := fallibleSchemaTestDeprecated() - if err == nil { - t.Fatalf("Expected error but got nil") - } - assert.Contains(t, err.Error(), "Expected error message") - - err = fallibleSchemaTest() - if err == nil { - t.Fatalf("Expected error but got nil") - } - assert.Contains(t, err.Error(), "Expected error message") -} - -func TestConfuseGoGc(t *testing.T) { - // Regression test for https://github.com/apache/arrow-adbc/issues/729 - reclist := arrdata.Records["primitives"] - - var wg sync.WaitGroup - concurrency := 32 - wg.Add(concurrency) - - // XXX: this test is a bit expensive - for i := 0; i < concurrency; i++ { - go func() { - for i := 0; i < 256; i++ { - rdr, err := array.NewRecordReader(reclist[0].Schema(), reclist) - assert.NoError(t, err) - runtime.GC() - assert.NoError(t, confuseGoGc(rdr)) - runtime.GC() - } - wg.Done() - }() - } - - wg.Wait() -} diff --git a/go/arrow/cdata/cdata_test_framework.go b/go/arrow/cdata/cdata_test_framework.go deleted file mode 100644 index 968b28b4e4afb..0000000000000 --- a/go/arrow/cdata/cdata_test_framework.go +++ /dev/null @@ -1,451 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build test -// +build test - -package cdata - -// #include -// #include -// #include -// #include "arrow/c/abi.h" -// #include "arrow/c/helpers.h" -// -// void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out); -// static struct ArrowArray* get_test_arr() { -// struct ArrowArray* array = (struct ArrowArray*)malloc(sizeof(struct ArrowArray)); -// memset(array, 0, sizeof(*array)); -// return array; -// } -// static struct ArrowArrayStream* get_test_stream() { -// struct ArrowArrayStream* out = (struct ArrowArrayStream*)malloc(sizeof(struct ArrowArrayStream)); -// memset(out, 0, sizeof(struct ArrowArrayStream)); -// return out; -// } -// -// void release_test_arr(struct ArrowArray* arr); -// -// static int32_t* get_data() { -// int32_t* data = malloc(sizeof(int32_t)*10); -// for (int i = 0; i < 10; ++i) { data[i] = i+1; } -// return data; -// } -// void export_int32_type(struct ArrowSchema* schema); -// void export_int32_array(const int32_t*, int64_t, struct ArrowArray*); -// int test1_is_released(); -// void test_primitive(struct ArrowSchema* schema, const char* fmt); -// void free_malloced_schemas(struct ArrowSchema**); -// struct ArrowSchema** test_lists(const char** fmts, const char** names, const int* nullflags, const int n); -// struct ArrowSchema** test_struct(const char** fmts, const char** names, int64_t* flags, const int n); -// struct ArrowSchema** test_map(const char** fmts, const char** names, int64_t* flags, const int n); -// struct ArrowSchema** test_schema(const char** fmts, const char** names, int64_t* 
flags, const int n); -// struct ArrowSchema** test_union(const char** fmts, const char** names, int64_t* flags, const int n); -// int test_exported_stream(struct ArrowArrayStream* stream); -// void test_stream_schema_fallible(struct ArrowArrayStream* stream); -// int confuse_go_gc(struct ArrowArrayStream* stream, unsigned int seed); -// extern void releaseTestArr(struct ArrowArray* array); -// extern void goReleaseTestArray(struct ArrowArray* array); -import "C" - -import ( - "errors" - "fmt" - "io" - "math/rand" - "runtime/cgo" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/internal" - "github.com/apache/arrow/go/v18/arrow/memory/mallocator" -) - -const ( - flagIsNullable = C.ARROW_FLAG_NULLABLE - flagMapKeysSorted = C.ARROW_FLAG_MAP_KEYS_SORTED -) - -var ( - metadata1 = arrow.NewMetadata([]string{"key1", "key2"}, []string{"", "bar"}) - metadata2 = arrow.NewMetadata([]string{"key"}, []string{"abcde"}) -) - -func exportInt32TypeSchema() CArrowSchema { - var s CArrowSchema - C.export_int32_type(&s) - return s -} - -func releaseStream(s *CArrowArrayStream) { - C.ArrowArrayStreamRelease(s) -} - -func schemaIsReleased(s *CArrowSchema) bool { - return C.ArrowSchemaIsReleased(s) == 1 -} - -func getMetadataKeys() ([]string, []string) { - return []string{"key1", "key2"}, []string{"key"} -} - -func getMetadataValues() ([]string, []string) { - return []string{"", "bar"}, []string{"abcde"} -} - -func exportInt32Array() *CArrowArray { - arr := C.get_test_arr() - C.export_int32_array(C.get_data(), C.int64_t(10), arr) - return arr -} - -func isReleased(arr *CArrowArray) bool { - return C.ArrowArrayIsReleased(arr) == 1 -} - -func test1IsReleased() bool { - return C.test1_is_released() == 1 -} - -func testPrimitive(fmtstr string) CArrowSchema { - var s CArrowSchema - fmt := C.CString(fmtstr) - C.test_primitive(&s, fmt) - return s -} - -func freeMallocedSchemas(schemas **CArrowSchema) { - 
C.free_malloced_schemas(schemas) -} - -func testNested(fmts, names []string, isnull []bool) **CArrowSchema { - if len(fmts) != len(names) { - panic("testing nested lists must have same size fmts and names") - } - cfmts := make([]*C.char, len(fmts)) - cnames := make([]*C.char, len(names)) - nulls := make([]C.int, len(isnull)) - - for i := range fmts { - cfmts[i] = C.CString(fmts[i]) - cnames[i] = C.CString(names[i]) - } - - for i, v := range isnull { - if v { - nulls[i] = C.ARROW_FLAG_NULLABLE - } else { - nulls[i] = 0 - } - } - - return C.test_lists((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int)(unsafe.Pointer(&nulls[0])), C.int(len(fmts))) -} - -func testStruct(fmts, names []string, flags []int64) **CArrowSchema { - if len(fmts) != len(names) || len(names) != len(flags) { - panic("testing structs must all have the same size slices in args") - } - - cfmts := make([]*C.char, len(fmts)) - cnames := make([]*C.char, len(names)) - cflags := make([]C.int64_t, len(flags)) - - for i := range fmts { - cfmts[i] = C.CString(fmts[i]) - cnames[i] = C.CString(names[i]) - cflags[i] = C.int64_t(flags[i]) - } - - return C.test_struct((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) -} - -func testMap(fmts, names []string, flags []int64) **CArrowSchema { - if len(fmts) != len(names) || len(names) != len(flags) { - panic("testing maps must all have the same size slices in args") - } - - cfmts := make([]*C.char, len(fmts)) - cnames := make([]*C.char, len(names)) - cflags := make([]C.int64_t, len(flags)) - - for i := range fmts { - cfmts[i] = C.CString(fmts[i]) - cnames[i] = C.CString(names[i]) - cflags[i] = C.int64_t(flags[i]) - } - - return C.test_map((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) -} - -func testUnion(fmts, names []string, flags []int64) 
**CArrowSchema { - if len(fmts) != len(names) || len(names) != len(flags) { - panic("testing unions must all have the same size slices in args") - } - - cfmts := make([]*C.char, len(fmts)) - cnames := make([]*C.char, len(names)) - cflags := make([]C.int64_t, len(flags)) - - for i := range fmts { - cfmts[i] = C.CString(fmts[i]) - cnames[i] = C.CString(names[i]) - cflags[i] = C.int64_t(flags[i]) - } - - return C.test_union((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) -} - -func testSchema(fmts, names []string, flags []int64) **CArrowSchema { - if len(fmts) != len(names) || len(names) != len(flags) { - panic("testing structs must all have the same size slices in args") - } - - cfmts := make([]*C.char, len(fmts)) - cnames := make([]*C.char, len(names)) - cflags := make([]C.int64_t, len(flags)) - - for i := range fmts { - cfmts[i] = C.CString(fmts[i]) - cnames[i] = C.CString(names[i]) - cflags[i] = C.int64_t(flags[i]) - } - - return C.test_schema((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) -} - -func freeAny[T any](alloc *mallocator.Mallocator, p *T, n int) { - raw := unsafe.Slice((*byte)(unsafe.Pointer(p)), int(unsafe.Sizeof(*p))*n) - alloc.Free(raw) -} - -func freeTestMallocatorArr(carr *CArrowArray, alloc *mallocator.Mallocator) { - freeAny(alloc, carr, 1) -} - -func getTestArr(alloc *mallocator.Mallocator) *CArrowArray { - raw := alloc.Allocate(C.sizeof_struct_ArrowArray) - return (*CArrowArray)(unsafe.Pointer(&raw[0])) -} - -type testReleaser struct { - alloc *mallocator.Mallocator - bufs [][]byte -} - -//export releaseTestArr -func releaseTestArr(arr *CArrowArray) { - if C.ArrowArrayIsReleased(arr) == 1 { - return - } - defer C.ArrowArrayMarkReleased(arr) - - h := getHandle(arr.private_data) - tr := h.Value().(*testReleaser) - - alloc := tr.alloc - for _, b := range tr.bufs { - 
alloc.Free(b) - } - - if arr.n_buffers > 0 { - freeAny(alloc, arr.buffers, int(arr.n_buffers)) - } - - if arr.dictionary != nil { - C.ArrowArrayRelease(arr.dictionary) - freeAny(alloc, arr.dictionary, 1) - } - - if arr.n_children > 0 { - children := unsafe.Slice(arr.children, arr.n_children) - for _, c := range children { - C.ArrowArrayRelease(c) - freeTestMallocatorArr(c, alloc) - } - - freeAny(alloc, arr.children, int(arr.n_children)) - } - - h.Delete() - C.free(unsafe.Pointer(arr.private_data)) -} - -func allocateBufferMallocatorPtrArr(alloc *mallocator.Mallocator, n int) []*C.void { - raw := alloc.Allocate(int(unsafe.Sizeof((*C.void)(nil))) * n) - return unsafe.Slice((**C.void)(unsafe.Pointer(&raw[0])), n) -} - -func allocateChildrenPtrArr(alloc *mallocator.Mallocator, n int) []*CArrowArray { - raw := alloc.Allocate(int(unsafe.Sizeof((*CArrowArray)(nil))) * n) - return unsafe.Slice((**CArrowArray)(unsafe.Pointer(&raw[0])), n) -} - -func createCArr(arr arrow.Array, alloc *mallocator.Mallocator) *CArrowArray { - var ( - carr = getTestArr(alloc) - children = (**CArrowArray)(nil) - nchildren = C.int64_t(0) - ) - - switch arr := arr.(type) { - case array.ListLike: - clist := allocateChildrenPtrArr(alloc, 1) - clist[0] = createCArr(arr.ListValues(), alloc) - children = (**CArrowArray)(unsafe.Pointer(&clist[0])) - nchildren += 1 - case *array.Struct: - clist := allocateChildrenPtrArr(alloc, arr.NumField()) - for i := 0; i < arr.NumField(); i++ { - clist[i] = createCArr(arr.Field(i), alloc) - nchildren += 1 - } - children = (**CArrowArray)(unsafe.Pointer(&clist[0])) - case *array.RunEndEncoded: - clist := allocateChildrenPtrArr(alloc, 2) - clist[0] = createCArr(arr.RunEndsArr(), alloc) - clist[1] = createCArr(arr.Values(), alloc) - children = (**CArrowArray)(unsafe.Pointer(&clist[0])) - nchildren += 2 - case array.Union: - clist := allocateChildrenPtrArr(alloc, arr.NumFields()) - for i := 0; i < arr.NumFields(); i++ { - clist[i] = createCArr(arr.Field(i), alloc) - 
nchildren += 1 - } - children = (**CArrowArray)(unsafe.Pointer(&clist[0])) - } - - carr.children = children - carr.n_children = nchildren - carr.dictionary = nil - carr.length = C.int64_t(arr.Len()) - carr.null_count = C.int64_t(arr.NullN()) - carr.offset = C.int64_t(arr.Data().Offset()) - carr.release = (*[0]byte)(C.goReleaseTestArray) - tr := &testReleaser{alloc: alloc} - h := cgo.NewHandle(tr) - carr.private_data = createHandle(h) - - buffers := arr.Data().Buffers() - bufOffset, nbuffers := 0, len(buffers) - hasValidityBitmap := internal.DefaultHasValidityBitmap(arr.DataType().ID()) - if nbuffers > 0 && !hasValidityBitmap { - nbuffers-- - bufOffset++ - } - - if nbuffers == 0 { - return carr - } - - tr.bufs = make([][]byte, 0, nbuffers) - cbufs := allocateBufferMallocatorPtrArr(alloc, nbuffers) - for i, b := range buffers[bufOffset:] { - if b != nil { - raw := alloc.Allocate(b.Len()) - copy(raw, b.Bytes()) - tr.bufs = append(tr.bufs, raw) - cbufs[i] = (*C.void)(unsafe.Pointer(&raw[0])) - } else { - cbufs[i] = nil - } - } - - carr.n_buffers = C.int64_t(len(cbufs)) - if len(cbufs) > 0 { - carr.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cbufs[0])) - } - - return carr -} - -func createTestStreamObj() *CArrowArrayStream { - return C.get_test_stream() -} - -func arrayStreamTest() *CArrowArrayStream { - st := C.get_test_stream() - C.setup_array_stream_test(2, st) - return st -} - -func exportedStreamTest(reader array.RecordReader) error { - out := C.get_test_stream() - ExportRecordReader(reader, out) - rc := C.test_exported_stream(out) - C.free(unsafe.Pointer(out)) - if rc == 0 { - return nil - } - return fmt.Errorf("Exported stream test failed with return code %d", int(rc)) -} - -func roundTripStreamTest(reader array.RecordReader) error { - out := C.get_test_stream() - ExportRecordReader(reader, out) - rdr, err := ImportCRecordReader(out, nil) - - if err != nil { - return err - } - - for { - _, err = rdr.Read() - if errors.Is(err, io.EOF) { - break - } else if err != 
nil { - return err - } - } - return nil -} - -func fallibleSchemaTestDeprecated() (err error) { - stream := CArrowArrayStream{} - C.test_stream_schema_fallible(&stream) - - defer func() { - if r := recover(); r != nil { - err = fmt.Errorf("Panicked: %#v", r) - } - }() - _ = ImportCArrayStream(&stream, nil) - return nil -} - -func fallibleSchemaTest() error { - stream := CArrowArrayStream{} - C.test_stream_schema_fallible(&stream) - - _, err := ImportCRecordReader(&stream, nil) - if err != nil { - return err - } - return nil -} - -func confuseGoGc(reader array.RecordReader) error { - out := C.get_test_stream() - ExportRecordReader(reader, out) - rc := C.confuse_go_gc(out, C.uint(rand.Int())) - C.free(unsafe.Pointer(out)) - if rc == 0 { - return nil - } - return fmt.Errorf("Exported stream test failed with return code %d", int(rc)) -} diff --git a/go/arrow/cdata/exports.go b/go/arrow/cdata/exports.go deleted file mode 100644 index 6dbcde831d889..0000000000000 --- a/go/arrow/cdata/exports.go +++ /dev/null @@ -1,157 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package cdata - -import ( - "runtime/cgo" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" -) - -// #include -// #include "arrow/c/helpers.h" -// -// typedef const char cchar_t; -// extern int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*); -// extern int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*); -// extern const char* streamGetError(struct ArrowArrayStream*); -// extern void streamRelease(struct ArrowArrayStream*); -// // XXX(https://github.com/apache/arrow-adbc/issues/729) -// int streamGetSchemaTrampoline(struct ArrowArrayStream* stream, struct ArrowSchema* out); -// int streamGetNextTrampoline(struct ArrowArrayStream* stream, struct ArrowArray* out); -// -import "C" - -//export releaseExportedSchema -func releaseExportedSchema(schema *CArrowSchema) { - if C.ArrowSchemaIsReleased(schema) == 1 { - return - } - defer C.ArrowSchemaMarkReleased(schema) - - C.free(unsafe.Pointer(schema.name)) - C.free(unsafe.Pointer(schema.format)) - C.free(unsafe.Pointer(schema.metadata)) - - if schema.n_children == 0 { - return - } - - if schema.dictionary != nil { - C.ArrowSchemaRelease(schema.dictionary) - C.free(unsafe.Pointer(schema.dictionary)) - } - - children := unsafe.Slice(schema.children, schema.n_children) - for _, c := range children { - C.ArrowSchemaRelease(c) - } - - C.free(unsafe.Pointer(children[0])) - C.free(unsafe.Pointer(schema.children)) -} - -// apache/arrow#33864: allocate a new cgo.Handle and store its address -// in a heap-allocated uintptr_t. 
-func createHandle(hndl cgo.Handle) unsafe.Pointer { - // uintptr_t* hptr = malloc(sizeof(uintptr_t)); - hptr := (*C.uintptr_t)(C.malloc(C.sizeof_uintptr_t)) - // *hptr = (uintptr)hndl; - *hptr = C.uintptr_t(uintptr(hndl)) - return unsafe.Pointer(hptr) -} - -func getHandle(ptr unsafe.Pointer) cgo.Handle { - // uintptr_t* hptr = (uintptr_t*)ptr; - hptr := (*C.uintptr_t)(ptr) - return cgo.Handle((uintptr)(*hptr)) -} - -//export releaseExportedArray -func releaseExportedArray(arr *CArrowArray) { - if C.ArrowArrayIsReleased(arr) == 1 { - return - } - defer C.ArrowArrayMarkReleased(arr) - - if arr.n_buffers > 0 { - C.free(unsafe.Pointer(arr.buffers)) - } - - if arr.dictionary != nil { - C.ArrowArrayRelease(arr.dictionary) - C.free(unsafe.Pointer(arr.dictionary)) - } - - if arr.n_children > 0 { - children := unsafe.Slice(arr.children, arr.n_children) - - for _, c := range children { - C.ArrowArrayRelease(c) - } - C.free(unsafe.Pointer(children[0])) - C.free(unsafe.Pointer(arr.children)) - } - - h := getHandle(arr.private_data) - h.Value().(arrow.ArrayData).Release() - h.Delete() - C.free(unsafe.Pointer(arr.private_data)) -} - -//export streamGetSchema -func streamGetSchema(handle *CArrowArrayStream, out *CArrowSchema) C.int { - h := getHandle(handle.private_data) - rdr := h.Value().(cRecordReader) - return C.int(rdr.getSchema(out)) -} - -//export streamGetNext -func streamGetNext(handle *CArrowArrayStream, out *CArrowArray) C.int { - h := getHandle(handle.private_data) - rdr := h.Value().(cRecordReader) - return C.int(rdr.next(out)) -} - -//export streamGetError -func streamGetError(handle *CArrowArrayStream) *C.cchar_t { - h := getHandle(handle.private_data) - rdr := h.Value().(cRecordReader) - return rdr.getLastError() -} - -//export streamRelease -func streamRelease(handle *CArrowArrayStream) { - h := getHandle(handle.private_data) - h.Value().(cRecordReader).release() - h.Delete() - C.free(unsafe.Pointer(handle.private_data)) - handle.release = nil - 
handle.private_data = nil -} - -func exportStream(rdr array.RecordReader, out *CArrowArrayStream) { - out.get_schema = (*[0]byte)(C.streamGetSchemaTrampoline) - out.get_next = (*[0]byte)(C.streamGetNextTrampoline) - out.get_last_error = (*[0]byte)(C.streamGetError) - out.release = (*[0]byte)(C.streamRelease) - rdr.Retain() - h := cgo.NewHandle(cRecordReader{rdr: rdr, err: nil}) - out.private_data = createHandle(h) -} diff --git a/go/arrow/cdata/import_allocator.go b/go/arrow/cdata/import_allocator.go deleted file mode 100644 index 4e5c2a7b38c72..0000000000000 --- a/go/arrow/cdata/import_allocator.go +++ /dev/null @@ -1,58 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package cdata - -import ( - "sync/atomic" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow/internal/debug" -) - -// #include "arrow/c/helpers.h" -// #include -import "C" - -type importAllocator struct { - bufCount int64 - - arr *CArrowArray -} - -func (i *importAllocator) addBuffer() { - atomic.AddInt64(&i.bufCount, 1) -} - -func (*importAllocator) Allocate(int) []byte { - panic("cannot allocate from importAllocator") -} - -func (*importAllocator) Reallocate(int, []byte) []byte { - panic("cannot reallocate from importAllocator") -} - -func (i *importAllocator) Free([]byte) { - debug.Assert(atomic.LoadInt64(&i.bufCount) > 0, "too many releases") - - if atomic.AddInt64(&i.bufCount, -1) == 0 { - defer C.free(unsafe.Pointer(i.arr)) - C.ArrowArrayRelease(i.arr) - if C.ArrowArrayIsReleased(i.arr) != 1 { - panic("did not release C mem") - } - } -} diff --git a/go/arrow/cdata/interface.go b/go/arrow/cdata/interface.go deleted file mode 100644 index 005dda73ff0ec..0000000000000 --- a/go/arrow/cdata/interface.go +++ /dev/null @@ -1,284 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build cgo -// +build cgo - -package cdata - -import ( - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/arrio" - "github.com/apache/arrow/go/v18/arrow/memory" - "golang.org/x/xerrors" -) - -// SchemaFromPtr is a simple helper function to cast a uintptr to a *CArrowSchema -func SchemaFromPtr(ptr uintptr) *CArrowSchema { return (*CArrowSchema)(unsafe.Pointer(ptr)) } - -// ArrayFromPtr is a simple helper function to cast a uintptr to a *CArrowArray -func ArrayFromPtr(ptr uintptr) *CArrowArray { return (*CArrowArray)(unsafe.Pointer(ptr)) } - -// ImportCArrowField takes in an ArrowSchema from the C Data interface, it -// will copy the metadata and type definitions rather than keep direct references -// to them. It is safe to call C.ArrowSchemaRelease after receiving the field -// from this function. -func ImportCArrowField(out *CArrowSchema) (arrow.Field, error) { - return importSchema(out) -} - -// ImportCArrowSchema takes in the ArrowSchema from the C Data Interface, it -// will copy the metadata and schema definitions over from the C object rather -// than keep direct references to them. This function will call ArrowSchemaRelease -// on the passed in schema regardless of whether or not there is an error returned. -// -// This version is intended to take in a schema for a record batch, which means -// that the top level of the schema should be a struct of the schema fields. If -// importing a single array's schema, then use ImportCArrowField instead. -func ImportCArrowSchema(out *CArrowSchema) (*arrow.Schema, error) { - ret, err := importSchema(out) - if err != nil { - return nil, err - } - - return arrow.NewSchema(ret.Type.(*arrow.StructType).Fields(), &ret.Metadata), nil -} - -// ImportCArrayWithType takes a pointer to a C Data ArrowArray and interprets the values -// as an array with the given datatype. 
If err is not nil, then ArrowArrayRelease must still -// be called on arr to release the memory. -// -// The underlying buffers will not be copied, but will instead be referenced directly -// by the resulting array interface object. The passed in ArrowArray will have it's ownership -// transferred to the resulting arrow.Array via ArrowArrayMove. The underlying array.Data -// object that is owned by the Array will now be the owner of the memory pointer and -// will call ArrowArrayRelease when it is released and garbage collected via runtime.SetFinalizer. -// -// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, -// it does not take ownership of the actual arr object itself. -func ImportCArrayWithType(arr *CArrowArray, dt arrow.DataType) (arrow.Array, error) { - imp, err := importCArrayAsType(arr, dt) - if err != nil { - return nil, err - } - defer imp.data.Release() - return array.MakeFromData(imp.data), nil -} - -// ImportCArray takes a pointer to both a C Data ArrowArray and C Data ArrowSchema in order -// to import them into usable Go Objects. If err is not nil, then ArrowArrayRelease must still -// be called on arr to release the memory. The ArrowSchemaRelease will be called on the passed in -// schema regardless of whether there is an error or not. -// -// The Schema will be copied with the information used to populate the returned Field, complete -// with metadata. The array will reference the same memory that is referred to by the ArrowArray -// object and take ownership of it as per ImportCArrayWithType. The returned arrow.Array will -// own the C memory and call ArrowArrayRelease when the array.Data object is cleaned up. -// -// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, -// it does not take ownership of the actual arr object itself. 
-func ImportCArray(arr *CArrowArray, schema *CArrowSchema) (arrow.Field, arrow.Array, error) { - field, err := importSchema(schema) - if err != nil { - return field, nil, err - } - - ret, err := ImportCArrayWithType(arr, field.Type) - return field, ret, err -} - -// ImportCRecordBatchWithSchema is used for importing a Record Batch array when the schema -// is already known such as when receiving record batches through a stream. -// -// All of the semantics regarding memory ownership are the same as when calling -// ImportCRecordBatch directly with a schema. -// -// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, -// it does not take ownership of the actual arr object itself. -func ImportCRecordBatchWithSchema(arr *CArrowArray, sc *arrow.Schema) (arrow.Record, error) { - imp, err := importCArrayAsType(arr, arrow.StructOf(sc.Fields()...)) - if err != nil { - return nil, err - } - defer imp.data.Release() - - st := array.NewStructData(imp.data) - defer st.Release() - - // now that we have our fields, we can split them out into the slice of arrays - // and construct a record batch from them to return. - cols := make([]arrow.Array, st.NumField()) - for i := 0; i < st.NumField(); i++ { - cols[i] = st.Field(i) - } - - return array.NewRecord(sc, cols, int64(st.Len())), nil -} - -// ImportCRecordBatch imports an ArrowArray from C as a record batch. If err is not nil, -// then ArrowArrayRelease must still be called to release the memory. -// -// A record batch is represented in the C Data Interface as a Struct Array whose fields -// are the columns of the record batch. Thus after importing the schema passed in here, -// if it is not a Struct type, this will return an error. As with ImportCArray, the -// columns in the record batch will take ownership of the CArrowArray memory if successful. -// Since ArrowArrayMove is used, it's still safe to call ArrowArrayRelease on the source -// regardless. 
But if there is an error, it *MUST* be called to ensure there is no memory leak. -// -// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, -// it does not take ownership of the actual arr object itself. -func ImportCRecordBatch(arr *CArrowArray, sc *CArrowSchema) (arrow.Record, error) { - field, err := importSchema(sc) - if err != nil { - return nil, err - } - - if field.Type.ID() != arrow.STRUCT { - return nil, xerrors.New("recordbatch array import must be of struct type") - } - - return ImportCRecordBatchWithSchema(arr, arrow.NewSchema(field.Type.(*arrow.StructType).Fields(), &field.Metadata)) -} - -// ImportCArrayStream creates an arrio.Reader from an ArrowArrayStream taking ownership -// of the underlying stream object via ArrowArrayStreamMove. -// -// The records returned by this reader must be released manually after they are returned. -// The reader itself will release the stream via SetFinalizer when it is garbage collected. -// It will return (nil, io.EOF) from the Read function when there are no more records to return. -// -// NOTE: The reader takes ownership of the underlying memory buffers via ArrowArrayStreamMove, -// it does not take ownership of the actual stream object itself. -// -// Deprecated: This will panic if importing the schema fails (which is possible). -// Prefer ImportCRecordReader instead. -func ImportCArrayStream(stream *CArrowArrayStream, schema *arrow.Schema) arrio.Reader { - reader, err := ImportCRecordReader(stream, schema) - if err != nil { - panic(err) - } - return reader -} - -// ImportCStreamReader creates an arrio.Reader from an ArrowArrayStream taking ownership -// of the underlying stream object via ArrowArrayStreamMove. -// -// The records returned by this reader must be released manually after they are returned. -// The reader itself will release the stream via SetFinalizer when it is garbage collected. 
-// It will return (nil, io.EOF) from the Read function when there are no more records to return. -// -// NOTE: The reader takes ownership of the underlying memory buffers via ArrowArrayStreamMove, -// it does not take ownership of the actual stream object itself. -func ImportCRecordReader(stream *CArrowArrayStream, schema *arrow.Schema) (arrio.Reader, error) { - out := &nativeCRecordBatchReader{schema: schema} - if err := initReader(out, stream); err != nil { - return nil, err - } - return out, nil -} - -// ExportArrowSchema populates the passed in CArrowSchema with the schema passed in so -// that it can be passed to some consumer of the C Data Interface. The `release` function -// is tied to a callback in order to properly release any memory that was allocated during -// the populating of the struct. Any memory allocated will be allocated using malloc -// which means that it is invisible to the Go Garbage Collector and must be freed manually -// using the callback on the CArrowSchema object. -// -// WARNING: the output ArrowSchema MUST BE ZERO INITIALIZED, or the Go garbage collector -// may error at runtime, due to CGO rules ("the current implementation may sometimes -// cause a runtime error if the contents of the C memory appear to be a Go pointer"). -// You have been warned! -func ExportArrowSchema(schema *arrow.Schema, out *CArrowSchema) { - dummy := arrow.Field{Type: arrow.StructOf(schema.Fields()...), Metadata: schema.Metadata()} - exportField(dummy, out) -} - -// ExportArrowRecordBatch populates the passed in CArrowArray (and optionally the schema too) -// by sharing the memory used for the buffers of each column's arrays. It does not -// copy the data, and will internally increment the reference counters so that releasing -// the record will not free the memory prematurely. 
-// -// When using CGO, memory passed to C is pinned so that the Go garbage collector won't -// move where it is allocated out from under the C pointer locations, ensuring the C pointers -// stay valid. This is only true until the CGO call returns, at which point the garbage collector -// is free to move things around again. As a result, if the function you're calling is going to -// hold onto the pointers or otherwise continue to reference the memory *after* the call returns, -// you should use the CgoArrowAllocator rather than the GoAllocator (or DefaultAllocator) so that -// the memory which is allocated for the record batch in the first place is allocated in C, -// not by the Go runtime and is therefore not subject to the Garbage collection. -// -// The release function on the populated CArrowArray will properly decrease the reference counts, -// and release the memory if the record has already been released. But since this must be explicitly -// done, make sure it is released so that you do not create a memory leak. -// -// WARNING: the output ArrowArray MUST BE ZERO INITIALIZED, or the Go garbage collector -// may error at runtime, due to CGO rules ("the current implementation may sometimes -// cause a runtime error if the contents of the C memory appear to be a Go pointer"). -// You have been warned! 
-func ExportArrowRecordBatch(rb arrow.Record, out *CArrowArray, outSchema *CArrowSchema) { - children := make([]arrow.ArrayData, rb.NumCols()) - for i := range rb.Columns() { - children[i] = rb.Column(i).Data() - } - - data := array.NewData(arrow.StructOf(rb.Schema().Fields()...), int(rb.NumRows()), []*memory.Buffer{nil}, - children, 0, 0) - defer data.Release() - arr := array.NewStructData(data) - defer arr.Release() - - if outSchema != nil { - ExportArrowSchema(rb.Schema(), outSchema) - } - - exportArray(arr, out, nil) -} - -// ExportArrowArray populates the CArrowArray that is passed in with the pointers to the memory -// being used by the arrow.Array passed in, in order to share with zero-copy across the C -// Data Interface. See the documentation for ExportArrowRecordBatch for details on how to ensure -// you do not leak memory and prevent unwanted, undefined or strange behaviors. -// -// WARNING: the output ArrowArray MUST BE ZERO INITIALIZED, or the Go garbage collector -// may error at runtime, due to CGO rules ("the current implementation may sometimes -// cause a runtime error if the contents of the C memory appear to be a Go pointer"). -// You have been warned! -func ExportArrowArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { - exportArray(arr, out, outSchema) -} - -// ExportRecordReader populates the CArrowArrayStream that is passed in with the appropriate -// callbacks to be a working ArrowArrayStream utilizing the passed in RecordReader. The -// CArrowArrayStream takes ownership of the RecordReader until the consumer calls the release -// callback, as such it is unnecessary to call Release on the passed in reader unless it has -// previously been retained. -// -// WARNING: the output ArrowArrayStream MUST BE ZERO INITIALIZED, or the Go garbage -// collector may error at runtime, due to CGO rules ("the current implementation may -// sometimes cause a runtime error if the contents of the C memory appear to be a Go -// pointer"). 
You have been warned! -func ExportRecordReader(reader array.RecordReader, out *CArrowArrayStream) { - exportStream(reader, out) -} - -// ReleaseCArrowArray calls ArrowArrayRelease on the passed in cdata array -func ReleaseCArrowArray(arr *CArrowArray) { releaseArr(arr) } - -// ReleaseCArrowSchema calls ArrowSchemaRelease on the passed in cdata schema -func ReleaseCArrowSchema(schema *CArrowSchema) { releaseSchema(schema) } diff --git a/go/arrow/cdata/test/test_cimport.go b/go/arrow/cdata/test/test_cimport.go deleted file mode 100644 index 5315853fc59ca..0000000000000 --- a/go/arrow/cdata/test/test_cimport.go +++ /dev/null @@ -1,178 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build cdata_test -// +build cdata_test - -package main - -import ( - "fmt" - "runtime" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/cdata" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -// #include -import "C" - -var alloc = memory.NewCheckedAllocator(memory.NewGoAllocator()) - -//export totalAllocated -func totalAllocated() int64 { - return int64(alloc.CurrentAlloc()) -} - -//export runGC -func runGC() { - runtime.GC() -} - -//export importSchema -func importSchema(ptr uintptr) { - schema, err := cdata.ImportCArrowSchema(cdata.SchemaFromPtr(ptr)) - if err != nil { - panic(err) - } - - expectedMetadata := arrow.NewMetadata([]string{"key1"}, []string{"value1"}) - expectedSchema := arrow.NewSchema([]arrow.Field{{Name: "ints", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}}, &expectedMetadata) - if !schema.Equal(expectedSchema) { - panic(fmt.Sprintf("schema didn't match: expected %s, got %s", expectedSchema, schema)) - } - if !schema.Metadata().Equal(expectedMetadata) { - panic(fmt.Sprintf("metadata didn't match: expected %s, got %s", expectedMetadata, schema.Metadata())) - } - - fmt.Println("schema matches! 
Huzzah!") -} - -//export importRecordBatch -func importRecordBatch(scptr, rbptr uintptr) { - sc := cdata.SchemaFromPtr(scptr) - rb := cdata.ArrayFromPtr(rbptr) - - rec, err := cdata.ImportCRecordBatch(rb, sc) - if err != nil { - panic(err) - } - defer rec.Release() - - expectedMetadata := arrow.NewMetadata([]string{"key1"}, []string{"value1"}) - expectedSchema := arrow.NewSchema([]arrow.Field{{Name: "ints", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}}, &expectedMetadata) - - bldr := array.NewRecordBuilder(alloc, expectedSchema) - defer bldr.Release() - - lb := bldr.Field(0).(*array.ListBuilder) - vb := lb.ValueBuilder().(*array.Int32Builder) - - // [[[1], [], None [2, 42]]] - lb.Append(true) - vb.Append(int32(1)) - - lb.Append(true) - lb.Append(false) - - lb.Append(true) - vb.AppendValues([]int32{2, 42}, nil) - - expectedRec := bldr.NewRecord() - defer expectedRec.Release() - - if !array.RecordEqual(expectedRec, rec) { - panic(fmt.Sprintf("records didn't match: expected %s\n got %s", expectedRec, rec)) - } - - fmt.Println("record batch matches huzzah!") -} - -func makeSchema() *arrow.Schema { - meta := arrow.NewMetadata([]string{"key1"}, []string{"value1"}) - return arrow.NewSchema([]arrow.Field{ - {Name: "ints", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}, - }, &meta) -} - -func makeBatch() arrow.Record { - bldr := array.NewRecordBuilder(alloc, makeSchema()) - defer bldr.Release() - - fbldr := bldr.Field(0).(*array.ListBuilder) - valbldr := fbldr.ValueBuilder().(*array.Int32Builder) - - fbldr.Append(true) - valbldr.Append(1) - - fbldr.Append(true) - fbldr.AppendNull() - fbldr.Append(true) - valbldr.Append(2) - valbldr.Append(42) - - return bldr.NewRecord() -} - -//export exportSchema -func exportSchema(schema uintptr) { - cdata.ExportArrowSchema(makeSchema(), cdata.SchemaFromPtr(schema)) -} - -//export exportRecordBatch -func exportRecordBatch(schema, record uintptr) { - batch := makeBatch() - defer batch.Release() - - 
cdata.ExportArrowRecordBatch(batch, cdata.ArrayFromPtr(record), cdata.SchemaFromPtr(schema)) -} - -//export importThenExportSchema -func importThenExportSchema(input, output uintptr) { - schema, err := cdata.ImportCArrowSchema(cdata.SchemaFromPtr(input)) - if err != nil { - panic(err) - } - - cdata.ExportArrowSchema(schema, cdata.SchemaFromPtr(output)) -} - -//export importThenExportRecord -func importThenExportRecord(schemaIn, arrIn uintptr, schemaOut, arrOut uintptr) { - rec, err := cdata.ImportCRecordBatch(cdata.ArrayFromPtr(arrIn), cdata.SchemaFromPtr(schemaIn)) - if err != nil { - panic(err) - } - - defer rec.Release() - cdata.ExportArrowRecordBatch(rec, cdata.ArrayFromPtr(arrOut), cdata.SchemaFromPtr(schemaOut)) -} - -//export roundtripArray -func roundtripArray(arrIn, schema, arrOut uintptr) { - _, arr, err := cdata.ImportCArray(cdata.ArrayFromPtr(arrIn), cdata.SchemaFromPtr(schema)) - if err != nil { - panic(err) - } - defer arr.Release() - - outArr := cdata.ArrayFromPtr(arrOut) - cdata.ExportArrowArray(arr, outArr, nil) -} - -func main() {} diff --git a/go/arrow/cdata/test/test_export_to_cgo.py b/go/arrow/cdata/test/test_export_to_cgo.py deleted file mode 100644 index 4b669f6424437..0000000000000 --- a/go/arrow/cdata/test/test_export_to_cgo.py +++ /dev/null @@ -1,230 +0,0 @@ -#!/usr/bin/env python3 -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import contextlib -import gc -import os -import unittest - -import pyarrow as pa -from pyarrow.cffi import ffi - - -def load_cgotest(): - # XXX what about Darwin? - libext = 'so' - if os.name == 'nt': - libext = 'dll' - - ffi.cdef( - """ - long long totalAllocated(); - void importSchema(uintptr_t ptr); - void importRecordBatch(uintptr_t scptr, uintptr_t rbptr); - void runGC(); - void exportSchema(uintptr_t ptr); - void exportRecordBatch(uintptr_t schema, uintptr_t record); - void importThenExportSchema(uintptr_t input, uintptr_t output); - void importThenExportRecord(uintptr_t schemaIn, uintptr_t arrIn, - uintptr_t schemaOut, uintptr_t arrOut); - void roundtripArray(uintptr_t arrIn, uintptr_t schema, uintptr_t arrOut); - """) - return ffi.dlopen(f'./cgotest.{libext}') - - -cgotest = load_cgotest() - -class BaseTestGoPython(unittest.TestCase): - def setUp(self): - self.c_schema = ffi.new("struct ArrowSchema*") - self.ptr_schema = int(ffi.cast("uintptr_t", self.c_schema)) - self.c_array = ffi.new("struct ArrowArray*") - self.ptr_array = int(ffi.cast("uintptr_t", self.c_array)) - - def make_schema(self): - return pa.schema([('ints', pa.list_(pa.int32()))], - metadata={b'key1': b'value1'}) - - def make_batch(self): - return pa.record_batch([[[1], [], None, [2, 42]]], - self.make_schema()) - - def run_gc(self): - # Several Go GC runs can be required to run all finalizers - for i in range(5): - cgotest.runGC() - gc.collect() - - @contextlib.contextmanager - def assert_pyarrow_memory_released(self): - self.run_gc() - old_allocated = pa.total_allocated_bytes() - 
old_go_allocated = cgotest.totalAllocated() - yield - self.run_gc() - diff = pa.total_allocated_bytes() - old_allocated - godiff = cgotest.totalAllocated() - old_go_allocated - self.assertEqual( - pa.total_allocated_bytes(), old_allocated, - f"PyArrow memory was not adequately released: {diff} bytes lost") - self.assertEqual( - cgotest.totalAllocated(), old_go_allocated, - f"Go memory was not properly released: {godiff} bytes lost") - - -class TestPythonToGo(BaseTestGoPython): - - def test_schema(self): - with self.assert_pyarrow_memory_released(): - self.make_schema()._export_to_c(self.ptr_schema) - # Will panic if expectations are not met - cgotest.importSchema(self.ptr_schema) - - def test_record_batch(self): - with self.assert_pyarrow_memory_released(): - self.make_schema()._export_to_c(self.ptr_schema) - self.make_batch()._export_to_c(self.ptr_array) - # Will panic if expectations are not met - cgotest.importRecordBatch(self.ptr_schema, self.ptr_array) - - -class TestGoToPython(BaseTestGoPython): - - def test_get_schema(self): - with self.assert_pyarrow_memory_released(): - cgotest.exportSchema(self.ptr_schema) - - sc = pa.Schema._import_from_c(self.ptr_schema) - assert sc == self.make_schema() - - def test_get_batch(self): - with self.assert_pyarrow_memory_released(): - cgotest.exportRecordBatch(self.ptr_schema, self.ptr_array) - arrnew = pa.RecordBatch._import_from_c(self.ptr_array, self.ptr_schema) - assert arrnew == self.make_batch() - del arrnew - -class TestRoundTrip(BaseTestGoPython): - - def test_schema_roundtrip(self): - with self.assert_pyarrow_memory_released(): - # make sure that Python -> Go -> Python ends up with - # the same exact schema - schema = self.make_schema() - schema._export_to_c(self.ptr_schema) - del schema - - c_schema = ffi.new("struct ArrowSchema*") - ptr_schema = int(ffi.cast("uintptr_t", c_schema)) - - cgotest.importThenExportSchema(self.ptr_schema, ptr_schema) - schema_new = pa.Schema._import_from_c(ptr_schema) - assert 
schema_new == self.make_schema() - del c_schema - - def test_batch_roundtrip(self): - with self.assert_pyarrow_memory_released(): - # make sure that Python -> Go -> Python for record - # batches works correctly and gets the same data in the end - schema = self.make_schema() - batch = self.make_batch() - schema._export_to_c(self.ptr_schema) - batch._export_to_c(self.ptr_array) - del schema - del batch - - c_schema = ffi.new("struct ArrowSchema*") - c_batch = ffi.new("struct ArrowArray*") - ptr_schema = int(ffi.cast("uintptr_t", c_schema)) - ptr_batch = int(ffi.cast("uintptr_t", c_batch)) - - cgotest.importThenExportRecord(self.ptr_schema, self.ptr_array, - ptr_schema, ptr_batch) - batch_new = pa.RecordBatch._import_from_c(ptr_batch, ptr_schema) - assert batch_new == self.make_batch() - del batch_new - del c_schema - del c_batch - - # commented out types can be uncommented after - # GH-14875 is addressed - _test_pyarrow_types = [ - pa.null(), - pa.bool_(), - pa.int32(), - pa.time32("s"), - pa.time64("us"), - pa.date32(), - pa.timestamp("us"), - pa.timestamp("us", tz="UTC"), - pa.timestamp("us", tz="Europe/Paris"), - pa.duration("s"), - pa.duration("ms"), - pa.duration("us"), - pa.duration("ns"), - pa.float16(), - pa.float32(), - pa.float64(), - pa.decimal128(19, 4), - pa.string(), - pa.binary(), - pa.binary(10), - pa.large_string(), - pa.large_binary(), - pa.list_(pa.int32()), - pa.list_(pa.int32(), 2), - pa.large_list(pa.uint16()), - pa.struct([ - pa.field("a", pa.int32()), - pa.field("b", pa.int8()), - pa.field("c", pa.string()), - ]), - pa.struct([ - pa.field("a", pa.int32(), nullable=False), - pa.field("b", pa.int8(), nullable=False), - pa.field("c", pa.string()), - ]), - pa.dictionary(pa.int8(), pa.int64()), - pa.dictionary(pa.int8(), pa.string()), - pa.map_(pa.string(), pa.int32()), - pa.map_(pa.int64(), pa.int32()), - # pa.run_end_encoded(pa.int16(), pa.int64()), - ] - - def test_empty_roundtrip(self): - for typ in self._test_pyarrow_types: - with 
self.subTest(typ=typ): - with self.assert_pyarrow_memory_released(): - a = pa.array([], typ) - a._export_to_c(self.ptr_array) - typ._export_to_c(self.ptr_schema) - - c_arr = ffi.new("struct ArrowArray*") - ptr_arr = int(ffi.cast("uintptr_t", c_arr)) - - cgotest.roundtripArray(self.ptr_array, self.ptr_schema, ptr_arr) - b = pa.Array._import_from_c(ptr_arr, typ) - b.validate(full=True) - assert a.to_pylist() == b.to_pylist() - assert a.type == b.type - del a - del b - -if __name__ == '__main__': - unittest.main(verbosity=2) diff --git a/go/arrow/cdata/trampoline.c b/go/arrow/cdata/trampoline.c deleted file mode 100644 index 01db13fab4845..0000000000000 --- a/go/arrow/cdata/trampoline.c +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include - -#include "arrow/c/abi.h" - -int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*); -int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*); - -int streamGetSchemaTrampoline(struct ArrowArrayStream* stream, struct ArrowSchema* out) { - // XXX(https://github.com/apache/arrow-adbc/issues/729) - memset(out, 0, sizeof(*out)); - return streamGetSchema(stream, out); -} - -int streamGetNextTrampoline(struct ArrowArrayStream* stream, struct ArrowArray* out) { - // XXX(https://github.com/apache/arrow-adbc/issues/729) - memset(out, 0, sizeof(*out)); - return streamGetNext(stream, out); -} diff --git a/go/arrow/cdata/utils.h b/go/arrow/cdata/utils.h deleted file mode 100644 index dda46b72b728b..0000000000000 --- a/go/arrow/cdata/utils.h +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// +build cgo -// +build test - -// metadata keys 1: {"key1", "key2"} -// metadata values 1: {"", "bar"} -static const char kEncodedMeta1LE[] = { - 2, 0, 0, 0, - 4, 0, 0, 0, 'k', 'e', 'y', '1', 0, 0, 0, 0, - 4, 0, 0, 0, 'k', 'e', 'y', '2', 3, 0, 0, 0, 'b', 'a', 'r'}; - -static const char kEncodedMeta1BE[] = { - 0, 0, 0, 2, - 0, 0, 0, 4, 'k', 'e', 'y', '1', 0, 0, 0, 0, - 0, 0, 0, 4, 'k', 'e', 'y', '2', 0, 0, 0, 3, 'b', 'a', 'r'}; - -static const char* kMetadataKeys2[] = {"key"}; -static const char* kMetadataValues2[] = {"abcde"}; - -// metadata keys 2: {"key"} -// metadata values 2: {"abcde"} -static const char kEncodedMeta2LE[] = { - 1, 0, 0, 0, - 3, 0, 0, 0, 'k', 'e', 'y', 5, 0, 0, 0, 'a', 'b', 'c', 'd', 'e'}; - -static const char kEncodedMeta2BE[] = { - 0, 0, 0, 1, - 0, 0, 0, 3, 'k', 'e', 'y', 0, 0, 0, 5, 'a', 'b', 'c', 'd', 'e'}; - - diff --git a/go/arrow/compare.go b/go/arrow/compare.go deleted file mode 100644 index 58569b332c4f1..0000000000000 --- a/go/arrow/compare.go +++ /dev/null @@ -1,153 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package arrow - -import ( - "reflect" -) - -type typeEqualsConfig struct { - metadata bool -} - -// TypeEqualOption is a functional option type used for configuring type -// equality checks. -type TypeEqualOption func(*typeEqualsConfig) - -// CheckMetadata is an option for TypeEqual that allows checking for metadata -// equality besides type equality. It only makes sense for types with metadata. -func CheckMetadata() TypeEqualOption { - return func(cfg *typeEqualsConfig) { - cfg.metadata = true - } -} - -// TypeEqual checks if two DataType are the same, optionally checking metadata -// equality for STRUCT types. -func TypeEqual(left, right DataType, opts ...TypeEqualOption) bool { - var cfg typeEqualsConfig - for _, opt := range opts { - opt(&cfg) - } - - switch { - case left == nil || right == nil: - return left == nil && right == nil - case left.ID() != right.ID(): - return false - } - - switch l := left.(type) { - case ExtensionType: - return l.ExtensionEquals(right.(ExtensionType)) - case *ListType: - if !TypeEqual(l.Elem(), right.(*ListType).Elem(), opts...) { - return false - } - if cfg.metadata && !l.elem.Metadata.Equal(right.(*ListType).elem.Metadata) { - return false - } - return l.elem.Nullable == right.(*ListType).elem.Nullable - case *FixedSizeListType: - if !TypeEqual(l.Elem(), right.(*FixedSizeListType).Elem(), opts...) { - return false - } - if cfg.metadata && !l.elem.Metadata.Equal(right.(*FixedSizeListType).elem.Metadata) { - return false - } - return l.n == right.(*FixedSizeListType).n && l.elem.Nullable == right.(*FixedSizeListType).elem.Nullable - case *MapType: - if !TypeEqual(l.KeyType(), right.(*MapType).KeyType(), opts...) { - return false - } - if !TypeEqual(l.ItemType(), right.(*MapType).ItemType(), opts...) 
{ - return false - } - if l.KeyField().Nullable != right.(*MapType).KeyField().Nullable { - return false - } - if l.ItemField().Nullable != right.(*MapType).ItemField().Nullable { - return false - } - if cfg.metadata { - if !l.KeyField().Metadata.Equal(right.(*MapType).KeyField().Metadata) { - return false - } - if !l.ItemField().Metadata.Equal(right.(*MapType).ItemField().Metadata) { - return false - } - } - return true - case *StructType: - r := right.(*StructType) - switch { - case len(l.fields) != len(r.fields): - return false - case !reflect.DeepEqual(l.index, r.index): - return false - } - for i := range l.fields { - leftField, rightField := l.fields[i], r.fields[i] - switch { - case leftField.Name != rightField.Name: - return false - case leftField.Nullable != rightField.Nullable: - return false - case !TypeEqual(leftField.Type, rightField.Type, opts...): - return false - case cfg.metadata && !leftField.Metadata.Equal(rightField.Metadata): - return false - } - } - return true - case UnionType: - r := right.(UnionType) - if l.Mode() != r.Mode() { - return false - } - - if !reflect.DeepEqual(l.ChildIDs(), r.ChildIDs()) { - return false - } - - for i := range l.Fields() { - leftField, rightField := l.Fields()[i], r.Fields()[i] - switch { - case leftField.Name != rightField.Name: - return false - case leftField.Nullable != rightField.Nullable: - return false - case !TypeEqual(leftField.Type, rightField.Type, opts...): - return false - case cfg.metadata && !leftField.Metadata.Equal(rightField.Metadata): - return false - case l.TypeCodes()[i] != r.TypeCodes()[i]: - return false - } - } - return true - case *TimestampType: - r := right.(*TimestampType) - return l.Unit == r.Unit && l.TimeZone == r.TimeZone - case *RunEndEncodedType: - r := right.(*RunEndEncodedType) - return TypeEqual(l.Encoded(), r.Encoded(), opts...) && - TypeEqual(l.runEnds, r.runEnds, opts...) 
- default: - return reflect.DeepEqual(left, right) - } -} diff --git a/go/arrow/compare_test.go b/go/arrow/compare_test.go deleted file mode 100644 index ca87621eadcb9..0000000000000 --- a/go/arrow/compare_test.go +++ /dev/null @@ -1,397 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package arrow - -import ( - "testing" - "time" -) - -func TestTypeEqual(t *testing.T) { - tests := []struct { - left, right DataType - want bool - checkMetadata bool - }{ - { - nil, nil, true, false, - }, - { - nil, PrimitiveTypes.Uint8, false, false, - }, - { - PrimitiveTypes.Float32, nil, false, false, - }, - { - PrimitiveTypes.Float64, PrimitiveTypes.Int32, false, false, - }, - { - Null, Null, true, false, - }, - { - Null, new(NullType), true, false, - }, - { - &BinaryType{}, &StringType{}, false, false, - }, - { - &LargeBinaryType{}, &LargeStringType{}, false, false, - }, - { - BinaryTypes.LargeBinary, &LargeBinaryType{}, true, false, - }, - { - BinaryTypes.LargeString, &LargeStringType{}, true, false, - }, - { - &Time32Type{Unit: Second}, &Time32Type{Unit: Second}, true, false, - }, - { - &Time32Type{Unit: Millisecond}, &Time32Type{Unit: Second}, false, false, - }, - { - &Time64Type{Unit: Nanosecond}, &Time64Type{Unit: Nanosecond}, true, false, - }, - { - &Time64Type{Unit: Nanosecond}, &Time64Type{Unit: Microsecond}, false, false, - }, - { - &TimestampType{Unit: Second, TimeZone: "UTC"}, &TimestampType{Unit: Second, TimeZone: "UTC"}, true, false, - }, - { - &TimestampType{Unit: Microsecond, TimeZone: "UTC"}, &TimestampType{Unit: Millisecond, TimeZone: "UTC"}, false, false, - }, - { - &TimestampType{Unit: Second, TimeZone: "UTC"}, &TimestampType{Unit: Second, TimeZone: "CET"}, false, false, - }, - { - &TimestampType{Unit: Second, TimeZone: "UTC"}, &TimestampType{Unit: Nanosecond, TimeZone: "CET"}, false, false, - }, - { - &ListType{elem: Field{Type: PrimitiveTypes.Uint64}}, &ListType{elem: Field{Type: PrimitiveTypes.Uint64}}, true, false, - }, - { - &ListType{elem: Field{Type: PrimitiveTypes.Uint64}}, &ListType{elem: Field{Type: PrimitiveTypes.Uint32}}, false, false, - }, - { - &ListType{elem: Field{Type: &Time32Type{Unit: Millisecond}}}, &ListType{elem: Field{Type: &Time32Type{Unit: Millisecond}}}, true, false, - }, - { - &ListType{elem: Field{Type: 
&Time32Type{Unit: Millisecond}}}, &ListType{elem: Field{Type: &Time32Type{Unit: Second}}}, false, false, - }, - { - &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint16}}}}, &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint16}}}}, true, false, - }, - { - &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint16}}}}, &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint8}}}}, false, false, - }, - { - &ListType{elem: Field{Type: &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint16}}}}}}, &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint8}}}}, false, false, - }, - { - &ListType{elem: Field{Type: PrimitiveTypes.Uint64, Nullable: true}}, &ListType{elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, false, true, - }, - { - &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, &FixedSizeListType{n: 3, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, false, true, - }, - { - &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, true, true, - }, - { - &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: true}}, false, true, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - }, - false, true, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: 
true}, - }, - index: map[string][]int{"f1": {0}}, - }, - false, false, - }, - { - &StructType{ - fields: []Field{ - {Name: "f0", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f0": {0}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - }, - false, false, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - false, true, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - false, false, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - }, - &StructType{ - fields: []Field{ - {Name: "f2", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f2": {0}}, - }, - false, false, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - true, false, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: 
PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - true, false, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - meta: MetadataFrom(map[string]string{"k1": "v1", "k2": "v2"}), - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - meta: MetadataFrom(map[string]string{"k2": "v2", "k1": "v1"}), - }, - true, true, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - meta: MetadataFrom(map[string]string{"k1": "v1"}), - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - meta: MetadataFrom(map[string]string{"k1": "v2"}), - }, - true, false, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true, Metadata: MetadataFrom(map[string]string{"k1": "v1"})}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true, Metadata: MetadataFrom(map[string]string{"k1": "v2"})}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - false, true, - }, - { - &StructType{ - fields: 
[]Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0, 1}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0, 1}}, - }, - true, true, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - }, - index: map[string][]int{"f1": {0, 1}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0, 1}}, - }, - false, true, - }, - { - MapOf(BinaryTypes.String, PrimitiveTypes.Int32), - MapOf(BinaryTypes.String, PrimitiveTypes.Int32), - true, false, - }, - { - MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_ns), - MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_ns), - true, false, - }, - { - MapOf(BinaryTypes.String, &TimestampType{ - Unit: 0, - TimeZone: "UTC", - loc: time.UTC, - }), - MapOf(BinaryTypes.String, &TimestampType{ - Unit: 0, - TimeZone: "UTC", - }), - true, false, - }, - { - MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_ns), - MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_us), - false, false, - }, - { - MapOf(BinaryTypes.String, FixedWidthTypes.Timestamp_ns), - MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_ns), - false, false, - }, - { - MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), - MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), - true, true, - }, - { - 
MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), - MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v2"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v2"})), - true, false, - }, - { - MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), - MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v2"})), - false, true, - }, - { - MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), - MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v2"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), - false, true, - }, - } - - for _, test := range tests { - t.Run("", func(t *testing.T) { - var got bool - if test.checkMetadata { - got = TypeEqual(test.left, test.right, CheckMetadata()) - } else { - got = TypeEqual(test.left, test.right) - } - if got != test.want { - t.Fatalf("TypeEqual(%v, %v, %v): got=%v, want=%v", test.left, test.right, test.checkMetadata, got, test.want) - } - }) - } -} diff --git a/go/arrow/compute/arithmetic.go b/go/arrow/compute/arithmetic.go deleted file mode 100644 index 51ca027d53375..0000000000000 --- a/go/arrow/compute/arithmetic.go +++ /dev/null @@ -1,1229 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute - -import ( - "context" - "fmt" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/scalar" -) - -type ( - RoundOptions = kernels.RoundOptions - RoundMode = kernels.RoundMode - RoundToMultipleOptions = kernels.RoundToMultipleOptions -) - -const ( - // Round to nearest integer less than or equal in magnitude (aka "floor") - RoundDown = kernels.RoundDown - // Round to nearest integer greater than or equal in magnitude (aka "ceil") - RoundUp = kernels.RoundUp - // Get integral part without fractional digits (aka "trunc") - RoundTowardsZero = kernels.TowardsZero - // Round negative values with DOWN and positive values with UP - RoundTowardsInfinity = kernels.AwayFromZero - // Round ties with DOWN (aka "round half towards negative infinity") - RoundHalfDown = kernels.HalfDown - // Round ties with UP (aka "round half towards positive infinity") - RoundHalfUp = kernels.HalfUp - // Round ties with TowardsZero (aka "round half away from infinity") - RoundHalfTowardsZero = kernels.HalfTowardsZero - // Round ties with AwayFromZero (aka "round half towards infinity") - RoundHalfTowardsInfinity = 
kernels.HalfAwayFromZero - // Round ties to nearest even integer - RoundHalfToEven = kernels.HalfToEven - // Round ties to nearest odd integer - RoundHalfToOdd = kernels.HalfToOdd -) - -var ( - DefaultRoundOptions = RoundOptions{NDigits: 0, Mode: RoundHalfToEven} - DefaultRoundToMultipleOptions = RoundToMultipleOptions{ - Multiple: scalar.NewFloat64Scalar(1), Mode: RoundHalfToEven} -) - -type arithmeticFunction struct { - ScalarFunction - - promote decimalPromotion -} - -func (fn *arithmeticFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { - return execInternal(ctx, fn, opts, -1, args...) -} - -func (fn *arithmeticFunction) checkDecimals(vals ...arrow.DataType) error { - if !hasDecimal(vals...) { - return nil - } - - if len(vals) != 2 { - return nil - } - - if fn.promote == decPromoteNone { - return fmt.Errorf("%w: invalid decimal function: %s", arrow.ErrInvalid, fn.name) - } - - return castBinaryDecimalArgs(fn.promote, vals...) -} - -func (fn *arithmeticFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { - if err := fn.checkArity(len(vals)); err != nil { - return nil, err - } - - if err := fn.checkDecimals(vals...); err != nil { - return nil, err - } - - if kn, err := fn.DispatchExact(vals...); err == nil { - return kn, nil - } - - ensureDictionaryDecoded(vals...) - - // only promote types for binary funcs - if len(vals) == 2 { - replaceNullWithOtherType(vals...) - if unit, istime := commonTemporalResolution(vals...); istime { - replaceTemporalTypes(unit, vals...) - } else { - if dt := commonNumeric(vals...); dt != nil { - replaceTypes(dt, vals...) - } - } - } - - return fn.DispatchExact(vals...) -} - -// an arithmetic function which promotes integers and decimal -// arguments to doubles. 
-type arithmeticFloatingPointFunc struct { - arithmeticFunction -} - -func (fn *arithmeticFloatingPointFunc) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { - return execInternal(ctx, fn, opts, -1, args...) -} - -func (fn *arithmeticFloatingPointFunc) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { - if err := fn.checkArity(len(vals)); err != nil { - return nil, err - } - - if kn, err := fn.DispatchExact(vals...); err == nil { - return kn, nil - } - - ensureDictionaryDecoded(vals...) - - if len(vals) == 2 { - replaceNullWithOtherType(vals...) - } - - for i, v := range vals { - if arrow.IsInteger(v.ID()) || arrow.IsDecimal(v.ID()) { - vals[i] = arrow.PrimitiveTypes.Float64 - } - } - - if dt := commonNumeric(vals...); dt != nil { - replaceTypes(dt, vals...) - } - - return fn.DispatchExact(vals...) -} - -// function that promotes only decimal arguments to float64 -type arithmeticDecimalToFloatingPointFunc struct { - arithmeticFunction -} - -func (fn *arithmeticDecimalToFloatingPointFunc) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { - return execInternal(ctx, fn, opts, -1, args...) -} - -func (fn *arithmeticDecimalToFloatingPointFunc) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { - if err := fn.checkArity(len(vals)); err != nil { - return nil, err - } - - if kn, err := fn.DispatchExact(vals...); err == nil { - return kn, nil - } - - ensureDictionaryDecoded(vals...) - if len(vals) == 2 { - replaceNullWithOtherType(vals...) - } - - for i, t := range vals { - if arrow.IsDecimal(t.ID()) { - vals[i] = arrow.PrimitiveTypes.Float64 - } - } - - if dt := commonNumeric(vals...); dt != nil { - replaceTypes(dt, vals...) - } - - return fn.DispatchExact(vals...) 
-} - -// function that promotes only integer arguments to float64 -type arithmeticIntegerToFloatingPointFunc struct { - arithmeticFunction -} - -func (fn *arithmeticIntegerToFloatingPointFunc) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { - return execInternal(ctx, fn, opts, -1, args...) -} - -func (fn *arithmeticIntegerToFloatingPointFunc) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { - if err := fn.checkArity(len(vals)); err != nil { - return nil, err - } - - if err := fn.checkDecimals(vals...); err != nil { - return nil, err - } - - if kn, err := fn.DispatchExact(vals...); err == nil { - return kn, nil - } - - ensureDictionaryDecoded(vals...) - if len(vals) == 2 { - replaceNullWithOtherType(vals...) - } - - for i, t := range vals { - if arrow.IsInteger(t.ID()) { - vals[i] = arrow.PrimitiveTypes.Float64 - } - } - - if dt := commonNumeric(vals...); dt != nil { - replaceTypes(dt, vals...) - } - - return fn.DispatchExact(vals...) -} - -var ( - absoluteValueUncheckedDoc = FunctionDoc{ - Summary: "Calculate the absolute value of the argument, element-wise", - Description: `Results will wrap around on integer overflow -Use function "abs" if you want overflows to return an error`, - ArgNames: []string{"x"}, - } - absoluteValueDoc = FunctionDoc{ - Summary: "Calculate the absolute value of the argument element-wise", - Description: `This function returns an error on overflow. For a variant that -won't fail on overflow, use function "abs_unchecked"`, - ArgNames: []string{"x"}, - } - addUncheckedDoc = FunctionDoc{ - Summary: "Add the arguments element-wise", - Description: `Results will wrap around on integer overflow -Use the function "add" if you want overflow to return an error`, - ArgNames: []string{"x", "y"}, - } - addDoc = FunctionDoc{ - Summary: "Add the arguments element-wise", - Description: `This function returns an error on overflow. 
-For a variant that won't fail on overflow, use function "add_unchecked"`, - ArgNames: []string{"x", "y"}, - } - subUncheckedDoc = FunctionDoc{ - Summary: "Subtract the arguments element-wise", - Description: `This Results will wrap around on integer overflow. -Use the function "sub" if you want overflow to return an error`, - ArgNames: []string{"x", "y"}, - } - subDoc = FunctionDoc{ - Summary: "Subtract the arguments element-wise", - Description: `This function returns an error on overflow. -For a variant that won't fail on overflow, use the function "sub_unchecked"`, - ArgNames: []string{"x", "y"}, - } - mulUncheckedDoc = FunctionDoc{ - Summary: "Multiply the arguments element-wise", - Description: `Results will wrap around on integer overflow. -Use function "multiply" if you want overflow to return an error`, - ArgNames: []string{"x", "y"}, - } - mulDoc = FunctionDoc{ - Summary: "Multiply the arguments element-wise", - Description: `This function returns an error on overflow. -For a variant that won't fail on overflow, use the function -"multiply_unchecked"`, - ArgNames: []string{"x", "y"}, - } - divUncheckedDoc = FunctionDoc{ - Summary: "Divide the arguments element-wise", - Description: `Integer division by zero returns an error. However integer -overflow wraps around, and floating-point division by zero returns Inf. 
-Use the function "divide" if you want to get an error in all the -aforementioned cases.`, - ArgNames: []string{"dividend", "divisor"}, - } - divDoc = FunctionDoc{ - Summary: "Divide the arguments element-wise", - Description: `An error is returned when trying to divide by zero, -or when integer overflow is encountered.`, - ArgNames: []string{"dividend", "divisor"}, - } - negateUncheckedDoc = FunctionDoc{ - Summary: "Negate the argument element-wise", - Description: `Results will wrap around on integer overflow -Use function "negate" if you want overflow to return an error`, - ArgNames: []string{"x"}, - } - negateDoc = FunctionDoc{ - Summary: "Negate the argument element-wise", - Description: `This function returns an error on overflow. For a variant -that doesn't fail on overflow, use the function "negate_unchecked".`, - ArgNames: []string{"x"}, - } - powUncheckedDoc = FunctionDoc{ - Summary: "Raise argument to a power element-wise", - Description: `Integers to negative integer powers return an error. -However, integer overflow wraps around. If either base or exponent is null -the result will be null.`, - ArgNames: []string{"base", "exponent"}, - } - powDoc = FunctionDoc{ - Summary: "Raise argument to a power element-wise", - Description: `An error is returned when an integer is raised to a negative -power or an integer overflow occurs.`, - ArgNames: []string{"base", "exponent"}, - } - sqrtUncheckedDoc = FunctionDoc{ - Summary: "Takes the square root of arguments element-wise", - Description: `A negative argument returns an NaN. For a variant that returns -an error, use function "sqrt"`, - ArgNames: []string{"x"}, - } - sqrtDoc = FunctionDoc{ - Summary: "Takes the square root of arguments element-wise", - Description: `A negative argument returns an error. 
For a variant that -instead returns NaN, use function "sqrt_unchecked"`, - ArgNames: []string{"x"}, - } - signDoc = FunctionDoc{ - Summary: "Get the signedness of the arguments element-wise", - Description: `Output is -1 if <0, 1 if >0 and 0 for 0. -NaN values return NaN. Integral values return signedness as Int8, -and floating-point values return it with the same type as the input values.`, - ArgNames: []string{"x"}, - } - bitWiseNotDoc = FunctionDoc{ - Summary: "Bit-wise negate the arguments element-wise", - Description: "Null values return null", - ArgNames: []string{"x"}, - } - bitWiseAndDoc = FunctionDoc{ - Summary: "Bit-wise AND the arguments element-wise", - Description: "Null values return null", - ArgNames: []string{"x", "y"}, - } - bitWiseOrDoc = FunctionDoc{ - Summary: "Bit-wise OR the arguments element-wise", - Description: "Null values return null", - ArgNames: []string{"x", "y"}, - } - bitWiseXorDoc = FunctionDoc{ - Summary: "Bit-wise XOR the arguments element-wise", - Description: "Null values return null", - ArgNames: []string{"x", "y"}, - } - shiftLeftUncheckedDoc = FunctionDoc{ - Summary: "Left shift `x` by `y`", - Description: `The shift operates as if on the two's complement representation -of the number. In other words, this is equivalent to multiplying "x" by 2 -to the power of "y", even if overflow occurs. -"x" is returned if "y" (the amount to shift by) is (1) negative or (2) -greater than or equal to the precision of "x". -Use function "shift_left" if you want an invalid shift amount to -return an error.`, - ArgNames: []string{"x", "y"}, - } - shiftLeftDoc = FunctionDoc{ - Summary: "Left shift `x` by `y`", - Description: `The shift operates as if on the two's complement representation -of the number. In other words, this is equivalent to multiplying "x" by 2 -to the power of "y", even if overflow occurs. -An error is raised if "y" (the amount to shift by) is (1) negative or (2) -greater than or equal to the precision of "x". 
-See "shift_left_unchecked" for a variant that doesn't fail for an invalid -shift amount.`, - ArgNames: []string{"x", "y"}, - } - shiftRightUncheckedDoc = FunctionDoc{ - Summary: "Right shift `x` by `y`", - Description: `This is equivalent to dividing "x" by 2 to the power "y". -"x" is returned if "y" (the amount to shift by) is: (1) negative or -(2) greater than or equal to the precision of "x". -Use function "shift_right" if you want an invalid -shift amount to return an error.`, - ArgNames: []string{"x", "y"}, - } - shiftRightDoc = FunctionDoc{ - Summary: "Right shift `x` by `y`", - Description: `This is equivalent to dividing "x" by 2 to the power "y". -An error is raised if "y" (the amount to shift by) is (1) negative or -(2) greater than or equal to the precision of "x". -See "shift_right_unchecked" for a variant that doesn't fail for -an invalid shift amount.`, - ArgNames: []string{"x", "y"}, - } - sinUncheckedDoc = FunctionDoc{ - Summary: "Compute the sine", - Description: `NaN is returned for invalid input values; to raise an error -instead, see "sin"`, - ArgNames: []string{"x"}, - } - sinDoc = FunctionDoc{ - Summary: "Compute the sine", - Description: `Invalid input values raise an error; -to return NaN instead, see "sin_unchecked".`, - ArgNames: []string{"x"}, - } - cosUncheckedDoc = FunctionDoc{ - Summary: "Compute the cosine", - Description: `NaN is returned for invalid input values; -to raise an error instead, see "cos".`, - ArgNames: []string{"x"}, - } - cosDoc = FunctionDoc{ - Summary: "Compute the cosine", - Description: `Infinite values raise an error; -to return NaN instead, see "cos_unchecked".`, - ArgNames: []string{"x"}, - } - tanUncheckedDoc = FunctionDoc{ - Summary: "Compute the tangent", - Description: `NaN is returned for invalid input values; -to raise an error instead see "tan".`, - ArgNames: []string{"x"}, - } - tanDoc = FunctionDoc{ - Summary: "Compute the tangent", - Description: `Infinite values raise an error; -to return NaN 
instead, see "tan_unchecked".`, - ArgNames: []string{"x"}, - } - asinUncheckedDoc = FunctionDoc{ - Summary: "Compute the inverse sine", - Description: `NaN is returned for invalid input values; -to raise an error instead, see "asin"`, - ArgNames: []string{"x"}, - } - asinDoc = FunctionDoc{ - Summary: "Compute the inverse sine", - Description: `Invalid input values raise an error; -to return NaN instead see asin_unchecked.`, - ArgNames: []string{"x"}, - } - acosUncheckedDoc = FunctionDoc{ - Summary: "Compute the inverse cosine", - Description: `NaN is returned for invalid input values; -to raise an error instead, see "acos".`, - ArgNames: []string{"x"}, - } - acosDoc = FunctionDoc{ - Summary: "Compute the inverse cosine", - Description: `Invalid input values raise an error; -to return NaN instead, see "acos_unchecked".`, - ArgNames: []string{"x"}, - } - atanDoc = FunctionDoc{ - Summary: "Compute the inverse tangent of x", - Description: `The return value is in the range [-pi/2, pi/2]; -for a full return range [-pi, pi], see "atan2"`, - ArgNames: []string{"x"}, - } - atan2Doc = FunctionDoc{ - Summary: "Compute the inverse tangent of y/x", - Description: "The return value is in the range [-pi, pi].", - ArgNames: []string{"y", "x"}, - } - lnUncheckedDoc = FunctionDoc{ - Summary: "Compute natural logarithm", - Description: `Non-positive values return -Inf or NaN. Null values return null. -Use function "ln" if you want non-positive values to raise an error.`, - ArgNames: []string{"x"}, - } - lnDoc = FunctionDoc{ - Summary: "Compute natural logarithm", - Description: `Non-positive values raise an error. Null values return null. -Use function "ln_unchecked" if you want non-positive values to return --Inf or NaN`, - ArgNames: []string{"x"}, - } - log10UncheckedDoc = FunctionDoc{ - Summary: "Compute base 10 logarithm", - Description: `Non-positive values return -Inf or NaN. Null values return null. 
-Use function "log10" if you want non-positive values to raise an error.`, - ArgNames: []string{"x"}, - } - log10Doc = FunctionDoc{ - Summary: "Compute base 10 logarithm", - Description: `Non-positive values raise an error. Null values return null. -Use function "log10_unchecked" if you want non-positive values to return --Inf or NaN.`, - ArgNames: []string{"x"}, - } - log2UncheckedDoc = FunctionDoc{ - Summary: "Compute base 2 logarithm", - Description: `Non-positive values return -Inf or NaN. Null values return null. -Use function "log2" if you want non-positive values to raise an error.`, - ArgNames: []string{"x"}, - } - log2Doc = FunctionDoc{ - Summary: "Compute base 2 logarithm", - Description: `Non-positive values raise an error. Null values return null. -Use function "log2_unchecked" if you want non-positive values to -return -Inf or NaN`, - ArgNames: []string{"x"}, - } - log1pUncheckedDoc = FunctionDoc{ - Summary: "Compute natural log of (1+x)", - Description: `Values <= -1 return -Inf or NaN. Null values return null. -This function may be more precise than log(1 + x) for x close to zero. -Use function "log1p" if you want invalid values to raise an error.`, - ArgNames: []string{"x"}, - } - log1pDoc = FunctionDoc{ - Summary: "Compute natural log of (1+x)", - Description: `Values <= -1 return -Inf or NaN. Null values return null. -This function may be more precise than (1 + x) for x close to zero. -Use function "log1p_unchecked" if you want invalid values to return --Inf or NaN.`, - ArgNames: []string{"x"}, - } - logbUncheckedDoc = FunctionDoc{ - Summary: "Compute base `b` logarithm", - Description: `Values <= 0 return -Inf or NaN. Null values return null. -Use function "logb" if you want non-positive values to raise an error.`, - ArgNames: []string{"x", "b"}, - } - logbDoc = FunctionDoc{ - Summary: "Compute base `b` logarithm", - Description: `Values <= 0 returns an error. Null values return null. 
-Use function "logb_unchecked" if you want non-positive values to return --Inf or NaN.`, - ArgNames: []string{"x", "b"}, - } - floorDoc = FunctionDoc{ - Summary: "Round down to the nearest integer", - Description: "Compute the largest integer value not greater than `x`", - ArgNames: []string{"x"}, - } - ceilDoc = FunctionDoc{ - Summary: "Round up to the nearest integer", - Description: "Compute the smallest integer value not less than `x`", - ArgNames: []string{"x"}, - } - truncDoc = FunctionDoc{ - Summary: "Compute the integral part", - Description: "Compute the nearest integer not greater than `x`", - ArgNames: []string{"x"}, - } - roundDoc = FunctionDoc{ - Summary: "Round to a given precision", - Description: `Options are used to control the number of digits and rounding mode. -Default behavior is to round to the nearest integer and -use half-to-even rule to break ties.`, - ArgNames: []string{"x"}, - OptionsType: "RoundOptions", - } - roundToMultipleDoc = FunctionDoc{ - Summary: "Round to a given multiple", - Description: `Options are used to control the rounding multiple and rounding mode. -Default behavior is to round to the nearest integer and -use half-to-even rule to break ties.`, - ArgNames: []string{"x"}, - OptionsType: "RoundToMultipleOptions", - } -) - -func RegisterScalarArithmetic(reg FunctionRegistry) { - ops := []struct { - funcName string - op kernels.ArithmeticOp - decPromote decimalPromotion - doc FunctionDoc - }{ - {"add_unchecked", kernels.OpAdd, decPromoteAdd, addUncheckedDoc}, - {"add", kernels.OpAddChecked, decPromoteAdd, addDoc}, - } - - for _, o := range ops { - fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), o.decPromote} - kns := append(kernels.GetArithmeticBinaryKernels(o.op), kernels.GetDecimalBinaryKernels(o.op)...) - kns = append(kns, kernels.GetArithmeticFunctionTimeDuration(o.op)...) 
- for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - for _, unit := range arrow.TimeUnitValues { - inType := exec.NewMatchedInput(exec.TimestampTypeUnit(unit)) - inDuration := exec.NewExactInput(&arrow.DurationType{Unit: unit}) - ex := kernels.ArithmeticExecSameType(arrow.TIMESTAMP, o.op) - err := fn.AddNewKernel([]exec.InputType{inType, inDuration}, kernels.OutputFirstType, ex, nil) - if err != nil { - panic(err) - } - err = fn.AddNewKernel([]exec.InputType{inDuration, inType}, kernels.OutputLastType, ex, nil) - if err != nil { - panic(err) - } - - matchDur := exec.NewMatchedInput(exec.DurationTypeUnit(unit)) - ex = kernels.ArithmeticExecSameType(arrow.DURATION, o.op) - err = fn.AddNewKernel([]exec.InputType{matchDur, matchDur}, exec.NewOutputType(&arrow.DurationType{Unit: unit}), ex, nil) - if err != nil { - panic(err) - } - } - - reg.AddFunction(fn, false) - } - - ops = []struct { - funcName string - op kernels.ArithmeticOp - decPromote decimalPromotion - doc FunctionDoc - }{ - {"sub_unchecked", kernels.OpSub, decPromoteAdd, subUncheckedDoc}, - {"sub", kernels.OpSubChecked, decPromoteAdd, subDoc}, - {"subtract_unchecked", kernels.OpSub, decPromoteAdd, subUncheckedDoc}, - {"subtract", kernels.OpSubChecked, decPromoteAdd, subDoc}, - } - - for _, o := range ops { - fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), o.decPromote} - kns := append(kernels.GetArithmeticBinaryKernels(o.op), kernels.GetDecimalBinaryKernels(o.op)...) - kns = append(kns, kernels.GetArithmeticFunctionTimeDuration(o.op)...) 
- for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - for _, unit := range arrow.TimeUnitValues { - // timestamp - timestamp => duration - inType := exec.NewMatchedInput(exec.TimestampTypeUnit(unit)) - ex := kernels.ArithmeticExecSameType(arrow.TIMESTAMP, o.op) - err := fn.AddNewKernel([]exec.InputType{inType, inType}, kernels.OutputResolveTemporal, ex, nil) - if err != nil { - panic(err) - } - - // timestamp - duration => timestamp - inDuration := exec.NewExactInput(&arrow.DurationType{Unit: unit}) - ex = kernels.ArithmeticExecSameType(arrow.TIMESTAMP, o.op) - err = fn.AddNewKernel([]exec.InputType{inType, inDuration}, kernels.OutputFirstType, ex, nil) - if err != nil { - panic(err) - } - - // duration - duration = duration - matchDur := exec.NewMatchedInput(exec.DurationTypeUnit(unit)) - ex = kernels.ArithmeticExecSameType(arrow.DURATION, o.op) - err = fn.AddNewKernel([]exec.InputType{matchDur, matchDur}, exec.NewOutputType(&arrow.DurationType{Unit: unit}), ex, nil) - if err != nil { - panic(err) - } - } - - // time32 - time32 = duration - for _, unit := range []arrow.TimeUnit{arrow.Second, arrow.Millisecond} { - inType := exec.NewMatchedInput(exec.Time32TypeUnit(unit)) - internalEx := kernels.ArithmeticExecSameType(arrow.TIME32, o.op) - ex := func(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - if err := internalEx(ctx, batch, out); err != nil { - return err - } - // the allocated space is for duration (an int64) but we - // wrote the time32 - time32 as if the output was time32 - // so a quick copy in reverse expands the int32s to int64. 
- rawData := arrow.GetData[int32](out.Buffers[1].Buf) - outData := arrow.GetData[int64](out.Buffers[1].Buf) - - for i := out.Len - 1; i >= 0; i-- { - outData[i] = int64(rawData[i]) - } - return nil - } - - err := fn.AddNewKernel([]exec.InputType{inType, inType}, - exec.NewOutputType(&arrow.DurationType{Unit: unit}), ex, nil) - if err != nil { - panic(err) - } - } - - // time64 - time64 = duration - for _, unit := range []arrow.TimeUnit{arrow.Microsecond, arrow.Nanosecond} { - inType := exec.NewMatchedInput(exec.Time64TypeUnit(unit)) - ex := kernels.ArithmeticExecSameType(arrow.TIME64, o.op) - err := fn.AddNewKernel([]exec.InputType{inType, inType}, exec.NewOutputType(&arrow.DurationType{Unit: unit}), ex, nil) - if err != nil { - panic(err) - } - } - - inDate32 := exec.NewExactInput(arrow.FixedWidthTypes.Date32) - ex := kernels.SubtractDate32(o.op) - err := fn.AddNewKernel([]exec.InputType{inDate32, inDate32}, exec.NewOutputType(arrow.FixedWidthTypes.Duration_s), ex, nil) - if err != nil { - panic(err) - } - - inDate64 := exec.NewExactInput(arrow.FixedWidthTypes.Date64) - ex = kernels.ArithmeticExecSameType(arrow.DATE64, o.op) - err = fn.AddNewKernel([]exec.InputType{inDate64, inDate64}, exec.NewOutputType(arrow.FixedWidthTypes.Duration_ms), ex, nil) - if err != nil { - panic(err) - } - - reg.AddFunction(fn, false) - } - - oplist := []struct { - funcName string - op kernels.ArithmeticOp - decPromote decimalPromotion - doc FunctionDoc - commutative bool - }{ - {"multiply_unchecked", kernels.OpMul, decPromoteMultiply, mulUncheckedDoc, true}, - {"multiply", kernels.OpMulChecked, decPromoteMultiply, mulDoc, true}, - {"divide_unchecked", kernels.OpDiv, decPromoteDivide, divUncheckedDoc, false}, - {"divide", kernels.OpDivChecked, decPromoteDivide, divDoc, false}, - } - - for _, o := range oplist { - fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), o.decPromote} - for _, k := range append(kernels.GetArithmeticBinaryKernels(o.op), 
kernels.GetDecimalBinaryKernels(o.op)...) { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - for _, unit := range arrow.TimeUnitValues { - durInput := exec.NewExactInput(&arrow.DurationType{Unit: unit}) - i64Input := exec.NewExactInput(arrow.PrimitiveTypes.Int64) - durOutput := exec.NewOutputType(&arrow.DurationType{Unit: unit}) - ex := kernels.ArithmeticExecSameType(arrow.DURATION, o.op) - err := fn.AddNewKernel([]exec.InputType{durInput, i64Input}, durOutput, ex, nil) - if err != nil { - panic(err) - } - if o.commutative { - err = fn.AddNewKernel([]exec.InputType{i64Input, durInput}, durOutput, ex, nil) - if err != nil { - panic(err) - } - } - } - - reg.AddFunction(fn, false) - } - - ops = []struct { - funcName string - op kernels.ArithmeticOp - decPromote decimalPromotion - doc FunctionDoc - }{ - {"abs_unchecked", kernels.OpAbsoluteValue, decPromoteNone, absoluteValueUncheckedDoc}, - {"abs", kernels.OpAbsoluteValueChecked, decPromoteNone, absoluteValueDoc}, - {"negate_unchecked", kernels.OpNegate, decPromoteNone, negateUncheckedDoc}, - } - - for _, o := range ops { - fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Unary(), o.doc), decPromoteNone} - kns := append(kernels.GetArithmeticUnaryKernels(o.op), kernels.GetDecimalUnaryKernels(o.op)...) - for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - reg.AddFunction(fn, false) - } - - fn := &arithmeticFunction{*NewScalarFunction("negate", Unary(), negateDoc), decPromoteNone} - kns := append(kernels.GetArithmeticUnarySignedKernels(kernels.OpNegateChecked), kernels.GetDecimalUnaryKernels(kernels.OpNegateChecked)...) 
- for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - reg.AddFunction(fn, false) - - ops = []struct { - funcName string - op kernels.ArithmeticOp - decPromote decimalPromotion - doc FunctionDoc - }{ - {"sqrt_unchecked", kernels.OpSqrt, decPromoteNone, sqrtUncheckedDoc}, - {"sqrt", kernels.OpSqrtChecked, decPromoteNone, sqrtDoc}, - {"sin_unchecked", kernels.OpSin, decPromoteNone, sinUncheckedDoc}, - {"sin", kernels.OpSinChecked, decPromoteNone, sinDoc}, - {"cos_unchecked", kernels.OpCos, decPromoteNone, cosUncheckedDoc}, - {"cos", kernels.OpCosChecked, decPromoteNone, cosDoc}, - {"tan_unchecked", kernels.OpTan, decPromoteNone, tanUncheckedDoc}, - {"tan", kernels.OpTanChecked, decPromoteNone, tanDoc}, - {"asin_unchecked", kernels.OpAsin, decPromoteNone, asinUncheckedDoc}, - {"asin", kernels.OpAsinChecked, decPromoteNone, asinDoc}, - {"acos_unchecked", kernels.OpAcos, decPromoteNone, acosUncheckedDoc}, - {"acos", kernels.OpAcosChecked, decPromoteNone, acosDoc}, - {"atan", kernels.OpAtan, decPromoteNone, atanDoc}, - {"ln_unchecked", kernels.OpLn, decPromoteNone, lnUncheckedDoc}, - {"ln", kernels.OpLnChecked, decPromoteNone, lnDoc}, - {"log10_unchecked", kernels.OpLog10, decPromoteNone, log10UncheckedDoc}, - {"log10", kernels.OpLog10Checked, decPromoteNone, log10Doc}, - {"log2_unchecked", kernels.OpLog2, decPromoteNone, log2UncheckedDoc}, - {"log2", kernels.OpLog2Checked, decPromoteNone, log2Doc}, - {"log1p_unchecked", kernels.OpLog1p, decPromoteNone, log1pUncheckedDoc}, - {"log1p", kernels.OpLog1pChecked, decPromoteNone, log1pDoc}, - } - - for _, o := range ops { - fn := &arithmeticFloatingPointFunc{arithmeticFunction{*NewScalarFunction(o.funcName, Unary(), o.doc), decPromoteNone}} - kns := kernels.GetArithmeticUnaryFloatingPointKernels(o.op) - for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - reg.AddFunction(fn, false) - } - - ops = []struct { - funcName string - op kernels.ArithmeticOp - 
decPromote decimalPromotion - doc FunctionDoc - }{ - {"atan2", kernels.OpAtan2, decPromoteNone, atan2Doc}, - {"logb_unchecked", kernels.OpLogb, decPromoteNone, logbUncheckedDoc}, - {"logb", kernels.OpLogbChecked, decPromoteNone, logbDoc}, - } - - for _, o := range ops { - fn := &arithmeticFloatingPointFunc{arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), addDoc), decPromoteNone}} - kns := kernels.GetArithmeticFloatingPointKernels(o.op) - for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - reg.AddFunction(fn, false) - } - - fn = &arithmeticFunction{*NewScalarFunction("sign", Unary(), signDoc), decPromoteNone} - kns = kernels.GetArithmeticUnaryFixedIntOutKernels(arrow.PrimitiveTypes.Int8, kernels.OpSign) - for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - reg.AddFunction(fn, false) - - ops = []struct { - funcName string - op kernels.ArithmeticOp - decPromote decimalPromotion - doc FunctionDoc - }{ - {"power_unchecked", kernels.OpPower, decPromoteNone, powUncheckedDoc}, - {"power", kernels.OpPowerChecked, decPromoteNone, powDoc}, - } - - for _, o := range ops { - fn := &arithmeticDecimalToFloatingPointFunc{arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), o.decPromote}} - kns := kernels.GetArithmeticBinaryKernels(o.op) - for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - reg.AddFunction(fn, false) - } - - bitWiseOps := []struct { - funcName string - op kernels.BitwiseOp - doc FunctionDoc - }{ - {"bit_wise_and", kernels.OpBitAnd, bitWiseAndDoc}, - {"bit_wise_or", kernels.OpBitOr, bitWiseOrDoc}, - {"bit_wise_xor", kernels.OpBitXor, bitWiseXorDoc}, - } - - for _, o := range bitWiseOps { - fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), decPromoteNone} - kns := kernels.GetBitwiseBinaryKernels(o.op) - for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - 
reg.AddFunction(fn, false) - } - - fn = &arithmeticFunction{*NewScalarFunction("bit_wise_not", Unary(), bitWiseNotDoc), decPromoteNone} - for _, k := range kernels.GetBitwiseUnaryKernels() { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - reg.AddFunction(fn, false) - - shiftOps := []struct { - funcName string - dir kernels.ShiftDir - checked bool - doc FunctionDoc - }{ - {"shift_left", kernels.ShiftLeft, true, shiftLeftDoc}, - {"shift_left_unchecked", kernels.ShiftLeft, false, shiftLeftUncheckedDoc}, - {"shift_right", kernels.ShiftRight, true, shiftRightDoc}, - {"shift_right_unchecked", kernels.ShiftRight, false, shiftRightUncheckedDoc}, - } - - for _, o := range shiftOps { - fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), decPromoteNone} - kns := kernels.GetShiftKernels(o.dir, o.checked) - for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - reg.AddFunction(fn, false) - } - - floorFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("floor", Unary(), floorDoc), decPromoteNone}} - kns = kernels.GetSimpleRoundKernels(kernels.RoundDown) - for _, k := range kns { - if err := floorFn.AddKernel(k); err != nil { - panic(err) - } - } - floorFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL128)}, - kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal128.Num](kernels.RoundDown), nil) - floorFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL256)}, - kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal256.Num](kernels.RoundDown), nil) - reg.AddFunction(floorFn, false) - - ceilFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("ceil", Unary(), ceilDoc), decPromoteNone}} - kns = kernels.GetSimpleRoundKernels(kernels.RoundUp) - for _, k := range kns { - if err := ceilFn.AddKernel(k); err != nil { - panic(err) - } - } - ceilFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL128)}, - 
kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal128.Num](kernels.RoundUp), nil) - ceilFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL256)}, - kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal256.Num](kernels.RoundUp), nil) - reg.AddFunction(ceilFn, false) - - truncFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("trunc", Unary(), truncDoc), decPromoteNone}} - kns = kernels.GetSimpleRoundKernels(kernels.TowardsZero) - for _, k := range kns { - if err := truncFn.AddKernel(k); err != nil { - panic(err) - } - } - truncFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL128)}, - kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal128.Num](kernels.TowardsZero), nil) - truncFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL256)}, - kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal256.Num](kernels.TowardsZero), nil) - reg.AddFunction(truncFn, false) - - roundFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("round", Unary(), roundDoc), decPromoteNone}} - kns = kernels.GetRoundUnaryKernels(kernels.InitRoundState, kernels.UnaryRoundExec) - for _, k := range kns { - if err := roundFn.AddKernel(k); err != nil { - panic(err) - } - } - - roundFn.defaultOpts = DefaultRoundOptions - reg.AddFunction(roundFn, false) - - roundToMultipleFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("round_to_multiple", Unary(), roundToMultipleDoc), decPromoteNone}} - kns = kernels.GetRoundUnaryKernels(kernels.InitRoundToMultipleState, kernels.UnaryRoundToMultipleExec) - for _, k := range kns { - if err := roundToMultipleFn.AddKernel(k); err != nil { - panic(err) - } - } - - roundToMultipleFn.defaultOpts = DefaultRoundToMultipleOptions - reg.AddFunction(roundToMultipleFn, false) -} - -func impl(ctx context.Context, fn string, opts ArithmeticOptions, left, right Datum) (Datum, error) { - if opts.NoCheckOverflow { - fn 
+= "_unchecked" - } - return CallFunction(ctx, fn, nil, left, right) -} - -// Add performs an addition between the passed in arguments (scalar or array) -// and returns the result. If one argument is a scalar and the other is an -// array, the scalar value is added to each value of the array. -// -// ArithmeticOptions specifies whether or not to check for overflows, -// performance is faster if not explicitly checking for overflows but -// will error on an overflow if NoCheckOverflow is false (default). -func Add(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error) { - return impl(ctx, "add", opts, left, right) -} - -// Sub performs a subtraction between the passed in arguments (scalar or array) -// and returns the result. If one argument is a scalar and the other is an -// array, the scalar value is subtracted from each value of the array. -// -// ArithmeticOptions specifies whether or not to check for overflows, -// performance is faster if not explicitly checking for overflows but -// will error on an overflow if NoCheckOverflow is false (default). -func Subtract(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error) { - return impl(ctx, "sub", opts, left, right) -} - -// Multiply performs a multiplication between the passed in arguments (scalar or array) -// and returns the result. If one argument is a scalar and the other is an -// array, the scalar value is multiplied against each value of the array. -// -// ArithmeticOptions specifies whether or not to check for overflows, -// performance is faster if not explicitly checking for overflows but -// will error on an overflow if NoCheckOverflow is false (default). -func Multiply(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error) { - return impl(ctx, "multiply", opts, left, right) -} - -// Divide performs a division between the passed in arguments (scalar or array) -// and returns the result. 
If one argument is a scalar and the other is an -// array, the scalar value is used with each value of the array. -// -// ArithmeticOptions specifies whether or not to check for overflows, -// performance is faster if not explicitly checking for overflows but -// will error on an overflow if NoCheckOverflow is false (default). -// -// Will error on divide by zero regardless of whether or not checking for -// overflows. -func Divide(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error) { - return impl(ctx, "divide", opts, left, right) -} - -// AbsoluteValue returns the AbsoluteValue for each element in the input -// argument. It accepts either a scalar or an array. -// -// ArithmeticOptions specifies whether or not to check for overflows, -// performance is faster if not explicitly checking for overflows but -// will error on an overflow if CheckOverflow is true. -func AbsoluteValue(ctx context.Context, opts ArithmeticOptions, input Datum) (Datum, error) { - fn := "abs" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, input) -} - -// Negate returns a result containing the negation of each element in the -// input argument. It accepts either a scalar or an array. -// -// ArithmeticOptions specifies whether or not to check for overflows, -// or to throw an error on unsigned types. -func Negate(ctx context.Context, opts ArithmeticOptions, input Datum) (Datum, error) { - fn := "negate" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, input) -} - -// Sign returns -1, 0, or 1 depending on the sign of each element in the -// input. For x in the input: -// -// if x > 0: 1 -// if x < 0: -1 -// if x == 0: 0 -func Sign(ctx context.Context, input Datum) (Datum, error) { - return CallFunction(ctx, "sign", nil, input) -} - -// Power returns base**exp for each element in the input arrays. 
Should work -// for both Arrays and Scalars -func Power(ctx context.Context, opts ArithmeticOptions, base, exp Datum) (Datum, error) { - fn := "power" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, base, exp) -} - -// ShiftLeft only accepts integral types and shifts each element of the -// first argument to the left by the value of the corresponding element -// in the second argument. -// -// The value to shift by should be >= 0 and < precision of the type. -func ShiftLeft(ctx context.Context, opts ArithmeticOptions, lhs, rhs Datum) (Datum, error) { - fn := "shift_left" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, lhs, rhs) -} - -// ShiftRight only accepts integral types and shifts each element of the -// first argument to the right by the value of the corresponding element -// in the second argument. -// -// The value to shift by should be >= 0 and < precision of the type. -func ShiftRight(ctx context.Context, opts ArithmeticOptions, lhs, rhs Datum) (Datum, error) { - fn := "shift_right" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, lhs, rhs) -} - -func Sin(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "sin" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Cos(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "cos" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Tan(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "tan" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Asin(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "asin" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Acos(ctx 
context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "acos" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Atan(ctx context.Context, arg Datum) (Datum, error) { - return CallFunction(ctx, "atan", nil, arg) -} - -func Atan2(ctx context.Context, x, y Datum) (Datum, error) { - return CallFunction(ctx, "atan2", nil, x, y) -} - -func Ln(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "ln" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Log10(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "log10" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Log2(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "log2" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Log1p(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "log1p" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Logb(ctx context.Context, opts ArithmeticOptions, x, base Datum) (Datum, error) { - fn := "logb" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, x, base) -} - -func Round(ctx context.Context, opts RoundOptions, arg Datum) (Datum, error) { - return CallFunction(ctx, "round", &opts, arg) -} - -func RoundToMultiple(ctx context.Context, opts RoundToMultipleOptions, arg Datum) (Datum, error) { - return CallFunction(ctx, "round_to_multiple", &opts, arg) -} diff --git a/go/arrow/compute/arithmetic_test.go b/go/arrow/compute/arithmetic_test.go deleted file mode 100644 index 6e693481a322c..0000000000000 --- a/go/arrow/compute/arithmetic_test.go +++ /dev/null @@ -1,3504 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more 
contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute_test - -import ( - "context" - "fmt" - "math" - "strings" - "testing" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/klauspost/cpuid/v2" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" - "golang.org/x/exp/constraints" -) - -var ( - CpuCacheSizes = [...]int{ // defaults - 32 * 1024, // level 1: 32K - 256 * 1024, // level 2: 256K - 3072 * 1024, // level 3: 3M - } -) - -func init() { - if cpuid.CPU.Cache.L1D != -1 { - CpuCacheSizes[0] = cpuid.CPU.Cache.L1D - } - if cpuid.CPU.Cache.L2 != -1 { - CpuCacheSizes[1] = cpuid.CPU.Cache.L2 - } - if cpuid.CPU.Cache.L3 != -1 { - CpuCacheSizes[2] = cpuid.CPU.Cache.L3 - } -} - -func assertNullToNull(t *testing.T, ctx 
context.Context, fn string, mem memory.Allocator) { - f, ok := compute.GetFunctionRegistry().GetFunction(fn) - require.True(t, ok) - nulls := array.MakeArrayOfNull(mem, arrow.Null, 7) - defer nulls.Release() - n := f.Arity().NArgs - - t.Run("null to null array", func(t *testing.T) { - args := make([]compute.Datum, n) - for i := 0; i < n; i++ { - args[i] = &compute.ArrayDatum{nulls.Data()} - } - - result, err := compute.CallFunction(ctx, fn, nil, args...) - assert.NoError(t, err) - defer result.Release() - out := result.(*compute.ArrayDatum).MakeArray() - defer out.Release() - assertArraysEqual(t, nulls, out) - }) - - t.Run("null to null scalar", func(t *testing.T) { - args := make([]compute.Datum, n) - for i := 0; i < n; i++ { - args[i] = compute.NewDatum(scalar.ScalarNull) - } - - result, err := compute.CallFunction(ctx, fn, nil, args...) - assert.NoError(t, err) - assertScalarEquals(t, scalar.ScalarNull, result.(*compute.ScalarDatum).Value) - }) -} - -type fnOpts interface { - compute.ArithmeticOptions | compute.RoundOptions | compute.RoundToMultipleOptions -} - -type unaryArithmeticFunc[O fnOpts] func(context.Context, O, compute.Datum) (compute.Datum, error) - -// type unaryFunc = func(compute.Datum) (compute.Datum, error) - -type binaryArithmeticFunc = func(context.Context, compute.ArithmeticOptions, compute.Datum, compute.Datum) (compute.Datum, error) - -type binaryFunc = func(left, right compute.Datum) (compute.Datum, error) - -func assertScalarEquals(t *testing.T, expected, actual scalar.Scalar, opt ...scalar.EqualOption) { - assert.Truef(t, scalar.ApproxEquals(expected, actual, opt...), "expected: %s\ngot: %s", expected, actual) -} - -func assertBinop(t *testing.T, fn binaryFunc, left, right, expected arrow.Array, opt []array.EqualOption, scalarOpt []scalar.EqualOption) { - actual, err := fn(&compute.ArrayDatum{Value: left.Data()}, &compute.ArrayDatum{Value: right.Data()}) - require.NoError(t, err) - defer actual.Release() - assertDatumsEqual(t, 
&compute.ArrayDatum{Value: expected.Data()}, actual, opt, scalarOpt) - - // also check (Scalar, Scalar) operations - for i := 0; i < expected.Len(); i++ { - s, err := scalar.GetScalar(expected, i) - require.NoError(t, err) - lhs, _ := scalar.GetScalar(left, i) - rhs, _ := scalar.GetScalar(right, i) - - actual, err := fn(&compute.ScalarDatum{Value: lhs}, &compute.ScalarDatum{Value: rhs}) - assert.NoError(t, err) - assertScalarEquals(t, s, actual.(*compute.ScalarDatum).Value, scalarOpt...) - } -} - -func assertBinopErr(t *testing.T, fn binaryFunc, left, right arrow.Array, expectedMsg string) { - _, err := fn(&compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{Value: right.Data()}) - assert.ErrorIs(t, err, arrow.ErrInvalid) - assert.ErrorContains(t, err, expectedMsg) -} - -type BinaryFuncTestSuite struct { - suite.Suite - - mem *memory.CheckedAllocator - ctx context.Context -} - -func (b *BinaryFuncTestSuite) SetupTest() { - b.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - b.ctx = compute.WithAllocator(context.TODO(), b.mem) -} - -func (b *BinaryFuncTestSuite) TearDownTest() { - b.mem.AssertSize(b.T(), 0) -} - -func (b *BinaryFuncTestSuite) getArr(dt arrow.DataType, str string) arrow.Array { - arr, _, err := array.FromJSON(b.mem, dt, strings.NewReader(str), array.WithUseNumber()) - b.Require().NoError(err) - return arr -} - -type Float16BinaryFuncTestSuite struct { - BinaryFuncTestSuite -} - -func (b *Float16BinaryFuncTestSuite) assertBinopErr(fn binaryFunc, lhs, rhs string) { - left, _, _ := array.FromJSON(b.mem, arrow.FixedWidthTypes.Float16, strings.NewReader(lhs), array.WithUseNumber()) - defer left.Release() - right, _, _ := array.FromJSON(b.mem, arrow.FixedWidthTypes.Float16, strings.NewReader(rhs), array.WithUseNumber()) - defer right.Release() - - _, err := fn(&compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{right.Data()}) - b.ErrorIs(err, arrow.ErrNotImplemented) -} - -func (b *Float16BinaryFuncTestSuite) TestAdd() { - for _, overflow := 
range []bool{false, true} { - b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { - opts := compute.ArithmeticOptions{NoCheckOverflow: overflow} - b.assertBinopErr(func(left, right compute.Datum) (compute.Datum, error) { - return compute.Add(b.ctx, opts, left, right) - }, `[1.5]`, `[1.5]`) - }) - } -} - -func (b *Float16BinaryFuncTestSuite) TestSub() { - for _, overflow := range []bool{false, true} { - b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { - opts := compute.ArithmeticOptions{NoCheckOverflow: overflow} - b.assertBinopErr(func(left, right compute.Datum) (compute.Datum, error) { - return compute.Subtract(b.ctx, opts, left, right) - }, `[1.5]`, `[1.5]`) - }) - } -} - -type BinaryArithmeticSuite[T arrow.NumericType] struct { - BinaryFuncTestSuite - - opts compute.ArithmeticOptions - min, max T - equalOpts []array.EqualOption - scalarEqualOpts []scalar.EqualOption -} - -func (BinaryArithmeticSuite[T]) DataType() arrow.DataType { - return arrow.GetDataType[T]() -} - -func (b *BinaryArithmeticSuite[T]) setNansEqual(val bool) { - b.equalOpts = []array.EqualOption{array.WithNaNsEqual(val)} - b.scalarEqualOpts = []scalar.EqualOption{scalar.WithNaNsEqual(val)} -} - -func (b *BinaryArithmeticSuite[T]) SetupTest() { - b.BinaryFuncTestSuite.SetupTest() - b.opts.NoCheckOverflow = false -} - -func (b *BinaryArithmeticSuite[T]) makeNullScalar() scalar.Scalar { - return scalar.MakeNullScalar(b.DataType()) -} - -func (b *BinaryArithmeticSuite[T]) makeScalar(val T) scalar.Scalar { - return scalar.MakeScalar(val) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopScalars(fn binaryArithmeticFunc, lhs, rhs T, expected T) { - left, right := b.makeScalar(lhs), b.makeScalar(rhs) - exp := b.makeScalar(expected) - - actual, err := fn(b.ctx, b.opts, &compute.ScalarDatum{Value: left}, &compute.ScalarDatum{Value: right}) - b.NoError(err) - sc := actual.(*compute.ScalarDatum).Value - - assertScalarEquals(b.T(), exp, sc) -} - -func (b *BinaryArithmeticSuite[T]) 
assertBinopScalarValArr(fn binaryArithmeticFunc, lhs T, rhs, expected string) { - left := b.makeScalar(lhs) - b.assertBinopScalarArr(fn, left, rhs, expected) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopScalarArr(fn binaryArithmeticFunc, lhs scalar.Scalar, rhs, expected string) { - right, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(rhs)) - defer right.Release() - exp, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(expected)) - defer exp.Release() - - actual, err := fn(b.ctx, b.opts, &compute.ScalarDatum{Value: lhs}, &compute.ArrayDatum{Value: right.Data()}) - b.NoError(err) - defer actual.Release() - assertDatumsEqual(b.T(), &compute.ArrayDatum{Value: exp.Data()}, actual, b.equalOpts, b.scalarEqualOpts) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopArrScalarExpArr(fn binaryArithmeticFunc, lhs string, rhs scalar.Scalar, exp arrow.Array) { - left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs)) - defer left.Release() - - actual, err := fn(b.ctx, b.opts, &compute.ArrayDatum{left.Data()}, compute.NewDatum(rhs)) - b.Require().NoError(err) - defer actual.Release() - assertDatumsEqual(b.T(), &compute.ArrayDatum{exp.Data()}, actual, b.equalOpts, b.scalarEqualOpts) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopArrScalarVal(fn binaryArithmeticFunc, lhs string, rhs T, expected string) { - right := b.makeScalar(rhs) - b.assertBinopArrScalar(fn, lhs, right, expected) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopArrScalar(fn binaryArithmeticFunc, lhs string, rhs scalar.Scalar, expected string) { - left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs)) - defer left.Release() - exp, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(expected)) - defer exp.Release() - - actual, err := fn(b.ctx, b.opts, &compute.ArrayDatum{Value: left.Data()}, &compute.ScalarDatum{Value: rhs}) - b.NoError(err) - defer actual.Release() - assertDatumsEqual(b.T(), &compute.ArrayDatum{Value: 
exp.Data()}, actual, b.equalOpts, b.scalarEqualOpts) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopArrs(fn binaryArithmeticFunc, lhs, rhs, exp arrow.Array) { - assertBinop(b.T(), func(left, right compute.Datum) (compute.Datum, error) { - return fn(b.ctx, b.opts, left, right) - }, lhs, rhs, exp, b.equalOpts, b.scalarEqualOpts) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopExpArr(fn binaryArithmeticFunc, lhs, rhs string, exp arrow.Array) { - left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs), array.WithUseNumber()) - defer left.Release() - right, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(rhs), array.WithUseNumber()) - defer right.Release() - - b.assertBinopArrs(fn, left, right, exp) -} - -func (b *BinaryArithmeticSuite[T]) assertBinop(fn binaryArithmeticFunc, lhs, rhs, expected string) { - left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs), array.WithUseNumber()) - defer left.Release() - right, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(rhs), array.WithUseNumber()) - defer right.Release() - exp, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(expected), array.WithUseNumber()) - defer exp.Release() - - b.assertBinopArrs(fn, left, right, exp) -} - -func (b *BinaryArithmeticSuite[T]) setOverflowCheck(value bool) { - b.opts.NoCheckOverflow = !value -} - -func (b *BinaryArithmeticSuite[T]) assertBinopErr(fn binaryArithmeticFunc, lhs, rhs, expectedMsg string) { - left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs), array.WithUseNumber()) - defer left.Release() - right, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(rhs), array.WithUseNumber()) - defer right.Release() - - assertBinopErr(b.T(), func(left, right compute.Datum) (compute.Datum, error) { - return fn(b.ctx, b.opts, left, right) - }, left, right, expectedMsg) -} - -func (b *BinaryArithmeticSuite[T]) TestAdd() { - b.Run(b.DataType().String(), func() { - for _, overflow := 
range []bool{false, true} { - b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { - b.setOverflowCheck(overflow) - - b.assertBinop(compute.Add, `[]`, `[]`, `[]`) - b.assertBinop(compute.Add, `[3, 2, 6]`, `[1, 0, 2]`, `[4, 2, 8]`) - // nulls on one side - b.assertBinop(compute.Add, `[null, 1, null]`, `[3, 4, 5]`, `[null, 5, null]`) - b.assertBinop(compute.Add, `[3, 4, 5]`, `[null, 1, null]`, `[null, 5, null]`) - // nulls on both sides - b.assertBinop(compute.Add, `[null, 1, 2]`, `[3, 4, null]`, `[null, 5, null]`) - // all nulls - b.assertBinop(compute.Add, `[null]`, `[null]`, `[null]`) - - // scalar on the left - b.assertBinopScalarValArr(compute.Add, 3, `[1, 2]`, `[4, 5]`) - b.assertBinopScalarValArr(compute.Add, 3, `[null, 2]`, `[null, 5]`) - b.assertBinopScalarArr(compute.Add, b.makeNullScalar(), `[1, 2]`, `[null, null]`) - b.assertBinopScalarArr(compute.Add, b.makeNullScalar(), `[null, 2]`, `[null, null]`) - // scalar on the right - b.assertBinopArrScalarVal(compute.Add, `[1, 2]`, 3, `[4, 5]`) - b.assertBinopArrScalarVal(compute.Add, `[null, 2]`, 3, `[null, 5]`) - b.assertBinopArrScalar(compute.Add, `[1, 2]`, b.makeNullScalar(), `[null, null]`) - b.assertBinopArrScalar(compute.Add, `[null, 2]`, b.makeNullScalar(), `[null, null]`) - - if !arrow.IsFloating(b.DataType().ID()) && overflow { - val := fmt.Sprintf("[%v]", b.max) - b.assertBinopErr(compute.Add, val, val, "overflow") - } - }) - } - }) -} - -func (b *BinaryArithmeticSuite[T]) TestSub() { - b.Run(b.DataType().String(), func() { - for _, overflow := range []bool{false, true} { - b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { - b.setOverflowCheck(overflow) - - b.assertBinop(compute.Subtract, `[]`, `[]`, `[]`) - b.assertBinop(compute.Subtract, `[3, 2, 6]`, `[1, 0, 2]`, `[2, 2, 4]`) - // nulls on one side - b.assertBinop(compute.Subtract, `[null, 4, null]`, `[2, 1, 0]`, `[null, 3, null]`) - b.assertBinop(compute.Subtract, `[3, 4, 5]`, `[null, 1, null]`, `[null, 3, null]`) - // nulls on 
both sides - b.assertBinop(compute.Subtract, `[null, 4, 3]`, `[2, 1, null]`, `[null, 3, null]`) - // all nulls - b.assertBinop(compute.Subtract, `[null]`, `[null]`, `[null]`) - - // scalar on the left - b.assertBinopScalarValArr(compute.Subtract, 3, `[1, 2]`, `[2, 1]`) - b.assertBinopScalarValArr(compute.Subtract, 3, `[null, 2]`, `[null, 1]`) - b.assertBinopScalarArr(compute.Subtract, b.makeNullScalar(), `[1, 2]`, `[null, null]`) - b.assertBinopScalarArr(compute.Subtract, b.makeNullScalar(), `[null, 2]`, `[null, null]`) - // scalar on the right - b.assertBinopArrScalarVal(compute.Subtract, `[4, 5]`, 3, `[1, 2]`) - b.assertBinopArrScalarVal(compute.Subtract, `[null, 5]`, 3, `[null, 2]`) - b.assertBinopArrScalar(compute.Subtract, `[1, 2]`, b.makeNullScalar(), `[null, null]`) - b.assertBinopArrScalar(compute.Subtract, `[null, 2]`, b.makeNullScalar(), `[null, null]`) - - if !arrow.IsFloating(b.DataType().ID()) && overflow { - b.assertBinopErr(compute.Subtract, fmt.Sprintf("[%v]", b.min), fmt.Sprintf("[%v]", b.max), "overflow") - } - }) - } - }) -} - -func (b *BinaryArithmeticSuite[T]) TestMultiply() { - b.Run(b.DataType().String(), func() { - for _, overflow := range []bool{false, true} { - b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { - b.setOverflowCheck(overflow) - - b.assertBinop(compute.Multiply, `[]`, `[]`, `[]`) - b.assertBinop(compute.Multiply, `[3, 2, 6]`, `[1, 0, 2]`, `[3, 0, 12]`) - // nulls on one side - b.assertBinop(compute.Multiply, `[null, 2, null]`, `[4, 5, 6]`, `[null, 10, null]`) - b.assertBinop(compute.Multiply, `[4, 5, 6]`, `[null, 2, null]`, `[null, 10, null]`) - // nulls on both sides - b.assertBinop(compute.Multiply, `[null, 2, 3]`, `[4, 5, null]`, `[null, 10, null]`) - // all nulls - b.assertBinop(compute.Multiply, `[null]`, `[null]`, `[null]`) - - // scalar on left - b.assertBinopScalarValArr(compute.Multiply, 3, `[4, 5]`, `[12, 15]`) - b.assertBinopScalarValArr(compute.Multiply, 3, `[null, 5]`, `[null, 15]`) - 
b.assertBinopScalarArr(compute.Multiply, b.makeNullScalar(), `[1, 2]`, `[null, null]`) - b.assertBinopScalarArr(compute.Multiply, b.makeNullScalar(), `[null, 2]`, `[null, null]`) - // scalar on right - b.assertBinopArrScalarVal(compute.Multiply, `[4, 5]`, 3, `[12, 15]`) - b.assertBinopArrScalarVal(compute.Multiply, `[null, 5]`, 3, `[null, 15]`) - b.assertBinopArrScalar(compute.Multiply, `[1, 2]`, b.makeNullScalar(), `[null, null]`) - b.assertBinopArrScalar(compute.Multiply, `[null, 2]`, b.makeNullScalar(), `[null, null]`) - }) - } - }) -} - -func (b *BinaryArithmeticSuite[T]) TestDiv() { - b.Run(b.DataType().String(), func() { - for _, overflow := range []bool{false, true} { - b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { - b.setOverflowCheck(overflow) - - // empty arrays - b.assertBinop(compute.Divide, `[]`, `[]`, `[]`) - // ordinary arrays - b.assertBinop(compute.Divide, `[3, 2, 6]`, `[1, 1, 2]`, `[3, 2, 3]`) - // with nulls - b.assertBinop(compute.Divide, `[null, 10, 30, null, 20]`, `[1, 5, 2, 5, 10]`, `[null, 2, 15, null, 2]`) - if !arrow.IsFloating(b.DataType().ID()) { - // scalar divided by array - b.assertBinopScalarValArr(compute.Divide, 33, `[null, 1, 3, null, 2]`, `[null, 33, 11, null, 16]`) - // array divided by scalar - b.assertBinopArrScalarVal(compute.Divide, `[null, 10, 30, null, 2]`, 3, `[null, 3, 10, null, 0]`) - // scalar divided by scalar - b.assertBinopScalars(compute.Divide, 16, 7, 2) - } else { - b.assertBinop(compute.Divide, `[3.4, 0.64, 1.28]`, `[1, 2, 4]`, `[3.4, 0.32, 0.32]`) - b.assertBinop(compute.Divide, `[null, 1, 3.3, null, 2]`, `[1, 4, 2, 5, 0.1]`, `[null, 0.25, 1.65, null, 20]`) - b.assertBinopScalarValArr(compute.Divide, 10, `[null, 1, 2.5, null, 2, 5]`, `[null, 10, 4, null, 5, 2]`) - b.assertBinopArrScalarVal(compute.Divide, `[null, 1, 2.5, null, 2, 5]`, 10, `[null, 0.1, 0.25, null, 0.2, 0.5]`) - - b.assertBinop(compute.Divide, `[3.4, "Inf", "-Inf"]`, `[1, 2, 3]`, `[3.4, "Inf", "-Inf"]`) - b.setNansEqual(true) - 
b.assertBinop(compute.Divide, `[3.4, "NaN", 2.0]`, `[1, 2, 2.0]`, `[3.4, "NaN", 1.0]`) - b.assertBinopScalars(compute.Divide, 21, 3, 7) - } - }) - } - }) -} - -func (b *BinaryArithmeticSuite[T]) TestDivideByZero() { - if !arrow.IsFloating(b.DataType().ID()) { - for _, checkOverflow := range []bool{false, true} { - b.setOverflowCheck(checkOverflow) - b.assertBinopErr(compute.Divide, `[3, 2, 6]`, `[1, 1, 0]`, "divide by zero") - } - } else { - b.setOverflowCheck(true) - b.assertBinopErr(compute.Divide, `[3, 2, 6]`, `[1, 1, 0]`, "divide by zero") - b.assertBinopErr(compute.Divide, `[3, 2, 0]`, `[1, 1, 0]`, "divide by zero") - b.assertBinopErr(compute.Divide, `[3, 2, -6]`, `[1, 1, 0]`, "divide by zero") - - b.setOverflowCheck(false) - b.setNansEqual(true) - b.assertBinop(compute.Divide, `[3, 2, 6]`, `[1, 1, 0]`, `[3, 2, "Inf"]`) - b.assertBinop(compute.Divide, `[3, 2, 0]`, `[1, 1, 0]`, `[3, 2, "NaN"]`) - b.assertBinop(compute.Divide, `[3, 2, -6]`, `[1, 1, 0]`, `[3, 2, "-Inf"]`) - } -} - -func (b *BinaryArithmeticSuite[T]) TestPower() { - b.setNansEqual(true) - b.Run(b.DataType().String(), func() { - for _, checkOverflow := range []bool{false, true} { - b.Run(fmt.Sprintf("checkOverflow=%t", checkOverflow), func() { - b.setOverflowCheck(checkOverflow) - - b.assertBinop(compute.Power, `[]`, `[]`, `[]`) - if !arrow.IsFloating(b.DataType().ID()) { - b.assertBinop(compute.Power, `[3, 2, 6, 2]`, `[1, 1, 2, 0]`, `[3, 2, 36, 1]`) - b.assertBinop(compute.Power, `[null, 2, 3, null, 20]`, `[1, 6, 2, 5, 1]`, `[null, 64, 9, null, 20]`) - b.assertBinopScalarValArr(compute.Power, 3, `[null, 3, 4, null, 2]`, `[null, 27, 81, null, 9]`) - b.assertBinopArrScalarVal(compute.Power, `[null, 10, 3, null, 2]`, 2, `[null, 100, 9, null, 4]`) - b.assertBinopScalars(compute.Power, 4, 3, 64) - b.assertBinop(compute.Power, `[0, 1, 0]`, `[0, 0, 42]`, `[1, 1, 0]`) - - if checkOverflow { - b.assertBinopErr(compute.Power, fmt.Sprintf("[%v]", b.max), `[10]`, "overflow") - } else { - 
b.assertBinopScalars(compute.Power, b.max, 10, 1) - } - } else { - b.assertBinop(compute.Power, `[3.4, 16, 0.64, 1.2, 0]`, `[1, 0.5, 2, 4, 0]`, `[3.4, 4, 0.4096, 2.0736, 1]`) - b.assertBinop(compute.Power, `[null, 1, 3.3, null, 2]`, `[1, 4, 2, 5, 0.1]`, `[null, 1, 10.89, null, 1.07177346]`) - b.assertBinopScalarValArr(compute.Power, 10, `[null, 1, 2.5, null, 2, 5]`, `[null, 10, 316.227766017, null, 100, 100000]`) - b.assertBinopArrScalarVal(compute.Power, `[null, 1, 2.5, null, 2, 5]`, 10, `[null, 1, 9536.74316406, null, 1024, 9765625]`) - b.assertBinop(compute.Power, `[3.4, "Inf", "-Inf", 1.1, 10000]`, `[1, 2, 3, "Inf", 100000]`, `[3.4, "Inf", "-Inf", "Inf", "Inf"]`) - b.assertBinop(compute.Power, `[3.4, "NaN", 2.0]`, `[1, 2, 2.0]`, `[3.4, "NaN", 4.0]`) - b.assertBinop(compute.Power, `[0.0, 0.0]`, `[-1.0, -3.0]`, `["Inf", "Inf"]`) - } - }) - } - }) -} - -type BinaryFloatingArithmeticSuite[T constraints.Float] struct { - BinaryArithmeticSuite[T] - - smallest T -} - -func (bs *BinaryFloatingArithmeticSuite[T]) TestTrigAtan2() { - bs.setNansEqual(true) - atan2 := func(ctx context.Context, _ compute.ArithmeticOptions, x, y compute.Datum) (compute.Datum, error) { - return compute.Atan2(ctx, x, y) - } - - bs.assertBinop(atan2, `[]`, `[]`, `[]`) - bs.assertBinop(atan2, `[0, 0, null, "NaN"]`, `[null, "NaN", 0, 0]`, `[null, "NaN", null, "NaN"]`) - bs.assertBinop(atan2, `[0, 0, -0.0, 0, -0.0, 0, 1, 0, -1, "Inf", "-Inf", 0, 0]`, - `[0, 0, 0, -0.0, -0.0, 1, 0, -1, 0, 0, 0, "Inf", "-Inf"]`, - fmt.Sprintf("[0, 0, -0.0, %f, %f, 0, %f, %f, %f, %f, %f, 0, %f]", - math.Pi, -math.Pi, math.Pi/2, math.Pi, -math.Pi/2, math.Pi/2, -math.Pi/2, math.Pi)) -} - -func (bs *BinaryFloatingArithmeticSuite[T]) TestLog() { - bs.setNansEqual(true) - for _, overflow := range []bool{false, true} { - bs.setOverflowCheck(overflow) - bs.assertBinop(compute.Logb, `[1, 10, null, "NaN", "Inf"]`, `[100, 10, null, 2, 10]`, - `[0, 1, null, "NaN", "Inf"]`) - bs.assertBinopScalars(compute.Logb, bs.smallest, 10, 
T(math.Log(float64(bs.smallest))/math.Log(10))) - bs.assertBinopScalars(compute.Logb, bs.max, 10, T(math.Log(float64(bs.max))/math.Log(10))) - } - - bs.setOverflowCheck(true) - bs.assertBinop(compute.Logb, `[1, 10, null]`, `[10, 10, null]`, `[0, 1, null]`) - bs.assertBinop(compute.Logb, `[1, 2, null]`, `[2, 2, null]`, `[0, 1, null]`) - bs.assertBinopArrScalarVal(compute.Logb, `[10, 100, 1000, null]`, 10, `[1, 2, 3, null]`) - bs.assertBinopArrScalarVal(compute.Logb, `[1, 2, 4, 8]`, 0.25, `[-0.0, -0.5, -1.0, -1.5]`) - - bs.setOverflowCheck(false) - bs.assertBinopArrScalarVal(compute.Logb, `["-Inf", -1, 0, "Inf"]`, 10, `["NaN", "NaN", "-Inf", "Inf"]`) - bs.assertBinopArrScalarVal(compute.Logb, `["-Inf", -1, 0, "Inf"]`, 2, `["NaN", "NaN", "-Inf", "Inf"]`) - bs.assertBinop(compute.Logb, `["-Inf", -1, 0, "Inf"]`, `[2, 10, 0, 0]`, `["NaN", "NaN", "NaN", "NaN"]`) - bs.assertBinopArrScalarVal(compute.Logb, `["-Inf", -1, 0, "Inf"]`, 0, `["NaN", "NaN", "NaN", "NaN"]`) - bs.assertBinopArrScalarVal(compute.Logb, `["-Inf", -2, -1, "Inf"]`, 2, `["NaN", "NaN", "NaN", "Inf"]`) - - bs.setOverflowCheck(true) - bs.assertBinopErr(compute.Logb, `[0]`, `[2]`, "logarithm of zero") - bs.assertBinopErr(compute.Logb, `[2]`, `[0]`, "logarithm of zero") - bs.assertBinopErr(compute.Logb, `[-1]`, `[2]`, "logarithm of negative number") - bs.assertBinopErr(compute.Logb, `["-Inf"]`, `[2]`, "logarithm of negative number") -} - -type BinaryIntegralArithmeticSuite[T arrow.IntType | arrow.UintType] struct { - BinaryArithmeticSuite[T] -} - -func (b *BinaryIntegralArithmeticSuite[T]) TestShiftLeft() { - b.Run(b.DataType().String(), func() { - for _, overflow := range []bool{false, true} { - b.Run(fmt.Sprintf("check_overflow=%t", overflow), func() { - b.setOverflowCheck(overflow) - - b.assertBinop(compute.ShiftLeft, `[]`, `[]`, `[]`) - b.assertBinop(compute.ShiftLeft, `[0, 1, 2, 3]`, `[2, 3, 4, 5]`, `[0, 8, 32, 96]`) - b.assertBinop(compute.ShiftLeft, `[0, null, 2, 3]`, `[2, 3, 4, 5]`, `[0, null, 32, 
96]`) - b.assertBinop(compute.ShiftLeft, `[0, 1, 2, 3]`, `[2, 3, null, 5]`, `[0, 8, null, 96]`) - b.assertBinop(compute.ShiftLeft, `[0, null, 2, 3]`, `[2, 3, null, 5]`, `[0, null, null, 96]`) - b.assertBinop(compute.ShiftLeft, `[null]`, `[null]`, `[null]`) - b.assertBinopScalarValArr(compute.ShiftLeft, 2, `[null, 5]`, `[null, 64]`) - b.assertBinopScalarArr(compute.ShiftLeft, b.makeNullScalar(), `[null, 5]`, `[null, null]`) - b.assertBinopArrScalarVal(compute.ShiftLeft, `[null, 5]`, 3, `[null, 40]`) - b.assertBinopArrScalar(compute.ShiftLeft, `[null, 5]`, b.makeNullScalar(), `[null, null]`) - }) - } - }) -} - -func (b *BinaryIntegralArithmeticSuite[T]) TestShiftRight() { - b.Run(b.DataType().String(), func() { - for _, overflow := range []bool{false, true} { - b.Run(fmt.Sprintf("check_overflow=%t", overflow), func() { - b.setOverflowCheck(overflow) - - b.assertBinop(compute.ShiftRight, `[]`, `[]`, `[]`) - b.assertBinop(compute.ShiftRight, `[0, 1, 4, 8]`, `[1, 1, 1, 4]`, `[0, 0, 2, 0]`) - b.assertBinop(compute.ShiftRight, `[0, null, 4, 8]`, `[1, 1, 1, 4]`, `[0, null, 2, 0]`) - b.assertBinop(compute.ShiftRight, `[0, 1, 4, 8]`, `[1, 1, null, 4]`, `[0, 0, null, 0]`) - b.assertBinop(compute.ShiftRight, `[0, null, 4, 8]`, `[1, 1, null, 4]`, `[0, null, null, 0]`) - b.assertBinop(compute.ShiftRight, `[null]`, `[null]`, `[null]`) - b.assertBinopScalarValArr(compute.ShiftRight, 64, `[null, 2, 6]`, `[null, 16, 1]`) - b.assertBinopScalarArr(compute.ShiftRight, b.makeNullScalar(), `[null, 2, 6]`, `[null, null, null]`) - b.assertBinopArrScalarVal(compute.ShiftRight, `[null, 3, 96]`, 3, `[null, 0, 12]`) - b.assertBinopArrScalar(compute.ShiftRight, `[null, 3, 96]`, b.makeNullScalar(), `[null, null, null]`) - }) - } - }) -} - -func (b *BinaryIntegralArithmeticSuite[T]) TestShiftLeftOverflowError() { - b.Run(b.DataType().String(), func() { - bitWidth := b.DataType().(arrow.FixedWidthDataType).BitWidth() - if !arrow.IsUnsignedInteger(b.DataType().ID()) { - bitWidth-- - } - - 
b.setOverflowCheck(true) - b.assertBinop(compute.ShiftLeft, `[1]`, fmt.Sprintf("[%d]", bitWidth-1), - fmt.Sprintf("[%d]", T(1)<<(bitWidth-1))) - b.assertBinop(compute.ShiftLeft, `[2]`, fmt.Sprintf("[%d]", bitWidth-2), - fmt.Sprintf("[%d]", T(1)<<(bitWidth-1))) - if arrow.IsUnsignedInteger(b.DataType().ID()) { - b.assertBinop(compute.ShiftLeft, `[2]`, fmt.Sprintf("[%d]", bitWidth-1), `[0]`) - b.assertBinop(compute.ShiftLeft, `[4]`, fmt.Sprintf("[%d]", bitWidth-1), `[0]`) - b.assertBinopErr(compute.ShiftLeft, `[1]`, fmt.Sprintf("[%d]", bitWidth), "shift amount must be >= 0 and less than precision of type") - } else { - // shift a bit into the sign bit - b.assertBinop(compute.ShiftLeft, `[2]`, fmt.Sprintf("[%d]", bitWidth-1), - fmt.Sprintf("[%d]", b.min)) - // shift a bit past the sign bit - b.assertBinop(compute.ShiftLeft, `[4]`, fmt.Sprintf("[%d]", bitWidth-1), `[0]`) - b.assertBinop(compute.ShiftLeft, fmt.Sprintf("[%d]", b.min), `[1]`, `[0]`) - b.assertBinopErr(compute.ShiftLeft, `[1, 2]`, `[1, -1]`, "shift amount must be >= 0 and less than precision of type") - b.assertBinopErr(compute.ShiftLeft, `[1]`, fmt.Sprintf("[%d]", bitWidth), "shift amount must be >= 0 and less than precision of type") - - b.setOverflowCheck(false) - b.assertBinop(compute.ShiftLeft, `[1, 1]`, fmt.Sprintf("[-1, %d]", bitWidth), `[1, 1]`) - } - }) -} - -func (b *BinaryIntegralArithmeticSuite[T]) TestShiftRightOverflowError() { - b.Run(b.DataType().String(), func() { - bitWidth := b.DataType().(arrow.FixedWidthDataType).BitWidth() - if !arrow.IsUnsignedInteger(b.DataType().ID()) { - bitWidth-- - } - - b.setOverflowCheck(true) - - b.assertBinop(compute.ShiftRight, fmt.Sprintf("[%d]", b.max), fmt.Sprintf("[%d]", bitWidth-1), `[1]`) - if arrow.IsUnsignedInteger(b.DataType().ID()) { - b.assertBinopErr(compute.ShiftRight, `[1]`, fmt.Sprintf("[%d]", bitWidth), "shift amount must be >= 0 and less than precision of type") - } else { - b.assertBinop(compute.ShiftRight, `[-1, -1]`, `[1, 5]`, `[-1, 
-1]`) - b.assertBinop(compute.ShiftRight, fmt.Sprintf("[%d]", b.min), `[1]`, fmt.Sprintf("[%d]", b.min/2)) - - b.assertBinopErr(compute.ShiftRight, `[1, 2]`, `[1, -1]`, "shift amount must be >= 0 and less than precision of type") - b.assertBinopErr(compute.ShiftRight, `[1]`, fmt.Sprintf("[%d]", bitWidth), "shift amount must be >= 0 and less than precision of type") - - b.setOverflowCheck(false) - b.assertBinop(compute.ShiftRight, `[1, 1]`, fmt.Sprintf("[-1, %d]", bitWidth), `[1, 1]`) - } - }) -} - -func (b *BinaryIntegralArithmeticSuite[T]) TestTrig() { - // integer arguments promoted to float64, sanity check here - ty := b.DataType() - b.setNansEqual(true) - atan2 := func(ctx context.Context, _ compute.ArithmeticOptions, x, y compute.Datum) (compute.Datum, error) { - return compute.Atan2(ctx, x, y) - } - - lhs, rhs := b.getArr(ty, `[0, 1]`), b.getArr(ty, `[1, 0]`) - defer lhs.Release() - defer rhs.Release() - exp := b.getArr(arrow.PrimitiveTypes.Float64, fmt.Sprintf(`[0, %f]`, math.Pi/2)) - defer exp.Release() - - b.assertBinopArrs(atan2, lhs, rhs, exp) -} - -func (b *BinaryIntegralArithmeticSuite[T]) TestLog() { - // integer arguments promoted to double, sanity check here - exp1 := b.getArr(arrow.PrimitiveTypes.Float64, `[0, 1, null]`) - exp2 := b.getArr(arrow.PrimitiveTypes.Float64, `[1, 2, null]`) - defer exp1.Release() - defer exp2.Release() - - b.assertBinopExpArr(compute.Logb, `[1, 10, null]`, `[10, 10, null]`, exp1) - b.assertBinopExpArr(compute.Logb, `[1, 2, null]`, `[2, 2, null]`, exp1) - b.assertBinopArrScalarExpArr(compute.Logb, `[10, 100, null]`, scalar.MakeScalar(T(10)), exp2) -} - -func TestBinaryArithmetic(t *testing.T) { - suite.Run(t, &BinaryIntegralArithmeticSuite[int8]{BinaryArithmeticSuite[int8]{min: math.MinInt8, max: math.MaxInt8}}) - suite.Run(t, &BinaryIntegralArithmeticSuite[uint8]{BinaryArithmeticSuite[uint8]{min: 0, max: math.MaxUint8}}) - suite.Run(t, &BinaryIntegralArithmeticSuite[int16]{BinaryArithmeticSuite[int16]{min: math.MinInt16, 
max: math.MaxInt16}}) - suite.Run(t, &BinaryIntegralArithmeticSuite[uint16]{BinaryArithmeticSuite[uint16]{min: 0, max: math.MaxUint16}}) - suite.Run(t, &BinaryIntegralArithmeticSuite[int32]{BinaryArithmeticSuite[int32]{min: math.MinInt32, max: math.MaxInt32}}) - suite.Run(t, &BinaryIntegralArithmeticSuite[uint32]{BinaryArithmeticSuite[uint32]{min: 0, max: math.MaxUint32}}) - suite.Run(t, &BinaryIntegralArithmeticSuite[int64]{BinaryArithmeticSuite[int64]{min: math.MinInt64, max: math.MaxInt64}}) - suite.Run(t, &BinaryIntegralArithmeticSuite[uint64]{BinaryArithmeticSuite[uint64]{min: 0, max: math.MaxUint64}}) - suite.Run(t, &BinaryFloatingArithmeticSuite[float32]{BinaryArithmeticSuite[float32]{min: -math.MaxFloat32, max: math.MaxFloat32}, math.SmallestNonzeroFloat32}) - suite.Run(t, &BinaryFloatingArithmeticSuite[float64]{BinaryArithmeticSuite[float64]{min: -math.MaxFloat64, max: math.MaxFloat64}, math.SmallestNonzeroFloat64}) - suite.Run(t, new(Float16BinaryFuncTestSuite)) - suite.Run(t, new(DecimalBinaryArithmeticSuite)) - suite.Run(t, new(ScalarBinaryTemporalArithmeticSuite)) -} - -func TestBinaryArithmeticDispatchBest(t *testing.T) { - for _, name := range []string{"add", "sub", "multiply", "divide", "power"} { - for _, suffix := range []string{"", "_unchecked"} { - name += suffix - t.Run(name, func(t *testing.T) { - - tests := []struct { - left, right arrow.DataType - expected arrow.DataType - }{ - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Int32, arrow.Null, arrow.PrimitiveTypes.Int32}, - {arrow.Null, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int64, 
arrow.PrimitiveTypes.Int64}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32, arrow.PrimitiveTypes.Int64}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint64, arrow.PrimitiveTypes.Int64}, - {arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Uint8}, - {arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Uint16}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float32}, - {arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Float32}, - {arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float64}, - {&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.PrimitiveTypes.Float64}, - arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}, - {&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.PrimitiveTypes.Float64}, - arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Float64}, - } - - for _, tt := range tests { - CheckDispatchBest(t, name, []arrow.DataType{tt.left, tt.right}, []arrow.DataType{tt.expected, tt.expected}) - } - }) - } - } -} - -type DecimalArithmeticSuite struct { - BinaryFuncTestSuite -} - -func (*DecimalArithmeticSuite) positiveScales() []arrow.DataType { - return []arrow.DataType{ - &arrow.Decimal128Type{Precision: 4, Scale: 2}, - &arrow.Decimal256Type{Precision: 4, Scale: 2}, - &arrow.Decimal128Type{Precision: 38, Scale: 2}, - &arrow.Decimal256Type{Precision: 76, Scale: 2}, - } -} - -func (*DecimalArithmeticSuite) negativeScales() []arrow.DataType { - return []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: -2}, - &arrow.Decimal256Type{Precision: 2, Scale: -2}, - } -} - -func (ds *DecimalArithmeticSuite) checkDecimalToFloat(fn string, args []compute.Datum) { 
- // validate that fn(*decimals) is the same as - // fn([cast(x, float64) x for x in decimals]) - - newArgs := make([]compute.Datum, len(args)) - for i, arg := range args { - if arrow.IsDecimal(arg.(compute.ArrayLikeDatum).Type().ID()) { - casted, err := compute.CastDatum(ds.ctx, arg, compute.NewCastOptions(arrow.PrimitiveTypes.Float64, true)) - ds.Require().NoError(err) - defer casted.Release() - newArgs[i] = casted - } else { - newArgs[i] = arg - } - } - - expected, err := compute.CallFunction(ds.ctx, fn, nil, newArgs...) - ds.Require().NoError(err) - defer expected.Release() - actual, err := compute.CallFunction(ds.ctx, fn, nil, args...) - ds.Require().NoError(err) - defer actual.Release() - - assertDatumsEqual(ds.T(), expected, actual, []array.EqualOption{array.WithNaNsEqual(true)}, []scalar.EqualOption{scalar.WithNaNsEqual(true)}) -} - -func (ds *DecimalArithmeticSuite) checkFail(fn string, args []compute.Datum, substr string, opts compute.FunctionOptions) { - _, err := compute.CallFunction(ds.ctx, fn, opts, args...) 
- ds.ErrorIs(err, arrow.ErrInvalid) - ds.ErrorContains(err, substr) -} - -func (ds *DecimalArithmeticSuite) decimalArrayFromJSON(ty arrow.DataType, str string) arrow.Array { - arr, _, err := array.FromJSON(ds.mem, ty, strings.NewReader(str)) - ds.Require().NoError(err) - return arr -} - -type DecimalBinaryArithmeticSuite struct { - DecimalArithmeticSuite -} - -func (ds *DecimalBinaryArithmeticSuite) TestDispatchBest() { - // decimal, floating point - ds.Run("dec/floatingpoint", func() { - for _, fn := range []string{"add", "sub", "multiply", "divide"} { - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - ds.Run(fn, func() { - - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 1, Scale: 0}, - arrow.PrimitiveTypes.Float32}, []arrow.DataType{ - arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float32}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 1, Scale: 0}, arrow.PrimitiveTypes.Float64}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - arrow.PrimitiveTypes.Float32, &arrow.Decimal256Type{Precision: 1, Scale: 0}}, - []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float32}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - arrow.PrimitiveTypes.Float64, &arrow.Decimal128Type{Precision: 1, Scale: 0}}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - }) - } - } - }) - - // decimal, decimal => decimal - // decimal, integer => decimal - ds.Run("dec/dec_int", func() { - for _, fn := range []string{"add", "sub"} { - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - ds.Run(fn, func() { - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - arrow.PrimitiveTypes.Int64, &arrow.Decimal128Type{Precision: 1, Scale: 0}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 19, Scale: 0}, - &arrow.Decimal128Type{Precision: 1, Scale: 0}}) - 
CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 1, Scale: 0}, arrow.PrimitiveTypes.Int64}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 1, Scale: 0}, - &arrow.Decimal128Type{Precision: 19, Scale: 0}}) - - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 1}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 0}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 3, Scale: 1}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 0}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 1}, - &arrow.Decimal128Type{Precision: 3, Scale: 1}}) - }) - } - } - }) - - { - fn := "multiply" - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - ds.Run(fn, func() { - CheckDispatchBest(ds.T(), fn, 
[]arrow.DataType{ - arrow.PrimitiveTypes.Int64, &arrow.Decimal128Type{Precision: 1}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 19}, - &arrow.Decimal128Type{Precision: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 1}, arrow.PrimitiveTypes.Int64}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 1}, - &arrow.Decimal128Type{Precision: 19}}) - - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 1}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 0}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 0}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 0}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 1}, - &arrow.Decimal128Type{Precision: 2, Scale: 0}}) - }) - } - } - - { 
- fn := "divide" - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - ds.Run(fn, func() { - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - arrow.PrimitiveTypes.Int64, &arrow.Decimal128Type{Precision: 1, Scale: 0}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 23, Scale: 4}, - &arrow.Decimal128Type{Precision: 1, Scale: 0}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 1, Scale: 0}, arrow.PrimitiveTypes.Int64}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 21, Scale: 20}, - &arrow.Decimal128Type{Precision: 19, Scale: 0}}) - - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 6, Scale: 5}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 6, Scale: 5}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 6, Scale: 5}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 6, Scale: 5}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 0}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 7, Scale: 5}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - 
&arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 0}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 5, Scale: 4}, - &arrow.Decimal128Type{Precision: 2, Scale: 0}}) - }) - } - } - - for _, name := range []string{"power", "power_unchecked", "atan2", "logb", "logb_unchecked"} { - ds.Run(name, func() { - CheckDispatchBest(ds.T(), name, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(ds.T(), name, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(ds.T(), name, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, arrow.PrimitiveTypes.Int64}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(ds.T(), name, []arrow.DataType{ - arrow.PrimitiveTypes.Int32, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(ds.T(), name, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, arrow.PrimitiveTypes.Float64}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(ds.T(), name, []arrow.DataType{ - arrow.PrimitiveTypes.Float32, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - }) - } -} - -func (ds *DecimalBinaryArithmeticSuite) TestAddSubtractDec128() { - left, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 30, Scale: 3}, - strings.NewReader(`["1.000", "-123456789012345678901234567.890", "98765432109876543210.987", "-999999999999999999999999999.999"]`)) - defer left.Release() - right, _, _ := 
array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 20, Scale: 9}, - strings.NewReader(`["-1.000000000", "12345678901.234567890", "98765.432101234", "-99999999999.999999999"]`)) - defer right.Release() - added, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 37, Scale: 9}, - strings.NewReader(`["0.000000000", "-123456789012345666555555666.655432110", "98765432109876641976.419101234", "-1000000000000000099999999999.998999999"]`)) - defer added.Release() - subtracted, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 37, Scale: 9}, - strings.NewReader(`["2.000000000", "-123456789012345691246913469.124567890", "98765432109876444445.554898766", "-999999999999999899999999999.999000001"]`)) - defer subtracted.Release() - - leftDatum, rightDatum := &compute.ArrayDatum{Value: left.Data()}, &compute.ArrayDatum{Value: right.Data()} - checkScalarBinary(ds.T(), "add", leftDatum, rightDatum, &compute.ArrayDatum{Value: added.Data()}, nil) - checkScalarBinary(ds.T(), "sub", leftDatum, rightDatum, &compute.ArrayDatum{Value: subtracted.Data()}, nil) -} - -func (ds *DecimalBinaryArithmeticSuite) TestAddSubtractDec256() { - left, _, _ := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 30, Scale: 20}, - strings.NewReader(`[ - "-1.00000000000000000001", - "1234567890.12345678900000000000", - "-9876543210.09876543210987654321", - "9999999999.99999999999999999999" - ]`)) - defer left.Release() - right, _, _ := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 30, Scale: 10}, - strings.NewReader(`[ - "1.0000000000", - "-1234567890.1234567890", - "6789.5432101234", - "99999999999999999999.9999999999" - ]`)) - defer right.Release() - added, _, _ := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 41, Scale: 20}, - strings.NewReader(`[ - "-0.00000000000000000001", - "0.00000000000000000000", - "-9876536420.55555530870987654321", - "100000000009999999999.99999999989999999999" - ]`)) - defer added.Release() - subtracted, _, _ := 
array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 41, Scale: 20}, - strings.NewReader(`[ - "-2.00000000000000000001", - "2469135780.24691357800000000000", - "-9876549999.64197555550987654321", - "-99999999989999999999.99999999990000000001" - ]`)) - defer subtracted.Release() - - leftDatum, rightDatum := &compute.ArrayDatum{Value: left.Data()}, &compute.ArrayDatum{Value: right.Data()} - checkScalarBinary(ds.T(), "add", leftDatum, rightDatum, &compute.ArrayDatum{Value: added.Data()}, nil) - checkScalarBinary(ds.T(), "sub", leftDatum, rightDatum, &compute.ArrayDatum{Value: subtracted.Data()}, nil) -} - -func (ds *DecimalBinaryArithmeticSuite) TestAddSubScalars() { - ds.Run("scalar_array", func() { - left := scalar.NewDecimal128Scalar(decimal128.New(0, 123456), &arrow.Decimal128Type{Precision: 6, Scale: 1}) - right, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 10, Scale: 3}, - strings.NewReader(`["1.234", "1234.000", "-9876.543", "666.888"]`)) - defer right.Release() - added, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 11, Scale: 3}, - strings.NewReader(`["12346.834", "13579.600", "2469.057", "13012.488"]`)) - defer added.Release() - leftSubRight, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 11, Scale: 3}, - strings.NewReader(`["12344.366", "11111.600", "22222.143", "11678.712"]`)) - defer leftSubRight.Release() - rightSubLeft, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 11, Scale: 3}, - strings.NewReader(`["-12344.366", "-11111.600", "-22222.143", "-11678.712"]`)) - defer rightSubLeft.Release() - - rightDatum := &compute.ArrayDatum{right.Data()} - addedDatum := &compute.ArrayDatum{added.Data()} - checkScalarBinary(ds.T(), "add", compute.NewDatum(left), rightDatum, addedDatum, nil) - checkScalarBinary(ds.T(), "add", rightDatum, compute.NewDatum(left), addedDatum, nil) - checkScalarBinary(ds.T(), "sub", compute.NewDatum(left), rightDatum, &compute.ArrayDatum{leftSubRight.Data()}, nil) - 
checkScalarBinary(ds.T(), "sub", rightDatum, compute.NewDatum(left), &compute.ArrayDatum{rightSubLeft.Data()}, nil) - }) - - ds.Run("scalar_scalar", func() { - left := scalar.NewDecimal256Scalar(decimal256.FromU64(666), &arrow.Decimal256Type{Precision: 3}) - right := scalar.NewDecimal256Scalar(decimal256.FromU64(888), &arrow.Decimal256Type{Precision: 3}) - added := scalar.NewDecimal256Scalar(decimal256.FromU64(1554), &arrow.Decimal256Type{Precision: 4}) - subtracted := scalar.NewDecimal256Scalar(decimal256.FromI64(-222), &arrow.Decimal256Type{Precision: 4}) - checkScalarBinary(ds.T(), "add", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(added), nil) - checkScalarBinary(ds.T(), "sub", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(subtracted), nil) - }) - - ds.Run("dec128_dec256", func() { - left := scalar.NewDecimal128Scalar(decimal128.FromU64(666), &arrow.Decimal128Type{Precision: 3}) - right := scalar.NewDecimal256Scalar(decimal256.FromU64(888), &arrow.Decimal256Type{Precision: 3}) - added := scalar.NewDecimal256Scalar(decimal256.FromU64(1554), &arrow.Decimal256Type{Precision: 4}) - checkScalarBinary(ds.T(), "add", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(added), nil) - checkScalarBinary(ds.T(), "add", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(added), nil) - }) - - ds.Run("decimal_float", func() { - left := scalar.NewDecimal128Scalar(decimal128.FromU64(666), &arrow.Decimal128Type{Precision: 3}) - right := scalar.MakeScalar(float64(888)) - added := scalar.MakeScalar(float64(1554)) - checkScalarBinary(ds.T(), "add", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(added), nil) - checkScalarBinary(ds.T(), "add", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(added), nil) - }) - - ds.Run("decimal_integer", func() { - left := scalar.NewDecimal128Scalar(decimal128.FromU64(666), &arrow.Decimal128Type{Precision: 3}) - right := 
scalar.MakeScalar(int64(888)) - added := scalar.NewDecimal128Scalar(decimal128.FromU64(1554), &arrow.Decimal128Type{Precision: 20}) - subtracted := scalar.NewDecimal128Scalar(decimal128.FromI64(-222), &arrow.Decimal128Type{Precision: 20}) - checkScalarBinary(ds.T(), "add", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(added), nil) - checkScalarBinary(ds.T(), "sub", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(subtracted), nil) - }) -} - -func (ds *DecimalBinaryArithmeticSuite) TestMultiply() { - ds.Run("array x array, decimal128", func() { - left, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 20, Scale: 10}, - strings.NewReader(`["1234567890.1234567890", "-0.0000000001", "-9999999999.9999999999"]`)) - ds.Require().NoError(err) - defer left.Release() - right, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 13, Scale: 3}, - strings.NewReader(`["1234567890.123", "0.001", "-9999999999.999"]`)) - ds.Require().NoError(err) - defer right.Release() - expected, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 34, Scale: 13}, - strings.NewReader(`["1524157875323319737.98709039504701", "-0.0000000000001", "99999999999989999999.0000000000001"]`)) - ds.Require().NoError(err) - defer expected.Release() - - checkScalarBinary(ds.T(), "multiply_unchecked", &compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{right.Data()}, &compute.ArrayDatum{expected.Data()}, nil) - }) - - ds.Run("array x array decimal256", func() { - left, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 30, Scale: 3}, - strings.NewReader(`["123456789012345678901234567.890", "0.000"]`)) - ds.Require().NoError(err) - defer left.Release() - right, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 20, Scale: 9}, - strings.NewReader(`["-12345678901.234567890", "99999999999.999999999"]`)) - ds.Require().NoError(err) - defer right.Release() - expected, _, err := array.FromJSON(ds.mem, 
&arrow.Decimal256Type{Precision: 51, Scale: 12}, - strings.NewReader(`["-1524157875323883675034293577501905199.875019052100", "0.000000000000"]`)) - ds.Require().NoError(err) - defer expected.Release() - checkScalarBinary(ds.T(), "multiply_unchecked", &compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{right.Data()}, &compute.ArrayDatum{expected.Data()}, nil) - }) - - ds.Run("scalar x array", func() { - left, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3, Scale: 2}, "3.14") - ds.Require().NoError(err) - right, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 1, Scale: 0}, - strings.NewReader(`["1", "2", "3", "4", "5"]`)) - ds.Require().NoError(err) - defer right.Release() - expected, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 5, Scale: 2}, - strings.NewReader(`["3.14", "6.28", "9.42", "12.56", "15.70"]`)) - ds.Require().NoError(err) - defer expected.Release() - - leftDatum, rightDatum := &compute.ScalarDatum{left}, &compute.ArrayDatum{right.Data()} - expDatum := &compute.ArrayDatum{expected.Data()} - - checkScalarBinary(ds.T(), "multiply_unchecked", leftDatum, rightDatum, expDatum, nil) - checkScalarBinary(ds.T(), "multiply_unchecked", rightDatum, leftDatum, expDatum, nil) - }) - - ds.Run("scalar x scalar", func() { - left, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 1}, "1") - ds.Require().NoError(err) - right, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 1}, "1") - ds.Require().NoError(err) - expected, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "1") - ds.Require().NoError(err) - checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) - }) - - ds.Run("decimal128 x decimal256", func() { - left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3, Scale: 2}, "6.66") - right, _ := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 3, Scale: 1}, "88.8") - expected, _ := 
scalar.ParseScalar(&arrow.Decimal256Type{Precision: 7, Scale: 3}, "591.408") - checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) - checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(expected), nil) - }) - - ds.Run("decimal x float", func() { - left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "666") - right := scalar.MakeScalar(float64(888)) - expected := scalar.MakeScalar(float64(591408)) - checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) - checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(expected), nil) - }) - - ds.Run("decimal x integer", func() { - left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "666") - right := scalar.MakeScalar(int64(888)) - expected, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 23}, "591408") - checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) - }) -} - -func (ds *DecimalBinaryArithmeticSuite) TestDivide() { - ds.Run("array / array, decimal128", func() { - left, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 13, Scale: 3}, - strings.NewReader(`["1234567890.123", "0.001"]`)) - ds.Require().NoError(err) - defer left.Release() - right, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 3, Scale: 0}, - strings.NewReader(`["-987", "999"]`)) - ds.Require().NoError(err) - defer right.Release() - expected, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 17, Scale: 7}, - strings.NewReader(`["-1250828.6627386", "0.0000010"]`)) - ds.Require().NoError(err) - defer expected.Release() - - checkScalarBinary(ds.T(), "divide_unchecked", &compute.ArrayDatum{left.Data()}, 
&compute.ArrayDatum{right.Data()}, &compute.ArrayDatum{expected.Data()}, nil) - }) - - ds.Run("array / array decimal256", func() { - left, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 20, Scale: 10}, - strings.NewReader(`["1234567890.1234567890", "9999999999.9999999999"]`)) - ds.Require().NoError(err) - defer left.Release() - right, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 13, Scale: 3}, - strings.NewReader(`["1234567890.123", "0.001"]`)) - ds.Require().NoError(err) - defer right.Release() - expected, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 34, Scale: 21}, - strings.NewReader(`["1.000000000000369999093", "9999999999999.999999900000000000000"]`)) - ds.Require().NoError(err) - defer expected.Release() - checkScalarBinary(ds.T(), "divide_unchecked", &compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{right.Data()}, &compute.ArrayDatum{expected.Data()}, nil) - }) - - ds.Run("scalar / array", func() { - left, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 1, Scale: 0}, "1") - ds.Require().NoError(err) - right, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 1, Scale: 0}, - strings.NewReader(`["1", "2", "3", "4"]`)) - ds.Require().NoError(err) - defer right.Release() - leftDivRight, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 5, Scale: 4}, - strings.NewReader(`["1.0000", "0.5000", "0.3333", "0.2500"]`)) - ds.Require().NoError(err) - defer leftDivRight.Release() - rightDivLeft, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 5, Scale: 4}, - strings.NewReader(`["1.0000", "2.0000", "3.0000", "4.0000"]`)) - ds.Require().NoError(err) - defer rightDivLeft.Release() - - leftDatum, rightDatum := &compute.ScalarDatum{left}, &compute.ArrayDatum{right.Data()} - - checkScalarBinary(ds.T(), "divide_unchecked", leftDatum, rightDatum, &compute.ArrayDatum{leftDivRight.Data()}, nil) - checkScalarBinary(ds.T(), "divide_unchecked", rightDatum, 
leftDatum, &compute.ArrayDatum{rightDivLeft.Data()}, nil) - }) - - ds.Run("scalar / scalar", func() { - left, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 6, Scale: 5}, "2.71828") - ds.Require().NoError(err) - right, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 6, Scale: 5}, "3.14159") - ds.Require().NoError(err) - expected, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 13, Scale: 7}, "0.8652561") - ds.Require().NoError(err) - checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) - }) - - ds.Run("decimal128 / decimal256", func() { - left, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 6, Scale: 5}, "2.71828") - ds.Require().NoError(err) - right, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 6, Scale: 5}, "3.14159") - ds.Require().NoError(err) - leftDivRight, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 13, Scale: 7}, "0.8652561") - ds.Require().NoError(err) - rightDivLeft, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 13, Scale: 7}, "1.1557271") - ds.Require().NoError(err) - checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(leftDivRight), nil) - checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(rightDivLeft), nil) - }) - - ds.Run("decimal / float", func() { - left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "100") - right := scalar.MakeScalar(float64(50)) - leftDivRight := scalar.MakeScalar(float64(2)) - rightDivLeft := scalar.MakeScalar(float64(0.5)) - checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(leftDivRight), nil) - checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(rightDivLeft), nil) - }) - - ds.Run("decimal / integer", func() { - 
left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "100") - right := scalar.MakeScalar(int64(50)) - leftDivRight, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 23, Scale: 20}, "2.0000000000000000000") - rightDivLeft, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 23, Scale: 4}, "0.5000") - checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(leftDivRight), nil) - checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(rightDivLeft), nil) - }) -} - -func (ds *DecimalBinaryArithmeticSuite) TestAtan2() { - // decimal arguments get promoted to float64, sanity check here - fn := "atan2" - for _, ty := range ds.positiveScales() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}, &compute.ArrayDatum{empty.Data()}}) - - larr := ds.getArr(ty, `["1.00", "10.00", "1.00", "2.00", null]`) - defer larr.Release() - - ldatum := &compute.ArrayDatum{larr.Data()} - - test := ds.getArr(ty, `["10.00", "10.00", "2.00", "2.00", null]`) - defer test.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - &compute.ArrayDatum{test.Data()}}) - - test = ds.getArr(&arrow.Decimal128Type{Precision: 4, Scale: 2}, `["10.00", "10.00", "2.00", "2.00", null]`) - defer test.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - &compute.ArrayDatum{test.Data()}}) - - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - compute.NewDatum(scalar.MakeScalar(int64(10)))}) - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - compute.NewDatum(scalar.MakeScalar(float64(10)))}) - - larr = ds.getArr(arrow.PrimitiveTypes.Float64, `[1, 10, 1, 2, null]`) - defer larr.Release() - - sc, _ := scalar.MakeScalarParam("10.00", ty) - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, - compute.NewDatum(sc)}) - - larr = 
ds.getArr(arrow.PrimitiveTypes.Int64, `[1, 10, 1, 2, null]`) - defer larr.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, - compute.NewDatum(sc)}) - } - - for _, ty := range ds.negativeScales() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}, &compute.ArrayDatum{empty.Data()}}) - - larr := ds.getArr(ty, `["12E2", "42E2", null]`) - defer larr.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, &compute.ArrayDatum{larr.Data()}}) - - rarr := ds.getArr(&arrow.Decimal128Type{Precision: 2, Scale: -2}, `["12E2", "42E2", null]`) - defer rarr.Release() - - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, &compute.ArrayDatum{rarr.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, compute.NewDatum(scalar.MakeScalar(int64(10)))}) - } -} - -func (ds *DecimalBinaryArithmeticSuite) TestLogb() { - // decimal arguments get promoted to float64, sanity check here - for _, fn := range []string{"logb", "logb_unchecked"} { - ds.Run(fn, func() { - for _, ty := range ds.positiveScales() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}, &compute.ArrayDatum{empty.Data()}}) - - larr := ds.getArr(ty, `["1.00", "10.00", "1.00", "2.00", null]`) - defer larr.Release() - - ldatum := &compute.ArrayDatum{larr.Data()} - - test := ds.getArr(ty, `["10.00", "10.00", "2.00", "2.00", null]`) - defer test.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - &compute.ArrayDatum{test.Data()}}) - - test = ds.getArr(&arrow.Decimal128Type{Precision: 4, Scale: 2}, `["10.00", "10.00", "2.00", "2.00", null]`) - defer test.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - &compute.ArrayDatum{test.Data()}}) - - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - 
compute.NewDatum(scalar.MakeScalar(int64(10)))}) - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - compute.NewDatum(scalar.MakeScalar(float64(10)))}) - - larr = ds.getArr(arrow.PrimitiveTypes.Float64, `[1, 10, 1, 2, null]`) - defer larr.Release() - - sc, _ := scalar.MakeScalarParam("10.00", ty) - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, - compute.NewDatum(sc)}) - - larr = ds.getArr(arrow.PrimitiveTypes.Int64, `[1, 10, 1, 2, null]`) - defer larr.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, - compute.NewDatum(sc)}) - } - - for _, ty := range ds.negativeScales() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}, &compute.ArrayDatum{empty.Data()}}) - - larr := ds.getArr(ty, `["12E2", "42E2", null]`) - defer larr.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, &compute.ArrayDatum{larr.Data()}}) - - rarr := ds.getArr(&arrow.Decimal128Type{Precision: 2, Scale: -2}, `["12E2", "42E2", null]`) - defer rarr.Release() - - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, &compute.ArrayDatum{rarr.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, compute.NewDatum(scalar.MakeScalar(int64(10)))}) - } - }) - } -} - -type DecimalUnaryArithmeticSuite struct { - DecimalArithmeticSuite -} - -func (ds *DecimalUnaryArithmeticSuite) TestAbsoluteValue() { - max128 := decimal128.GetMaxValue(38) - max256 := decimal256.GetMaxValue(76) - ds.Run("decimal", func() { - for _, fn := range []string{"abs_unchecked", "abs"} { - ds.Run(fn, func() { - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`[]`)) - defer empty.Release() - in, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["1.00", "-42.15", null]`)) - defer 
in.Release() - exp, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["1.00", "42.15", null]`)) - defer exp.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil) - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{exp.Data()}, nil) - }) - } - - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal128Scalar(max128.Negate(), &arrow.Decimal128Type{Precision: 38}))}, - compute.NewDatum(scalar.NewDecimal128Scalar(max128, &arrow.Decimal128Type{Precision: 38})), nil) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal256Scalar(max256.Negate(), &arrow.Decimal256Type{Precision: 76}))}, - compute.NewDatum(scalar.NewDecimal256Scalar(max256, &arrow.Decimal256Type{Precision: 76})), nil) - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`[]`)) - defer empty.Release() - in, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["12E2", "-42E2", null]`)) - defer in.Release() - exp, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["12E2", "42E2", null]`)) - defer exp.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil) - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{exp.Data()}, nil) - }) - } - }) - } - }) -} - -func (ds *DecimalUnaryArithmeticSuite) TestNegate() { - max128 := decimal128.GetMaxValue(38) - max256 := decimal256.GetMaxValue(76) - - for _, fn := range []string{"negate_unchecked", "negate"} { - ds.Run(fn, func() { - for _, ty := range ds.positiveScales() { - empty, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`[]`)) - defer empty.Release() - in, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["0.00", "1.00", "-42.15", null]`)) - defer in.Release() - exp, _, _ := 
array.FromJSON(ds.mem, ty, strings.NewReader(`["0.00", "-1.00", "42.15", null]`)) - defer exp.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil) - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{exp.Data()}, nil) - } - - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal128Scalar(max128.Negate(), &arrow.Decimal128Type{Precision: 38}))}, - compute.NewDatum(scalar.NewDecimal128Scalar(max128, &arrow.Decimal128Type{Precision: 38})), nil) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal256Scalar(max256.Negate(), &arrow.Decimal256Type{Precision: 76}))}, - compute.NewDatum(scalar.NewDecimal256Scalar(max256, &arrow.Decimal256Type{Precision: 76})), nil) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal128Scalar(max128, &arrow.Decimal128Type{Precision: 38}))}, - compute.NewDatum(scalar.NewDecimal128Scalar(max128.Negate(), &arrow.Decimal128Type{Precision: 38})), nil) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal256Scalar(max256, &arrow.Decimal256Type{Precision: 76}))}, - compute.NewDatum(scalar.NewDecimal256Scalar(max256.Negate(), &arrow.Decimal256Type{Precision: 76})), nil) - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`[]`)) - defer empty.Release() - in, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["0", "12E2", "-42E2", null]`)) - defer in.Release() - exp, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["0", "-12E2", "42E2", null]`)) - defer exp.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil) - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{exp.Data()}, nil) - }) - } - }) - } -} - -func (ds 
*DecimalUnaryArithmeticSuite) TestSquareRoot() { - for _, fn := range []string{"sqrt_unchecked", "sqrt"} { - ds.Run(fn, func() { - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - arr := ds.decimalArrayFromJSON(ty, `["4.00", "16.00", "36.00", null]`) - defer arr.Release() - - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{Value: empty.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{Value: arr.Data()}}) - - neg := ds.decimalArrayFromJSON(ty, `["-2.00"]`) - defer neg.Release() - ds.checkFail("sqrt", []compute.Datum{&compute.ArrayDatum{Value: neg.Data()}}, "square root of negative number", nil) - }) - } - - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - arr := ds.decimalArrayFromJSON(ty, `["400", "1600", "3600", null]`) - defer arr.Release() - - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{Value: empty.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{Value: arr.Data()}}) - - neg := ds.decimalArrayFromJSON(ty, `["-400"]`) - defer neg.Release() - ds.checkFail("sqrt", []compute.Datum{&compute.ArrayDatum{Value: neg.Data()}}, "square root of negative number", nil) - }) - } - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestSign() { - max128 := decimal128.GetMaxValue(38) - max256 := decimal256.GetMaxValue(76) - - for _, ty := range ds.positiveScales() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - emptyOut := ds.decimalArrayFromJSON(arrow.PrimitiveTypes.Int64, `[]`) - defer emptyOut.Release() - in := ds.decimalArrayFromJSON(ty, `["1.00", "0.00", "-42.15", null]`) - defer in.Release() - exp := ds.decimalArrayFromJSON(arrow.PrimitiveTypes.Int64, `[1, 0, -1, null]`) - defer exp.Release() - - checkScalar(ds.T(), "sign", []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - 
&compute.ArrayDatum{emptyOut.Data()}, nil) - checkScalar(ds.T(), "sign", []compute.Datum{&compute.ArrayDatum{in.Data()}}, - &compute.ArrayDatum{exp.Data()}, nil) - } - - checkScalar(ds.T(), "sign", []compute.Datum{compute.NewDatum( - scalar.NewDecimal128Scalar(max128, &arrow.Decimal128Type{Precision: 38}))}, - compute.NewDatum(scalar.MakeScalar(int64(1))), nil) - checkScalar(ds.T(), "sign", []compute.Datum{compute.NewDatum( - scalar.NewDecimal128Scalar(max128.Negate(), &arrow.Decimal128Type{Precision: 38}))}, - compute.NewDatum(scalar.MakeScalar(int64(-1))), nil) - checkScalar(ds.T(), "sign", []compute.Datum{compute.NewDatum( - scalar.NewDecimal256Scalar(max256, &arrow.Decimal256Type{Precision: 38}))}, - compute.NewDatum(scalar.MakeScalar(int64(1))), nil) - checkScalar(ds.T(), "sign", []compute.Datum{compute.NewDatum( - scalar.NewDecimal256Scalar(max256.Negate(), &arrow.Decimal256Type{Precision: 38}))}, - compute.NewDatum(scalar.MakeScalar(int64(-1))), nil) - - for _, ty := range ds.negativeScales() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - emptyOut := ds.decimalArrayFromJSON(arrow.PrimitiveTypes.Int64, `[]`) - defer emptyOut.Release() - in := ds.decimalArrayFromJSON(ty, `["12e2", "0.00", "-42E2", null]`) - defer in.Release() - exp := ds.decimalArrayFromJSON(arrow.PrimitiveTypes.Int64, `[1, 0, -1, null]`) - defer exp.Release() - - checkScalar(ds.T(), "sign", []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - &compute.ArrayDatum{emptyOut.Data()}, nil) - checkScalar(ds.T(), "sign", []compute.Datum{&compute.ArrayDatum{in.Data()}}, - &compute.ArrayDatum{exp.Data()}, nil) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestTrigAcosAsin() { - for _, fn := range []string{"acos", "acos_unchecked", "asin", "asin_unchecked"} { - ds.Run(fn, func() { - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - vals := ds.decimalArrayFromJSON(ty, `["0.00", 
"-1.00", "1.00", null]`) - defer vals.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}}) - }) - } - }) - } - - for _, fn := range []string{"acos", "asin"} { - ds.Run(fn, func() { - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - arr := ds.decimalArrayFromJSON(ty, `["12E2", "-42E2", null]`) - defer arr.Release() - ds.checkDecimalToFloat(fn+"_unchecked", []compute.Datum{&compute.ArrayDatum{arr.Data()}}) - ds.checkFail(fn, []compute.Datum{&compute.ArrayDatum{arr.Data()}}, "domain error", nil) - }) - } - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestAtan() { - fn := "atan" - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - vals := ds.decimalArrayFromJSON(ty, `["0.00", "-1.00", "1.00", null]`) - defer vals.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}}) - }) - } - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - vals := ds.decimalArrayFromJSON(ty, `["12E2", "-42E2", null]`) - defer vals.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}}) - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestTrig() { - for _, fn := range []string{"cos", "sin", "tan"} { - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - ds.Run(fn, func() { - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - vals := ds.decimalArrayFromJSON(ty, `["0.00", "-1.00", "1.00", null]`) - defer vals.Release() - 
ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}}) - }) - } - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - vals := ds.decimalArrayFromJSON(ty, `["12E2", "-42E2", null]`) - defer vals.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}}) - }) - } - }) - } - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRound() { - options := compute.RoundOptions{NDigits: 2, Mode: compute.RoundDown} - - cases := []struct { - mode compute.RoundMode - exp string - }{ - {compute.RoundDown, `["1.010", "1.010", "1.010", "1.010", "-1.010", "-1.020", "-1.020", "-1.020", null]`}, - {compute.RoundUp, `["1.010", "1.020", "1.020", "1.020", "-1.010", "-1.010", "-1.010", "-1.010", null]`}, - {compute.RoundTowardsZero, `["1.010", "1.010", "1.010", "1.010", "-1.010", "-1.010", "-1.010", "-1.010", null]`}, - {compute.RoundTowardsInfinity, `["1.010", "1.020", "1.020", "1.020", "-1.010", "-1.020", "-1.020", "-1.020", null]`}, - {compute.RoundHalfDown, `["1.010", "1.010", "1.010", "1.020", "-1.010", "-1.010", "-1.020", "-1.020", null]`}, - {compute.RoundHalfUp, `["1.010", "1.010", "1.020", "1.020", "-1.010", "-1.010", "-1.010", "-1.020", null]`}, - {compute.RoundHalfTowardsZero, `["1.010", "1.010", "1.010", "1.020", "-1.010", "-1.010", "-1.010", "-1.020", null]`}, - {compute.RoundHalfTowardsInfinity, `["1.010", "1.010", "1.020", "1.020", "-1.010", "-1.010", "-1.020", "-1.020", null]`}, - {compute.RoundHalfToEven, `["1.010", "1.010", "1.020", "1.020", "-1.010", "-1.010", "-1.020", "-1.020", null]`}, - {compute.RoundHalfToOdd, `["1.010", "1.010", "1.010", "1.020", "-1.010", "-1.010", "-1.010", "-1.020", null]`}, - } - - fn := "round" - for _, ty := range 
[]arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 3}, &arrow.Decimal256Type{Precision: 4, Scale: 3}} { - ds.Run(ty.String(), func() { - values := ds.getArr(ty, `["1.010", "1.012", "1.015", "1.019", "-1.010", "-1.012", "-1.015", "-1.019", null]`) - defer values.Release() - - for _, tt := range cases { - ds.Run(tt.mode.String(), func() { - options.Mode = tt.mode - exp := ds.getArr(ty, tt.exp) - defer exp.Release() - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{values.Data()}}, - &compute.ArrayDatum{exp.Data()}, options) - }) - } - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRoundTowardsInfinity() { - fn := "round" - options := compute.RoundOptions{NDigits: 0, Mode: compute.RoundTowardsInfinity} - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - vals := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`) - defer vals.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options) - input := []compute.Datum{&compute.ArrayDatum{vals.Data()}} - - options.NDigits = 0 - - exp0 := ds.getArr(ty, `["1.00", "2.00", "2.00", "-42.00", "-43.00", "-43.00", null]`) - defer exp0.Release() - - checkScalar(ds.T(), fn, input, &compute.ArrayDatum{exp0.Data()}, options) - - exp1 := ds.getArr(ty, `["1.00", "2.00", "1.10", "-42.00", "-43.00", "-42.20", null]`) - defer exp1.Release() - - options.NDigits = 1 - checkScalar(ds.T(), fn, input, &compute.ArrayDatum{exp1.Data()}, options) - - options.NDigits = 2 - checkScalar(ds.T(), fn, input, &compute.ArrayDatum{vals.Data()}, options) - options.NDigits = 4 - checkScalar(ds.T(), fn, input, &compute.ArrayDatum{vals.Data()}, options) - options.NDigits = 100 - checkScalar(ds.T(), fn, input, &compute.ArrayDatum{vals.Data()}, options) - - 
options.NDigits = -1 - neg := ds.getArr(ty, `["10.00", "10.00", "10.00", "-50.00", "-50.00", "-50.00", null]`) - defer neg.Release() - checkScalar(ds.T(), fn, input, &compute.ArrayDatum{neg.Data()}, options) - - options.NDigits = -2 - ds.checkFail(fn, input, "rounding to -2 digits will not fit in precision", options) - options.NDigits = -1 - - noprec := ds.getArr(ty, `["99.99"]`) - defer noprec.Release() - ds.checkFail(fn, []compute.Datum{&compute.ArrayDatum{noprec.Data()}}, "rounded value 100.00 does not fit in precision", options) - }) - } - - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: -2}, &arrow.Decimal256Type{Precision: 2, Scale: -2}} { - ds.Run(ty.String(), func() { - values := ds.getArr(ty, `["10E2", "12E2", "18E2", "-10E2", "-12E2", "-18E2", null]`) - defer values.Release() - - input := &compute.ArrayDatum{values.Data()} - - options.NDigits = 0 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = 2 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = 100 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = -1 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = -2 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = -3 - res := ds.getArr(ty, `["10E2", "20E2", "20E2", "-10E2", "-20E2", "-20E2", null]`) - defer res.Release() - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{res.Data()}, options) - - options.NDigits = -4 - ds.checkFail(fn, []compute.Datum{input}, "rounding to -4 digits will not fit in precision", options) - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRoundHalfToEven() { - fn := "round" - options := compute.RoundOptions{NDigits: 0, Mode: compute.RoundHalfToEven} - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - ds.Run(ty.String(), 
func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options) - - values := ds.getArr(ty, `["1.00", "5.99", "1.01", "-42.00", "-42.99", "-42.15", "1.50", "2.50", "-5.50", "-2.55", null]`) - defer values.Release() - input := &compute.ArrayDatum{values.Data()} - - exp0 := ds.getArr(ty, `["1.00", "6.00", "1.00", "-42.00", "-43.00", "-42.00", "2.00", "2.00", "-6.00", "-3.00", null]`) - defer exp0.Release() - - exp1 := ds.getArr(ty, `["1.00", "6.00", "1.00", "-42.00", "-43.00", "-42.20", "1.50", "2.50", "-5.50", "-2.60", null]`) - defer exp1.Release() - - expNeg1 := ds.getArr(ty, `["0.00", "10.00", "0.00", "-40.00", "-40.00", "-40.00", "0.00", "0.00", "-10.00", "0.00", null]`) - defer expNeg1.Release() - - options.NDigits = 0 - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp0.Data()}, options) - options.NDigits = 1 - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp1.Data()}, options) - options.NDigits = 2 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = 4 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = 100 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = -1 - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{expNeg1.Data()}, options) - options.NDigits = -2 - ds.checkFail(fn, []compute.Datum{input}, "rounding to -2 digits will not fit in precision", options) - options.NDigits = -1 - noprec := ds.getArr(ty, `["99.99"]`) - defer noprec.Release() - ds.checkFail(fn, []compute.Datum{&compute.ArrayDatum{noprec.Data()}}, "rounded value 100.00 does not fit in precision", options) - }) - } - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: -2}, &arrow.Decimal256Type{Precision: 2, Scale: -2}} { - ds.Run(ty.String(), func() { - values := ds.getArr(ty, 
`["5E2", "10E2", "12E2", "15E2", "18E2", "-10E2", "-12E2", "-15E2", "-18E2", null]`) - defer values.Release() - - input := &compute.ArrayDatum{values.Data()} - - options.NDigits = 0 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = 2 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = 100 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = -1 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = -2 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = -3 - res := ds.getArr(ty, `["0", "10E2", "10E2", "20E2", "20E2", "-10E2", "-10E2", "-20E2", "-20E2", null]`) - defer res.Release() - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{res.Data()}, options) - - options.NDigits = -4 - ds.checkFail(fn, []compute.Datum{input}, "rounding to -4 digits will not fit in precision", options) - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRoundCeil() { - fn := "ceil" - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - &compute.ArrayDatum{empty.Data()}, nil) - - in := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`) - defer in.Release() - out := ds.getArr(ty, `["1.00", "2.00", "2.00", "-42.00", "-42.00", "-42.00", null]`) - defer out.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, - &compute.ArrayDatum{out.Data()}, nil) - }) - } - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - ds.Run(ty.String(), func() { - sc, _ := scalar.MakeScalarParam("99.99", ty) - ds.checkFail(fn, []compute.Datum{compute.NewDatum(sc)}, "rounded value 100.00 does not fit in precision of decimal", nil) - sc, 
_ = scalar.MakeScalarParam("-99.99", ty) - out, _ := scalar.MakeScalarParam("-99.00", ty) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(sc)}, compute.NewDatum(out), nil) - }) - } - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - &compute.ArrayDatum{empty.Data()}, nil) - - ex := ds.getArr(ty, `["12E2", "-42E2", null]`) - defer ex.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{ex.Data()}}, - &compute.ArrayDatum{ex.Data()}, nil) - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRoundFloor() { - fn := "floor" - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - &compute.ArrayDatum{empty.Data()}, nil) - - in := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`) - defer in.Release() - out := ds.getArr(ty, `["1.00", "1.00", "1.00", "-42.00", "-43.00", "-43.00", null]`) - defer out.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, - &compute.ArrayDatum{out.Data()}, nil) - }) - } - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - ds.Run(ty.String(), func() { - sc, _ := scalar.MakeScalarParam("-99.99", ty) - ds.checkFail(fn, []compute.Datum{compute.NewDatum(sc)}, "rounded value -100.00 does not fit in precision of decimal", nil) - sc, _ = scalar.MakeScalarParam("99.99", ty) - out, _ := scalar.MakeScalarParam("99.00", ty) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(sc)}, compute.NewDatum(out), nil) - }) - } - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - 
checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - &compute.ArrayDatum{empty.Data()}, nil) - - ex := ds.getArr(ty, `["12E2", "-42E2", null]`) - defer ex.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{ex.Data()}}, - &compute.ArrayDatum{ex.Data()}, nil) - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRoundTrunc() { - fn := "trunc" - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - &compute.ArrayDatum{empty.Data()}, nil) - - in := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`) - defer in.Release() - out := ds.getArr(ty, `["1.00", "1.00", "1.00", "-42.00", "-42.00", "-42.00", null]`) - defer out.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, - &compute.ArrayDatum{out.Data()}, nil) - }) - } - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - ds.Run(ty.String(), func() { - sc, _ := scalar.MakeScalarParam("99.99", ty) - out, _ := scalar.MakeScalarParam("99.00", ty) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(sc)}, compute.NewDatum(out), nil) - sc, _ = scalar.MakeScalarParam("-99.99", ty) - out, _ = scalar.MakeScalarParam("-99.00", ty) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(sc)}, compute.NewDatum(out), nil) - }) - } - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - &compute.ArrayDatum{empty.Data()}, nil) - - ex := ds.getArr(ty, `["12E2", "-42E2", null]`) - defer ex.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{ex.Data()}}, - &compute.ArrayDatum{ex.Data()}, nil) - }) - } -} - -func 
(ds *DecimalUnaryArithmeticSuite) TestRoundToMultiple() { - fn := "round_to_multiple" - var options compute.RoundToMultipleOptions - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - ds.Run(ty.String(), func() { - if ty.ID() == arrow.DECIMAL128 { - options.Multiple, _ = scalar.MakeScalarParam(decimal128.FromI64(200), ty) - } else { - options.Multiple, _ = scalar.MakeScalarParam(decimal256.FromI64(200), ty) - } - - values := ds.getArr(ty, `["-3.50", "-3.00", "-2.50", "-2.00", "-1.50", "-1.00", "-0.50", "0.00", "0.50", "1.00", "1.50", "2.00", "2.50", "3.00", "3.50", null]`) - defer values.Release() - - input := []compute.Datum{&compute.ArrayDatum{values.Data()}} - - tests := []struct { - mode compute.RoundMode - exp string - }{ - {compute.RoundDown, `["-4.00", "-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", null]`}, - {compute.RoundUp, `["-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "-0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", "4.00", "4.00", null]`}, - {compute.RoundTowardsZero, `["-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "-0.00", "0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", null]`}, - {compute.RoundTowardsInfinity, `["-4.00", "-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", "4.00", "4.00", null]`}, - {compute.RoundHalfDown, `["-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", null]`}, - {compute.RoundHalfUp, `["-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", "4.00", null]`}, - {compute.RoundHalfTowardsZero, `["-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", null]`}, - 
{compute.RoundHalfTowardsInfinity, `["-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", "4.00", null]`}, - {compute.RoundHalfToEven, `["-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "4.00", "4.00", null]`}, - {compute.RoundHalfToOdd, `["-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "2.00", "4.00", null]`}, - } - - for _, tt := range tests { - ds.Run(tt.mode.String(), func() { - options.Mode = tt.mode - - result := ds.getArr(ty, tt.exp) - defer result.Release() - - checkScalar(ds.T(), fn, input, &compute.ArrayDatum{result.Data()}, options) - }) - } - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRoundToMultipleTowardsInfinity() { - fn := "round_to_multiple" - options := compute.RoundToMultipleOptions{Mode: compute.RoundTowardsInfinity} - setMultiple := func(ty arrow.DataType, val int64) { - if ty.ID() == arrow.DECIMAL128 { - options.Multiple = scalar.NewDecimal128Scalar(decimal128.FromI64(val), ty) - } else { - options.Multiple = scalar.NewDecimal256Scalar(decimal256.FromI64(val), ty) - } - } - - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - values := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`) - defer values.Release() - - input := &compute.ArrayDatum{values.Data()} - - setMultiple(ty, 25) - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options) - - exp25 := ds.getArr(ty, `["1.00", "2.00", "1.25", "-42.00", "-43.00", "-42.25", null]`) - defer exp25.Release() - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp25.Data()}, options) - - setMultiple(ty, 1) - checkScalar(ds.T(), 
fn, []compute.Datum{input}, input, options) - - setMultiple(&arrow.Decimal128Type{Precision: 2, Scale: 0}, 2) - exp20 := ds.getArr(ty, `["2.00", "2.00", "2.00", "-42.00", "-44.00", "-44.00", null]`) - defer exp20.Release() - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp20.Data()}, options) - - setMultiple(ty, 0) - ds.checkFail(fn, []compute.Datum{input}, "rounding multiple must be positive", options) - - options.Multiple = scalar.NewDecimal128Scalar(decimal128.Num{}, &arrow.Decimal128Type{Precision: 4, Scale: 2}) - ds.checkFail(fn, []compute.Datum{input}, "rounding multiple must be positive", options) - - tester := ds.getArr(ty, `["99.99"]`) - defer tester.Release() - - testDatum := &compute.ArrayDatum{tester.Data()} - - setMultiple(ty, -10) - ds.checkFail(fn, []compute.Datum{testDatum}, "rounding multiple must be positive", options) - setMultiple(ty, 100) - ds.checkFail(fn, []compute.Datum{testDatum}, "rounded value 100.00 does not fit in precision", options) - options.Multiple = scalar.NewFloat64Scalar(1) - ds.checkFail(fn, []compute.Datum{testDatum}, "rounded value 100.00 does not fit in precision", options) - options.Multiple = scalar.MakeNullScalar(&arrow.Decimal128Type{Precision: 3}) - ds.checkFail(fn, []compute.Datum{testDatum}, "rounding multiple must be non-null and valid", options) - options.Multiple = nil - ds.checkFail(fn, []compute.Datum{testDatum}, "rounding multiple must be non-null and valid", options) - }) - } - - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: -2}, &arrow.Decimal256Type{Precision: 2, Scale: -2}} { - ds.Run(ty.String(), func() { - values := ds.getArr(ty, `["10E2", "12E2", "18E2", "-10E2", "-12E2", "-18E2", null]`) - defer values.Release() - - input := &compute.ArrayDatum{values.Data()} - - setMultiple(ty, 4) - exp := ds.getArr(ty, `["12E2", "12E2", "20E2", "-12E2", "-12E2", "-20E2", null]`) - defer exp.Release() - - checkScalar(ds.T(), fn, []compute.Datum{input}, 
&compute.ArrayDatum{exp.Data()}, options) - - setMultiple(ty, 1) - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRoundToMultipleHalfToOdd() { - fn := "round_to_multiple" - options := compute.RoundToMultipleOptions{Mode: compute.RoundHalfToOdd} - setMultiple := func(ty arrow.DataType, val int64) { - if ty.ID() == arrow.DECIMAL128 { - options.Multiple = scalar.NewDecimal128Scalar(decimal128.FromI64(val), ty) - } else { - options.Multiple = scalar.NewDecimal256Scalar(decimal256.FromI64(val), ty) - } - } - - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - values := ds.getArr(ty, `["-0.38", "-0.37", "-0.25", "-0.13", "-0.12", "0.00", "0.12", "0.13", "0.25", "0.37", "0.38", null]`) - defer values.Release() - - input := &compute.ArrayDatum{values.Data()} - - // there is no exact halfway point, check what happens - setMultiple(ty, 25) - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options) - - exp25 := ds.getArr(ty, `["-0.50", "-0.25", "-0.25", "-0.25", "-0.00", "0.00", "0.00", "0.25", "0.25", "0.25", "0.50", null]`) - defer exp25.Release() - - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp25.Data()}, options) - - setMultiple(ty, 1) - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - setMultiple(ty, 24) - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options) - - exp24 := ds.getArr(ty, `["-0.48", "-0.48", "-0.24", "-0.24", "-0.24", "0.00", "0.24", "0.24", "0.24", "0.48", "0.48", null]`) - defer exp24.Release() - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp24.Data()}, options) - - setMultiple(&arrow.Decimal128Type{Precision: 3, Scale: 1}, 1) - exp1 := 
ds.getArr(ty, `["-0.40", "-0.40", "-0.30", "-0.10", "-0.10", "0.00", "0.10", "0.10", "0.30", "0.40", "0.40", null]`) - defer exp1.Release() - - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp1.Data()}, options) - } - - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: -2}, &arrow.Decimal256Type{Precision: 2, Scale: -2}} { - values := ds.getArr(ty, `["10E2", "12E2", "18E2", "-10E2", "-12E2", "-18E2", null]`) - defer values.Release() - - exp4 := ds.getArr(ty, `["12E2", "12E2", "20E2", "-12E2", "-12E2", "-20E2", null]`) - defer exp4.Release() - - exp5 := ds.getArr(ty, `["10E2", "10E2", "20E2", "-10E2", "-10E2", "-20E2", null]`) - defer exp5.Release() - - input := &compute.ArrayDatum{values.Data()} - setMultiple(ty, 4) - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp4.Data()}, options) - - setMultiple(ty, 5) - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp5.Data()}, options) - - setMultiple(ty, 1) - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - } -} - -type ScalarBinaryTemporalArithmeticSuite struct { - BinaryFuncTestSuite -} - -var ( - date32JSON = `[0, 11016, -25932, 23148, 18262, 18261, 18260, 14609, 14610, 14612, - 14613, 13149, 13148, 14241, 14242, 15340, null]` - date32JSON2 = `[365, 10650, -25901, 23118, 18263, 18259, 18260, 14609, 14610, 14612, - 14613, 13149, 13148, 14240, 13937, 15400, null]` - date64JSON = `[0, 951782400000, -2240524800000, 1999987200000, 1577836800000, - 1577750400000, 1577664000000, 1262217600000, 1262304000000, 1262476800000, - 1262563200000, 1136073600000, 1135987200000, 1230422400000, 1230508800000, - 1325376000000, null]` - date64JSON2 = `[31536000000, 920160000000, -2237846400000, 1997395200000, - 1577923200000, 1577577600000, 1577664000000, 1262217600000, 1262304000000, - 1262476800000, 1262563200000, 1136073600000, 1135987200000, 1230336000000, - 1204156800000, 1330560000000, null]` - timeJSONs = `[59, 84203, 
3560, 12800, 3905, 7810, 11715, 15620, 19525, 23430, 27335, - 31240, 35145, 0, 0, 3723, null]` - timeJSONs2 = `[59, 84203, 12642, 7182, 68705, 7390, 915, 16820, 19525, 5430, 84959, - 31207, 35145, 0, 0, 3723, null]` - timeJSONms = `[59123, 84203999, 3560001, 12800000, 3905001, 7810002, 11715003, 15620004, - 19525005, 23430006, 27335000, 31240000, 35145000, 0, 0, 3723000, null]` - timeJSONms2 = `[59103, 84203999, 12642001, 7182000, 68705005, 7390000, 915003, 16820004, - 19525005, 5430006, 84959000, 31207000, 35145000, 0, 0, 3723000, null]` - timeJSONus = `[59123456, 84203999999, 3560001001, 12800000000, 3905001000, 7810002000, - 11715003000, 15620004132, 19525005321, 23430006163, 27335000000, - 31240000000, 35145000000, 0, 0, 3723000000, null]` - timeJSONus2 = `[59103476, 84203999999, 12642001001, 7182000000, 68705005000, 7390000000, - 915003000, 16820004432, 19525005021, 5430006163, 84959000000, - 31207000000, 35145000000, 0, 0, 3723000000, null]` - timeJSONns = `[59123456789, 84203999999999, 3560001001001, 12800000000000, 3905001000000, - 7810002000000, 11715003000000, 15620004132000, 19525005321000, - 23430006163000, 27335000000000, 31240000000000, 35145000000000, 0, 0, - 3723000000000, null]` - timeJSONns2 = `[59103476799, 84203999999909, 12642001001001, 7182000000000, 68705005000000, - 7390000000000, 915003000000, 16820004432000, 19525005021000, 5430006163000, - 84959000000000, 31207000000000, 35145000000000, 0, 0, 3723000000000, null]` -) - -func (s *ScalarBinaryTemporalArithmeticSuite) TestTemporalAddSub() { - tests := []struct { - val1 string - val2 string - dt arrow.DataType - exp arrow.DataType - }{ - {date32JSON, date32JSON2, arrow.FixedWidthTypes.Date32, arrow.FixedWidthTypes.Duration_s}, - {date64JSON, date64JSON2, arrow.FixedWidthTypes.Date64, arrow.FixedWidthTypes.Duration_ms}, - {timeJSONs, timeJSONs2, arrow.FixedWidthTypes.Time32s, arrow.FixedWidthTypes.Duration_s}, - {timeJSONms, timeJSONms2, arrow.FixedWidthTypes.Time32ms, 
arrow.FixedWidthTypes.Duration_ms}, - {timeJSONus, timeJSONus2, arrow.FixedWidthTypes.Time64us, arrow.FixedWidthTypes.Duration_us}, - {timeJSONns, timeJSONns2, arrow.FixedWidthTypes.Time64ns, arrow.FixedWidthTypes.Duration_ns}, - } - - for _, tt := range tests { - s.Run(tt.dt.String(), func() { - for _, checked := range []bool{true, false} { - s.Run(fmt.Sprintf("checked=%t", checked), func() { - opts := compute.ArithmeticOptions{NoCheckOverflow: !checked} - arr1, _, _ := array.FromJSON(s.mem, tt.dt, strings.NewReader(tt.val1)) - defer arr1.Release() - arr2, _, _ := array.FromJSON(s.mem, tt.dt, strings.NewReader(tt.val2)) - defer arr2.Release() - - datum1 := &compute.ArrayDatum{Value: arr1.Data()} - datum2 := &compute.ArrayDatum{Value: arr2.Data()} - - result, err := compute.Subtract(s.ctx, opts, datum1, datum2) - s.Require().NoError(err) - defer result.Release() - res := result.(*compute.ArrayDatum) - s.Truef(arrow.TypeEqual(tt.exp, res.Type()), - "expected: %s\n got: %s", tt.exp, res.Type()) - - out, err := compute.Add(s.ctx, opts, datum2, result) - s.Require().NoError(err) - defer out.Release() - - // date32 - date32 / date64 - date64 produce durations - // and date + duration == timestamp so we need to cast - // the timestamp back to a date in that case. Otherwise - // we get back time32/time64 in those cases and can - // compare them accurately. 
- if arrow.TypeEqual(arr1.DataType(), out.(*compute.ArrayDatum).Type()) { - assertDatumsEqual(s.T(), datum1, out, nil, nil) - } else { - casted, err := compute.CastDatum(s.ctx, out, compute.SafeCastOptions(arr1.DataType())) - s.Require().NoError(err) - defer casted.Release() - assertDatumsEqual(s.T(), datum1, casted, nil, nil) - } - - }) - } - }) - } -} - -func TestUnaryDispatchBest(t *testing.T) { - for _, fn := range []string{"abs"} { - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - t.Run(fn, func(t *testing.T) { - for _, ty := range numericTypes { - t.Run(ty.String(), func(t *testing.T) { - CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty}) - CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, - []arrow.DataType{ty}) - }) - } - }) - } - } - - for _, fn := range []string{"negate_unchecked", "sign"} { - t.Run(fn, func(t *testing.T) { - for _, ty := range numericTypes { - t.Run(ty.String(), func(t *testing.T) { - CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty}) - CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, - []arrow.DataType{ty}) - }) - } - }) - } - - for _, fn := range []string{"negate"} { - t.Run(fn, func(t *testing.T) { - for _, ty := range append(signedIntTypes, floatingTypes...) 
{ - t.Run(ty.String(), func(t *testing.T) { - CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty}) - CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, - []arrow.DataType{ty}) - }) - } - }) - } - - // float types (with _unchecked variants) - for _, fn := range []string{"ln", "log2", "log10", "log1p", "sin", "cos", "tan", "asin", "acos"} { - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - t.Run(fn, func(t *testing.T) { - for _, ty := range floatingTypes { - t.Run(ty.String(), func(t *testing.T) { - CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty}) - CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, - []arrow.DataType{ty}) - }) - } - }) - } - } - - // float types (without _unchecked variants) - for _, fn := range []string{"atan", "sign", "floor", "ceil", "trunc", "round"} { - t.Run(fn, func(t *testing.T) { - for _, ty := range floatingTypes { - t.Run(ty.String(), func(t *testing.T) { - CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty}) - CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, - []arrow.DataType{ty}) - }) - } - }) - } - - // integer -> float64 (with _unchecked variant) - for _, fn := range []string{"ln", "log2", "log10", "log1p", "sin", "cos", "tan", "asin", "acos"} { - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - t.Run(fn, func(t *testing.T) { - for _, ty := range integerTypes { - t.Run(ty.String(), func(t *testing.T) { - CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, - []arrow.DataType{arrow.PrimitiveTypes.Float64}) - }) - } - }) - } - } - - // integer -> float64 (without _unchecked 
variants) - for _, fn := range []string{"atan", "floor", "ceil", "trunc", "round"} { - t.Run(fn, func(t *testing.T) { - for _, ty := range integerTypes { - t.Run(ty.String(), func(t *testing.T) { - CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, - []arrow.DataType{arrow.PrimitiveTypes.Float64}) - }) - } - }) - } -} - -func TestUnaryArithmeticNull(t *testing.T) { - for _, fn := range []string{"abs", "negate", "acos", "asin", "cos", "ln", "log10", "log1p", "log2", "sin", "tan"} { - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - assertNullToNull(t, context.TODO(), fn, memory.DefaultAllocator) - } - } - - for _, fn := range []string{"sign", "atan", "bit_wise_not", "floor", "ceil", "trunc", "round"} { - assertNullToNull(t, context.TODO(), fn, memory.DefaultAllocator) - } -} - -type UnaryArithmeticSuite[T arrow.NumericType, O fnOpts] struct { - suite.Suite - - mem *memory.CheckedAllocator - ctx context.Context - - opts O -} - -func (us *UnaryArithmeticSuite[T, O]) SetupTest() { - us.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - us.ctx = compute.WithAllocator(context.TODO(), us.mem) - var def O - us.opts = def -} - -func (us *UnaryArithmeticSuite[T, O]) TearDownTest() { - us.mem.AssertSize(us.T(), 0) -} - -func (*UnaryArithmeticSuite[T, O]) datatype() arrow.DataType { - return arrow.GetDataType[T]() -} - -func (us *UnaryArithmeticSuite[T, O]) makeNullScalar() scalar.Scalar { - return scalar.MakeNullScalar(us.datatype()) -} - -func (us *UnaryArithmeticSuite[T, O]) makeScalar(v T) scalar.Scalar { - return scalar.MakeScalar(v) -} - -func (us *UnaryArithmeticSuite[T, O]) makeArray(v ...T) arrow.Array { - return exec.ArrayFromSlice(us.mem, v) -} - -func (us *UnaryArithmeticSuite[T, O]) getArr(dt arrow.DataType, str string) arrow.Array { - arr, _, err := array.FromJSON(us.mem, 
dt, strings.NewReader(str), array.WithUseNumber()) - us.Require().NoError(err) - return arr -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpValError(fn unaryArithmeticFunc[O], arg T, msg string) { - in := us.makeScalar(arg) - _, err := fn(us.ctx, us.opts, compute.NewDatum(in)) - us.ErrorIs(err, arrow.ErrInvalid) - us.ErrorContains(err, msg) -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpNotImplemented(fn unaryArithmeticFunc[O], arg T, msg string) { - in := us.makeScalar(arg) - _, err := fn(us.ctx, us.opts, compute.NewDatum(in)) - us.ErrorIs(err, arrow.ErrNotImplemented) - us.ErrorContains(err, msg) -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpVals(fn unaryArithmeticFunc[O], arg, expected T) { - in := us.makeScalar(arg) - exp := us.makeScalar(expected) - - actual, err := fn(us.ctx, us.opts, compute.NewDatum(in)) - us.Require().NoError(err) - assertScalarEquals(us.T(), exp, actual.(*compute.ScalarDatum).Value, scalar.WithNaNsEqual(true)) -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpScalars(fn unaryArithmeticFunc[O], arg, exp scalar.Scalar) { - actual, err := fn(us.ctx, us.opts, compute.NewDatum(arg)) - us.Require().NoError(err) - assertScalarEquals(us.T(), exp, actual.(*compute.ScalarDatum).Value, scalar.WithNaNsEqual(true)) -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpArrs(fn unaryArithmeticFunc[O], arg, exp arrow.Array) { - datum := &compute.ArrayDatum{arg.Data()} - actual, err := fn(us.ctx, us.opts, datum) - us.Require().NoError(err) - defer actual.Release() - assertDatumsEqual(us.T(), &compute.ArrayDatum{exp.Data()}, actual, []array.EqualOption{array.WithNaNsEqual(true)}, []scalar.EqualOption{scalar.WithNaNsEqual(true)}) - - // also check scalar ops - for i := 0; i < arg.Len(); i++ { - expScalar, err := scalar.GetScalar(exp, i) - us.NoError(err) - argScalar, err := scalar.GetScalar(arg, i) - us.NoError(err) - - actual, err := fn(us.ctx, us.opts, compute.NewDatum(argScalar)) - us.Require().NoError(err) - 
assertDatumsEqual(us.T(), compute.NewDatum(expScalar), compute.NewDatum(actual), []array.EqualOption{array.WithNaNsEqual(true)}, []scalar.EqualOption{scalar.WithNaNsEqual(true)}) - } -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpExpArr(fn unaryArithmeticFunc[O], arg string, exp arrow.Array) { - in, _, err := array.FromJSON(us.mem, us.datatype(), strings.NewReader(arg), array.WithUseNumber()) - us.Require().NoError(err) - defer in.Release() - - us.assertUnaryOpArrs(fn, in, exp) -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOp(fn unaryArithmeticFunc[O], arg, exp string) { - in, _, err := array.FromJSON(us.mem, us.datatype(), strings.NewReader(arg), array.WithUseNumber()) - us.Require().NoError(err) - defer in.Release() - expected, _, err := array.FromJSON(us.mem, us.datatype(), strings.NewReader(exp), array.WithUseNumber()) - us.Require().NoError(err) - defer expected.Release() - - us.assertUnaryOpArrs(fn, in, expected) -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpErr(fn unaryArithmeticFunc[O], arg string, msg string) { - in, _, err := array.FromJSON(us.mem, us.datatype(), strings.NewReader(arg), array.WithUseNumber()) - us.Require().NoError(err) - defer in.Release() - - _, err = fn(us.ctx, us.opts, &compute.ArrayDatum{in.Data()}) - us.ErrorIs(err, arrow.ErrInvalid) - us.ErrorContains(err, msg) -} - -type UnaryArithmeticIntegral[T arrow.IntType | arrow.UintType] struct { - UnaryArithmeticSuite[T, compute.ArithmeticOptions] -} - -func (us *UnaryArithmeticIntegral[T]) setOverflowCheck(v bool) { - us.opts.NoCheckOverflow = !v -} - -func (us *UnaryArithmeticIntegral[T]) TestTrig() { - // integer arguments promoted to float64, sanity check here - atan := func(ctx context.Context, _ compute.ArithmeticOptions, arg compute.Datum) (compute.Datum, error) { - return compute.Atan(ctx, arg) - } - - input := us.makeArray(0, 1) - defer input.Release() - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - sinOut := 
us.getArr(arrow.PrimitiveTypes.Float64, `[0, 0.8414709848078965]`) - defer sinOut.Release() - cosOut := us.getArr(arrow.PrimitiveTypes.Float64, `[1, 0.5403023058681398]`) - defer cosOut.Release() - tanOut := us.getArr(arrow.PrimitiveTypes.Float64, `[0, 1.5574077246549023]`) - defer tanOut.Release() - asinOut := us.getArr(arrow.PrimitiveTypes.Float64, fmt.Sprintf("[0, %f]", math.Pi/2)) - defer asinOut.Release() - acosOut := us.getArr(arrow.PrimitiveTypes.Float64, fmt.Sprintf("[%f, 0]", math.Pi/2)) - defer acosOut.Release() - atanOut := us.getArr(arrow.PrimitiveTypes.Float64, fmt.Sprintf("[0, %f]", math.Pi/4)) - defer atanOut.Release() - - us.assertUnaryOpArrs(compute.Sin, input, sinOut) - us.assertUnaryOpArrs(compute.Cos, input, cosOut) - us.assertUnaryOpArrs(compute.Tan, input, tanOut) - us.assertUnaryOpArrs(compute.Asin, input, asinOut) - us.assertUnaryOpArrs(compute.Acos, input, acosOut) - us.assertUnaryOpArrs(atan, input, atanOut) - } -} - -func (us *UnaryArithmeticIntegral[T]) TestLog() { - // integer arguments promoted to double, sanity check here - ty := us.datatype() - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - exp1 := us.getArr(arrow.PrimitiveTypes.Float64, `[0, null]`) - defer exp1.Release() - exp2 := us.getArr(arrow.PrimitiveTypes.Float64, `[0, 1, null]`) - defer exp2.Release() - - ln := us.getArr(ty, `[1, null]`) - defer ln.Release() - log10 := us.getArr(ty, `[1, 10, null]`) - defer log10.Release() - log2 := us.getArr(ty, `[1, 2, null]`) - defer log2.Release() - log1p := us.getArr(ty, `[0, null]`) - defer log1p.Release() - - us.assertUnaryOpArrs(compute.Ln, ln, exp1) - us.assertUnaryOpArrs(compute.Log10, log10, exp2) - us.assertUnaryOpArrs(compute.Log2, log2, exp2) - us.assertUnaryOpArrs(compute.Log1p, log1p, exp1) - } -} - -type UnaryArithmeticSigned[T arrow.IntType] struct { - UnaryArithmeticIntegral[T] -} - -func (us *UnaryArithmeticSigned[T]) TestAbsoluteValue() { - var ( - dt = us.datatype() - min = 
kernels.MinOf[T]() - max = kernels.MaxOf[T]() - ) - - fn := func(in, exp string) { - us.assertUnaryOp(compute.AbsoluteValue, in, exp) - } - - us.Run(dt.String(), func() { - for _, checkOverflow := range []bool{true, false} { - us.setOverflowCheck(checkOverflow) - us.Run(fmt.Sprintf("check_overflow=%t", checkOverflow), func() { - // empty array - fn(`[]`, `[]`) - // scalar/arrays with nulls - fn(`[null]`, `[null]`) - fn(`[1, null, -10]`, `[1, null, 10]`) - us.assertUnaryOpScalars(compute.AbsoluteValue, us.makeNullScalar(), us.makeNullScalar()) - // scalar/arrays with zeros - fn(`[0, -0]`, `[0, 0]`) - us.assertUnaryOpVals(compute.AbsoluteValue, -0, 0) - us.assertUnaryOpVals(compute.AbsoluteValue, 0, 0) - // ordinary scalars/arrays (positive inputs) - fn(`[1, 10, 127]`, `[1, 10, 127]`) - us.assertUnaryOpVals(compute.AbsoluteValue, 1, 1) - // ordinary scalars/arrays (negative inputs) - fn(`[-1, -10, -127]`, `[1, 10, 127]`) - us.assertUnaryOpVals(compute.AbsoluteValue, -1, 1) - // min/max - us.assertUnaryOpVals(compute.AbsoluteValue, max, max) - if checkOverflow { - us.assertUnaryOpValError(compute.AbsoluteValue, min, "overflow") - } else { - us.assertUnaryOpVals(compute.AbsoluteValue, min, min) - } - }) - } - }) -} - -func (us *UnaryArithmeticSigned[T]) TestNegate() { - var ( - dt = us.datatype() - min = kernels.MinOf[T]() - max = kernels.MaxOf[T]() - ) - - fn := func(in, exp string) { - us.assertUnaryOp(compute.Negate, in, exp) - } - - us.Run(dt.String(), func() { - for _, checkOverflow := range []bool{true, false} { - us.setOverflowCheck(checkOverflow) - us.Run(fmt.Sprintf("check_overflow=%t", checkOverflow), func() { - fn(`[]`, `[]`) - // scalar/arrays with nulls - fn(`[null]`, `[null]`) - fn(`[1, null, -10]`, `[-1, null, 10]`) - // ordinary scalars/arrays (positive inputs) - fn(`[1, 10, 127]`, `[-1, -10, -127]`) - us.assertUnaryOpVals(compute.Negate, 1, -1) - // ordinary scalars/arrays (negative inputs) - fn(`[-1, -10, -127]`, `[1, 10, 127]`) - 
us.assertUnaryOpVals(compute.Negate, -1, 1) - // min/max - us.assertUnaryOpVals(compute.Negate, min+1, max) - us.assertUnaryOpVals(compute.Negate, max, min+1) - }) - } - }) -} - -type UnaryArithmeticUnsigned[T arrow.UintType] struct { - UnaryArithmeticIntegral[T] -} - -func (us *UnaryArithmeticUnsigned[T]) TestAbsoluteValue() { - var ( - min, max T = 0, kernels.MaxOf[T]() - ) - - fn := func(in, exp string) { - us.assertUnaryOp(compute.AbsoluteValue, in, exp) - } - - us.Run(us.datatype().String(), func() { - for _, checkOverflow := range []bool{true, false} { - us.setOverflowCheck(checkOverflow) - us.Run(fmt.Sprintf("check_overflow=%t", checkOverflow), func() { - fn(`[]`, `[]`) - fn(`[null]`, `[null]`) - us.assertUnaryOpScalars(compute.AbsoluteValue, us.makeNullScalar(), us.makeNullScalar()) - fn(`[0, 1, 10, 127]`, `[0, 1, 10, 127]`) - us.assertUnaryOpVals(compute.AbsoluteValue, min, min) - us.assertUnaryOpVals(compute.AbsoluteValue, max, max) - }) - } - }) -} - -func (us *UnaryArithmeticUnsigned[T]) TestNegate() { - var ( - dt = us.datatype() - ) - - fn := func(in, exp string) { - us.assertUnaryOp(compute.Negate, in, exp) - } - - us.Run(dt.String(), func() { - us.setOverflowCheck(true) - us.assertUnaryOpNotImplemented(compute.Negate, 1, "no kernel matching input types") - - us.setOverflowCheck(false) - fn(`[]`, `[]`) - fn(`[null]`, `[null]`) - us.assertUnaryOpVals(compute.Negate, 1, ^T(1)+1) - }) -} - -type UnaryArithmeticFloating[T constraints.Float] struct { - UnaryArithmeticSuite[T, compute.ArithmeticOptions] - - min, max T - smallest T -} - -func (us *UnaryArithmeticFloating[T]) setOverflowCheck(v bool) { - us.opts.NoCheckOverflow = !v -} - -func (us *UnaryArithmeticFloating[T]) TestAbsoluteValue() { - fn := func(in, exp string) { - us.assertUnaryOp(compute.AbsoluteValue, in, exp) - } - - us.Run(us.datatype().String(), func() { - for _, checkOverflow := range []bool{true, false} { - us.setOverflowCheck(checkOverflow) - us.Run(fmt.Sprintf("check_overflow=%t", 
checkOverflow), func() { - fn(`[]`, `[]`) - fn(`[null]`, `[null]`) - fn(`[1.3, null, -10.80]`, `[1.3, null, 10.80]`) - us.assertUnaryOpScalars(compute.AbsoluteValue, us.makeNullScalar(), us.makeNullScalar()) - fn(`[0.0, -0.0]`, `[0.0, 0.0]`) - us.assertUnaryOpVals(compute.AbsoluteValue, T(math.Copysign(0, -1)), 0) - us.assertUnaryOpVals(compute.AbsoluteValue, 0, 0) - fn(`[1.3, 10.80, 12748.001]`, `[1.3, 10.80, 12748.001]`) - us.assertUnaryOpVals(compute.AbsoluteValue, 1.3, 1.3) - fn(`[-1.3, -10.80, -12748.001]`, `[1.3, 10.80, 12748.001]`) - us.assertUnaryOpVals(compute.AbsoluteValue, -1.3, 1.3) - fn(`["Inf", "-Inf"]`, `["Inf", "Inf"]`) - us.assertUnaryOpVals(compute.AbsoluteValue, us.min, us.max) - us.assertUnaryOpVals(compute.AbsoluteValue, us.max, us.max) - }) - } - }) -} - -func (us *UnaryArithmeticFloating[T]) TestNegate() { - var ( - dt = us.datatype() - ) - - fn := func(in, exp string) { - us.assertUnaryOp(compute.Negate, in, exp) - } - - us.Run(dt.String(), func() { - for _, checkOverflow := range []bool{true, false} { - us.setOverflowCheck(checkOverflow) - us.Run(fmt.Sprintf("check_overflow=%t", checkOverflow), func() { - fn(`[]`, `[]`) - // scalar/arrays with nulls - fn(`[null]`, `[null]`) - fn(`[1.5, null, -10.25]`, `[-1.5, null, 10.25]`) - // ordinary scalars/arrays (positive inputs) - fn(`[0.5, 10.123, 127.321]`, `[-0.5, -10.123, -127.321]`) - us.assertUnaryOpVals(compute.Negate, 1.25, -1.25) - // ordinary scalars/arrays (negative inputs) - fn(`[-0.5, -10.123, -127.321]`, `[0.5, 10.123, 127.321]`) - us.assertUnaryOpVals(compute.Negate, -1.25, 1.25) - // min/max - us.assertUnaryOpVals(compute.Negate, us.min, us.max) - us.assertUnaryOpVals(compute.Negate, us.max, us.min) - }) - } - }) -} - -func (us *UnaryArithmeticFloating[T]) TestTrigSin() { - us.setOverflowCheck(false) - us.assertUnaryOp(compute.Sin, `["Inf", "-Inf"]`, `["NaN", "NaN"]`) - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - us.assertUnaryOp(compute.Sin, 
`[]`, `[]`) - us.assertUnaryOp(compute.Sin, `[null, "NaN"]`, `[null, "NaN"]`) - arr := us.makeArray(0, math.Pi/2, math.Pi) - exp := us.makeArray(0, 1, 0) - defer arr.Release() - defer exp.Release() - us.assertUnaryOpArrs(compute.Sin, arr, exp) - } - - us.setOverflowCheck(true) - us.assertUnaryOpErr(compute.Sin, `["Inf", "-Inf"]`, "domain error") -} - -func (us *UnaryArithmeticFloating[T]) TestTrigCos() { - us.setOverflowCheck(false) - us.assertUnaryOp(compute.Cos, `["Inf", "-Inf"]`, `["NaN", "NaN"]`) - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - us.assertUnaryOp(compute.Cos, `[]`, `[]`) - us.assertUnaryOp(compute.Cos, `[null, "NaN"]`, `[null, "NaN"]`) - arr := us.makeArray(0, math.Pi/2, math.Pi) - exp := us.makeArray(1, 0, -1) - defer arr.Release() - defer exp.Release() - us.assertUnaryOpArrs(compute.Cos, arr, exp) - } - - us.setOverflowCheck(true) - us.assertUnaryOpErr(compute.Cos, `["Inf", "-Inf"]`, "domain error") -} - -func (us *UnaryArithmeticFloating[T]) TestTrigTan() { - us.setOverflowCheck(false) - us.assertUnaryOp(compute.Tan, `["Inf", "-Inf"]`, `["NaN", "NaN"]`) - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - us.assertUnaryOp(compute.Tan, `[]`, `[]`) - us.assertUnaryOp(compute.Tan, `[null, "NaN"]`, `[null, "NaN"]`) - // pi/2 isn't representable exactly -> there are no poles - // (i.e. 
tan(pi/2) is merely a large value and not +Inf) - arr := us.makeArray(0, math.Pi) - exp := us.makeArray(0, 0) - defer arr.Release() - defer exp.Release() - us.assertUnaryOpArrs(compute.Tan, arr, exp) - } - - us.setOverflowCheck(true) - us.assertUnaryOpErr(compute.Tan, `["Inf", "-Inf"]`, "domain error") -} - -func (us *UnaryArithmeticFloating[T]) TestTrigAsin() { - us.setOverflowCheck(false) - us.assertUnaryOp(compute.Asin, `["Inf", "-Inf", -2, 2]`, `["NaN", "NaN", "NaN", "NaN"]`) - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - us.assertUnaryOp(compute.Asin, `[]`, `[]`) - us.assertUnaryOp(compute.Asin, `[null, "NaN"]`, `[null, "NaN"]`) - arr := us.makeArray(0, 1, -1) - exp := us.makeArray(0, math.Pi/2, -math.Pi/2) - defer arr.Release() - defer exp.Release() - us.assertUnaryOpArrs(compute.Asin, arr, exp) - } - - us.setOverflowCheck(true) - us.assertUnaryOpErr(compute.Asin, `["Inf", "-Inf", -2, 2]`, "domain error") -} - -func (us *UnaryArithmeticFloating[T]) TestTrigAcos() { - us.setOverflowCheck(false) - us.assertUnaryOp(compute.Acos, `["Inf", "-Inf", -2, 2]`, `["NaN", "NaN", "NaN", "NaN"]`) - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - us.assertUnaryOp(compute.Acos, `[]`, `[]`) - us.assertUnaryOp(compute.Acos, `[null, "NaN"]`, `[null, "NaN"]`) - arr := us.makeArray(0, 1, -1) - exp := us.makeArray(math.Pi/2, 0, math.Pi) - defer arr.Release() - defer exp.Release() - us.assertUnaryOpArrs(compute.Acos, arr, exp) - } - - us.setOverflowCheck(true) - us.assertUnaryOpErr(compute.Acos, `["Inf", "-Inf", -2, 2]`, "domain error") -} - -func (us *UnaryArithmeticFloating[T]) TestTrigAtan() { - us.setOverflowCheck(false) - atan := func(ctx context.Context, _ compute.ArithmeticOptions, arg compute.Datum) (compute.Datum, error) { - return compute.Atan(ctx, arg) - } - us.assertUnaryOp(atan, `[]`, `[]`) - us.assertUnaryOp(atan, `[null, "NaN"]`, `[null, "NaN"]`) - - arr := us.makeArray(0, 1, -1, T(math.Inf(1)), 
T(math.Inf(-1))) - exp := us.makeArray(0, math.Pi/4, -math.Pi/4, math.Pi/2, -math.Pi/2) - defer arr.Release() - defer exp.Release() - us.assertUnaryOpArrs(atan, arr, exp) -} - -func (us *UnaryArithmeticFloating[T]) TestLog() { - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - us.Run(fmt.Sprintf("checked=%t", overflow), func() { - us.assertUnaryOp(compute.Ln, `[1, 2.718281828459045, null, "NaN", "Inf"]`, - `[0, 1, null, "NaN", "Inf"]`) - us.assertUnaryOpVals(compute.Ln, us.smallest, T(math.Log(float64(us.smallest)))) - us.assertUnaryOpVals(compute.Ln, us.max, T(math.Log(float64(us.max)))) - us.assertUnaryOp(compute.Log10, `[1, 10, null, "NaN", "Inf"]`, `[0, 1, null, "NaN", "Inf"]`) - us.assertUnaryOpVals(compute.Log10, us.smallest, T(math.Log10(float64(us.smallest)))) - us.assertUnaryOpVals(compute.Log10, us.max, T(math.Log10(float64(us.max)))) - us.assertUnaryOp(compute.Log2, `[1, 2, null, "NaN", "Inf"]`, `[0, 1, null, "NaN", "Inf"]`) - us.assertUnaryOpVals(compute.Log2, us.smallest, T(math.Log2(float64(us.smallest)))) - us.assertUnaryOpVals(compute.Log2, us.max, T(math.Log2(float64(us.max)))) - us.assertUnaryOp(compute.Log1p, `[0, 1.718281828459045, null, "NaN", "Inf"]`, `[0, 1, null, "NaN", "Inf"]`) - us.assertUnaryOpVals(compute.Log1p, us.smallest, T(math.Log1p(float64(us.smallest)))) - us.assertUnaryOpVals(compute.Log1p, us.max, T(math.Log1p(float64(us.max)))) - }) - } - - us.setOverflowCheck(false) - us.assertUnaryOp(compute.Ln, `["-Inf", -1, 0, "Inf"]`, `["NaN", "NaN", "-Inf", "Inf"]`) - us.assertUnaryOp(compute.Log10, `["-Inf", -1, 0, "Inf"]`, `["NaN", "NaN", "-Inf", "Inf"]`) - us.assertUnaryOp(compute.Log2, `["-Inf", -1, 0, "Inf"]`, `["NaN", "NaN", "-Inf", "Inf"]`) - us.assertUnaryOp(compute.Log1p, `["-Inf", -2, -1, "Inf"]`, `["NaN", "NaN", "-Inf", "Inf"]`) - - us.setOverflowCheck(true) - us.assertUnaryOpErr(compute.Ln, `[0]`, "logarithm of zero") - us.assertUnaryOpErr(compute.Ln, `[-1]`, "logarithm of negative number") - 
us.assertUnaryOpErr(compute.Ln, `["-Inf"]`, "logarithm of negative number") - us.assertUnaryOpValError(compute.Ln, us.min, "logarithm of negative number") - - us.assertUnaryOpErr(compute.Log10, `[0]`, "logarithm of zero") - us.assertUnaryOpErr(compute.Log10, `[-1]`, "logarithm of negative number") - us.assertUnaryOpErr(compute.Log10, `["-Inf"]`, "logarithm of negative number") - us.assertUnaryOpValError(compute.Log10, us.min, "logarithm of negative number") - - us.assertUnaryOpErr(compute.Log2, `[0]`, "logarithm of zero") - us.assertUnaryOpErr(compute.Log2, `[-1]`, "logarithm of negative number") - us.assertUnaryOpErr(compute.Log2, `["-Inf"]`, "logarithm of negative number") - us.assertUnaryOpValError(compute.Log2, us.min, "logarithm of negative number") - - us.assertUnaryOpErr(compute.Log1p, `[-1]`, "logarithm of zero") - us.assertUnaryOpErr(compute.Log1p, `[-2]`, "logarithm of negative number") - us.assertUnaryOpErr(compute.Log1p, `["-Inf"]`, "logarithm of negative number") - us.assertUnaryOpValError(compute.Log1p, us.min, "logarithm of negative number") -} - -func TestUnaryArithmetic(t *testing.T) { - suite.Run(t, new(UnaryArithmeticSigned[int8])) - suite.Run(t, new(UnaryArithmeticSigned[int16])) - suite.Run(t, new(UnaryArithmeticSigned[int32])) - suite.Run(t, new(UnaryArithmeticSigned[int64])) - suite.Run(t, new(UnaryArithmeticUnsigned[uint8])) - suite.Run(t, new(UnaryArithmeticUnsigned[uint16])) - suite.Run(t, new(UnaryArithmeticUnsigned[uint32])) - suite.Run(t, new(UnaryArithmeticUnsigned[uint64])) - suite.Run(t, &UnaryArithmeticFloating[float32]{min: -math.MaxFloat32, max: math.MaxFloat32, smallest: math.SmallestNonzeroFloat32}) - suite.Run(t, &UnaryArithmeticFloating[float64]{min: -math.MaxFloat64, max: math.MaxFloat64, smallest: math.SmallestNonzeroFloat64}) - suite.Run(t, new(DecimalUnaryArithmeticSuite)) -} - -type BitwiseArithmeticSuite[T arrow.IntType | arrow.UintType] struct { - BinaryFuncTestSuite -} - -func (bs *BitwiseArithmeticSuite[T]) datatype() 
arrow.DataType { - return arrow.GetDataType[T]() -} - -// to make it easier to test different widths, tests give bytes which -// get repeated to make an array of the actual type -func (bs *BitwiseArithmeticSuite[T]) expandByteArray(values []byte) arrow.Array { - vals := make([]T, len(values)+1) - sz := kernels.SizeOf[T]() - for i, v := range values { - memory.Set(unsafe.Slice((*byte)(unsafe.Pointer(&vals[i])), sz), v) - } - valid := make([]bool, len(vals)) - for i := range values { - valid[i] = true - } - return exec.ArrayFromSliceWithValid(bs.mem, vals, valid) -} - -func (bs *BitwiseArithmeticSuite[T]) assertBinaryOp(fn string, arg0, arg1, expected []byte) { - in0, in1 := bs.expandByteArray(arg0), bs.expandByteArray(arg1) - out := bs.expandByteArray(expected) - defer func() { - in0.Release() - in1.Release() - out.Release() - }() - - actual, err := compute.CallFunction(bs.ctx, fn, nil, &compute.ArrayDatum{in0.Data()}, &compute.ArrayDatum{in1.Data()}) - bs.Require().NoError(err) - defer actual.Release() - assertDatumsEqual(bs.T(), &compute.ArrayDatum{out.Data()}, actual, nil, nil) - - for i := 0; i < out.Len(); i++ { - a0, err := scalar.GetScalar(in0, i) - bs.Require().NoError(err) - a1, err := scalar.GetScalar(in1, i) - bs.Require().NoError(err) - exp, err := scalar.GetScalar(out, i) - bs.Require().NoError(err) - - actual, err := compute.CallFunction(bs.ctx, fn, nil, compute.NewDatum(a0), compute.NewDatum(a1)) - bs.Require().NoError(err) - assertScalarEquals(bs.T(), exp, actual.(*compute.ScalarDatum).Value) - } -} - -func (bs *BitwiseArithmeticSuite[T]) TestBitWiseAnd() { - bs.Run(bs.datatype().String(), func() { - bs.assertBinaryOp("bit_wise_and", []byte{0x00, 0xFF, 0x00, 0xFF}, - []byte{0x00, 0x00, 0xFF, 0xFF}, []byte{0x00, 0x00, 0x00, 0xFF}) - }) -} - -func (bs *BitwiseArithmeticSuite[T]) TestBitWiseOr() { - bs.Run(bs.datatype().String(), func() { - bs.assertBinaryOp("bit_wise_or", []byte{0x00, 0xFF, 0x00, 0xFF}, - []byte{0x00, 0x00, 0xFF, 0xFF}, []byte{0x00, 
0xFF, 0xFF, 0xFF}) - }) -} - -func (bs *BitwiseArithmeticSuite[T]) TestBitWiseXor() { - bs.Run(bs.datatype().String(), func() { - bs.assertBinaryOp("bit_wise_xor", []byte{0x00, 0xFF, 0x00, 0xFF}, - []byte{0x00, 0x00, 0xFF, 0xFF}, []byte{0x00, 0xFF, 0xFF, 0x00}) - }) -} - -func TestBitwiseArithmetic(t *testing.T) { - suite.Run(t, new(BitwiseArithmeticSuite[int8])) - suite.Run(t, new(BitwiseArithmeticSuite[uint8])) - suite.Run(t, new(BitwiseArithmeticSuite[int16])) - suite.Run(t, new(BitwiseArithmeticSuite[uint16])) - suite.Run(t, new(BitwiseArithmeticSuite[int32])) - suite.Run(t, new(BitwiseArithmeticSuite[uint32])) - suite.Run(t, new(BitwiseArithmeticSuite[int64])) - suite.Run(t, new(BitwiseArithmeticSuite[uint64])) -} - -var roundModes = []compute.RoundMode{ - compute.RoundDown, - compute.RoundUp, - compute.RoundTowardsZero, - compute.RoundTowardsInfinity, - compute.RoundHalfDown, - compute.RoundHalfUp, - compute.RoundHalfTowardsZero, - compute.RoundHalfTowardsInfinity, - compute.RoundHalfToEven, - compute.RoundHalfToOdd, -} - -type UnaryRoundSuite[T arrow.NumericType] struct { - UnaryArithmeticSuite[T, compute.RoundOptions] -} - -func (us *UnaryRoundSuite[T]) setRoundMode(mode compute.RoundMode) { - us.opts.Mode = mode -} - -func (us *UnaryRoundSuite[T]) setRoundNDigits(v int64) { - us.opts.NDigits = v -} - -type UnaryRoundToMultipleSuite[T arrow.NumericType] struct { - UnaryArithmeticSuite[T, compute.RoundToMultipleOptions] -} - -func (us *UnaryRoundToMultipleSuite[T]) setRoundMode(mode compute.RoundMode) { - us.opts.Mode = mode -} - -func (us *UnaryRoundToMultipleSuite[T]) setRoundMultiple(val float64) { - us.opts.Multiple = scalar.NewFloat64Scalar(val) -} - -type UnaryRoundIntegral[T arrow.IntType | arrow.UintType] struct { - UnaryRoundSuite[T] -} - -type UnaryRoundToMultipleIntegral[T arrow.IntType | arrow.UintType] struct { - UnaryRoundToMultipleSuite[T] -} - -type UnaryRoundSigned[T arrow.IntType] struct { - UnaryRoundIntegral[T] -} - -func (us 
*UnaryRoundSigned[T]) TestRound() { - values := `[0, 1, -13, -50, 115]` - us.setRoundNDigits(0) - - arr := us.getArr(arrow.PrimitiveTypes.Float64, values) - defer arr.Release() - for _, mode := range roundModes { - us.setRoundMode(mode) - us.assertUnaryOpExpArr(compute.Round, values, arr) - } - - // test different round N-digits for nearest rounding mode - ndigExpected := []struct { - n int64 - exp string - }{ - {-2, `[0, 0, -0.0, -100, 100]`}, - {-1, `[0.0, 0.0, -10, -50, 120]`}, - {0, values}, - {1, values}, - {2, values}, - } - us.setRoundMode(compute.RoundHalfTowardsInfinity) - for _, tt := range ndigExpected { - us.Run(fmt.Sprintf("ndigits=%d", tt.n), func() { - us.setRoundNDigits(tt.n) - arr := us.getArr(arrow.PrimitiveTypes.Float64, tt.exp) - defer arr.Release() - us.assertUnaryOpExpArr(compute.Round, values, arr) - }) - } -} - -type UnaryRoundToMultipleSigned[T arrow.IntType] struct { - UnaryRoundToMultipleIntegral[T] -} - -func (us *UnaryRoundToMultipleSigned[T]) TestRoundToMultiple() { - values := `[0, 1, -13, -50, 115]` - us.setRoundMultiple(1) - for _, mode := range roundModes { - us.setRoundMode(mode) - arr := us.getArr(arrow.PrimitiveTypes.Float64, values) - defer arr.Release() - us.assertUnaryOpExpArr(compute.RoundToMultiple, values, arr) - } - - tests := []struct { - mult float64 - exp string - }{ - {2, `[0.0, 2, -14, -50, 116]`}, - {0.05, `[0.0, 1, -13, -50, 115]`}, - {0.1, values}, - {10, `[0.0, 0.0, -10, -50, 120]`}, - {100, `[0.0, 0.0, -0.0, -100, 100]`}, - } - - us.setRoundMode(compute.RoundHalfTowardsInfinity) - for _, tt := range tests { - us.setRoundMultiple(tt.mult) - arr := us.getArr(arrow.PrimitiveTypes.Float64, tt.exp) - defer arr.Release() - us.assertUnaryOpExpArr(compute.RoundToMultiple, values, arr) - } -} - -type UnaryRoundUnsigned[T arrow.UintType] struct { - UnaryRoundIntegral[T] -} - -func (us *UnaryRoundUnsigned[T]) TestRound() { - values := `[0, 1, 13, 50, 115]` - us.setRoundNDigits(0) - - arr := 
us.getArr(arrow.PrimitiveTypes.Float64, values) - defer arr.Release() - for _, mode := range roundModes { - us.setRoundMode(mode) - us.assertUnaryOpExpArr(compute.Round, values, arr) - } - - // test different round N-digits for nearest rounding mode - ndigExpected := []struct { - n int64 - exp string - }{ - {-2, `[0, 0, 0, 100, 100]`}, - {-1, `[0.0, 0.0, 10, 50, 120]`}, - {0, values}, - {1, values}, - {2, values}, - } - us.setRoundMode(compute.RoundHalfTowardsInfinity) - for _, tt := range ndigExpected { - us.Run(fmt.Sprintf("ndigits=%d", tt.n), func() { - us.setRoundNDigits(tt.n) - arr := us.getArr(arrow.PrimitiveTypes.Float64, tt.exp) - defer arr.Release() - us.assertUnaryOpExpArr(compute.Round, values, arr) - }) - } -} - -type UnaryRoundToMultipleUnsigned[T arrow.UintType] struct { - UnaryRoundToMultipleIntegral[T] -} - -func (us *UnaryRoundToMultipleUnsigned[T]) TestRoundToMultiple() { - values := `[0, 1, 13, 50, 115]` - us.setRoundMultiple(1) - for _, mode := range roundModes { - us.setRoundMode(mode) - arr := us.getArr(arrow.PrimitiveTypes.Float64, values) - defer arr.Release() - us.assertUnaryOpExpArr(compute.RoundToMultiple, values, arr) - } - - tests := []struct { - mult float64 - exp string - }{ - {0.05, `[0, 1, 13, 50, 115]`}, - {0.1, values}, - {2, `[0, 2, 14, 50, 116]`}, - {10, `[0, 0, 10, 50, 120]`}, - {100, `[0, 0, 0, 100, 100]`}, - } - - us.setRoundMode(compute.RoundHalfTowardsInfinity) - for _, tt := range tests { - us.setRoundMultiple(tt.mult) - arr := us.getArr(arrow.PrimitiveTypes.Float64, tt.exp) - defer arr.Release() - us.assertUnaryOpExpArr(compute.RoundToMultiple, values, arr) - } -} - -type UnaryRoundFloating[T constraints.Float] struct { - UnaryRoundSuite[T] -} - -func (us *UnaryRoundFloating[T]) TestRound() { - values := `[3.2, 3.5, 3.7, 4.5, -3.2, -3.5, -3.7]` - rmodeExpected := []struct { - mode compute.RoundMode - exp string - }{ - {compute.RoundDown, `[3, 3, 3, 4, -4, -4, -4]`}, - {compute.RoundUp, `[4, 4, 4, 5, -3, -3, -3]`}, - 
{compute.RoundTowardsZero, `[3, 3, 3, 4, -3, -3, -3]`}, - {compute.RoundTowardsInfinity, `[4, 4, 4, 5, -4, -4, -4]`}, - {compute.RoundHalfDown, `[3, 3, 4, 4, -3, -4, -4]`}, - {compute.RoundHalfUp, `[3, 4, 4, 5, -3, -3, -4]`}, - {compute.RoundHalfTowardsZero, `[3, 3, 4, 4, -3, -3, -4]`}, - {compute.RoundHalfToEven, `[3, 4, 4, 4, -3, -4, -4]`}, - {compute.RoundHalfToOdd, `[3, 3, 4, 5, -3, -3, -4]`}, - } - us.setRoundNDigits(0) - for _, tt := range rmodeExpected { - us.Run(tt.mode.String(), func() { - us.setRoundMode(tt.mode) - us.assertUnaryOp(compute.Round, `[]`, `[]`) - us.assertUnaryOp(compute.Round, `[null, 0, "Inf", "-Inf", "NaN"]`, - `[null, 0, "Inf", "-Inf", "NaN"]`) - us.assertUnaryOp(compute.Round, values, tt.exp) - }) - } - - // test different round n-digits for nearest rounding mode - values = `[320, 3.5, 3.075, 4.5, -3.212, -35.1234, -3.045]` - ndigitsExp := []struct { - n int64 - exp string - }{ - {-2, `[300, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0]`}, - {-1, `[320, 0.0, 0.0, 0.0, -0.0, -40, -0.0]`}, - {0, `[320, 4, 3, 5, -3, -35, -3]`}, - {1, `[320, 3.5, 3.1, 4.5, -3.2, -35.1, -3]`}, - {2, `[320, 3.5, 3.08, 4.5, -3.21, -35.12, -3.05]`}, - } - - us.setRoundMode(compute.RoundHalfTowardsInfinity) - for _, tt := range ndigitsExp { - us.Run(fmt.Sprintf("ndigits=%d", tt.n), func() { - us.setRoundNDigits(tt.n) - us.assertUnaryOp(compute.Round, values, tt.exp) - }) - } -} - -type UnaryRoundToMultipleFloating[T constraints.Float] struct { - UnaryRoundToMultipleSuite[T] -} - -func (us *UnaryRoundToMultipleFloating[T]) TestRoundToMultiple() { - values := `[3.2, 3.5, 3.7, 4.5, -3.2, -3.5, -3.7]` - rmodeExpected := []struct { - mode compute.RoundMode - exp string - }{ - {compute.RoundDown, `[3, 3, 3, 4, -4, -4, -4]`}, - {compute.RoundUp, `[4, 4, 4, 5, -3, -3, -3]`}, - {compute.RoundTowardsZero, `[3, 3, 3, 4, -3, -3, -3]`}, - {compute.RoundTowardsInfinity, `[4, 4, 4, 5, -4, -4, -4]`}, - {compute.RoundHalfDown, `[3, 3, 4, 4, -3, -4, -4]`}, - {compute.RoundHalfUp, `[3, 4, 4, 
5, -3, -3, -4]`}, - {compute.RoundHalfTowardsZero, `[3, 3, 4, 4, -3, -3, -4]`}, - {compute.RoundHalfToEven, `[3, 4, 4, 4, -3, -4, -4]`}, - {compute.RoundHalfToOdd, `[3, 3, 4, 5, -3, -3, -4]`}, - } - us.setRoundMultiple(1) - for _, tt := range rmodeExpected { - us.Run(tt.mode.String(), func() { - us.setRoundMode(tt.mode) - us.assertUnaryOp(compute.RoundToMultiple, `[]`, `[]`) - us.assertUnaryOp(compute.RoundToMultiple, `[null, 0, "Inf", "-Inf", "NaN"]`, - `[null, 0, "Inf", "-Inf", "NaN"]`) - us.assertUnaryOp(compute.RoundToMultiple, values, tt.exp) - }) - } - - // test different round n-digits for nearest rounding mode - values = `[320, 3.5, 3.075, 4.5, -3.212, -35.1234, -3.045]` - multAndExp := []struct { - mult float64 - exp string - }{ - {0.05, `[320, 3.5, 3.1, 4.5, -3.2, -35.1, -3.05]`}, - {0.1, `[320, 3.5, 3.1, 4.5, -3.2, -35.1, -3]`}, - {2, `[320, 4, 4, 4, -4, -36, -4]`}, - {10, `[320, 0.0, 0.0, 0.0, -0.0, -40, -0.0]`}, - {100, `[300, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0]`}, - } - - us.setRoundMode(compute.RoundHalfTowardsInfinity) - for _, tt := range multAndExp { - us.Run(fmt.Sprintf("multiple=%f", tt.mult), func() { - us.setRoundMultiple(tt.mult) - us.assertUnaryOp(compute.RoundToMultiple, values, tt.exp) - }) - } -} - -func TestRounding(t *testing.T) { - suite.Run(t, new(UnaryRoundSigned[int8])) - suite.Run(t, new(UnaryRoundSigned[int16])) - suite.Run(t, new(UnaryRoundSigned[int32])) - suite.Run(t, new(UnaryRoundSigned[int64])) - suite.Run(t, new(UnaryRoundUnsigned[uint8])) - suite.Run(t, new(UnaryRoundUnsigned[uint16])) - suite.Run(t, new(UnaryRoundUnsigned[uint32])) - suite.Run(t, new(UnaryRoundUnsigned[uint64])) - suite.Run(t, new(UnaryRoundFloating[float32])) - suite.Run(t, new(UnaryRoundFloating[float64])) - - suite.Run(t, new(UnaryRoundToMultipleSigned[int8])) - suite.Run(t, new(UnaryRoundToMultipleSigned[int16])) - suite.Run(t, new(UnaryRoundToMultipleSigned[int32])) - suite.Run(t, new(UnaryRoundToMultipleSigned[int64])) - suite.Run(t, 
new(UnaryRoundToMultipleUnsigned[uint8])) - suite.Run(t, new(UnaryRoundToMultipleUnsigned[uint16])) - suite.Run(t, new(UnaryRoundToMultipleUnsigned[uint32])) - suite.Run(t, new(UnaryRoundToMultipleUnsigned[uint64])) - suite.Run(t, new(UnaryRoundToMultipleFloating[float32])) - suite.Run(t, new(UnaryRoundToMultipleFloating[float64])) -} - -const seed = 0x94378165 - -type binaryOp = func(ctx context.Context, left, right compute.Datum) (compute.Datum, error) - -func Add(ctx context.Context, left, right compute.Datum) (compute.Datum, error) { - var opts compute.ArithmeticOptions - return compute.Add(ctx, opts, left, right) -} - -func Subtract(ctx context.Context, left, right compute.Datum) (compute.Datum, error) { - var opts compute.ArithmeticOptions - return compute.Subtract(ctx, opts, left, right) -} - -func AddUnchecked(ctx context.Context, left, right compute.Datum) (compute.Datum, error) { - opts := compute.ArithmeticOptions{NoCheckOverflow: true} - return compute.Add(ctx, opts, left, right) -} - -func SubtractUnchecked(ctx context.Context, left, right compute.Datum) (compute.Datum, error) { - opts := compute.ArithmeticOptions{NoCheckOverflow: true} - return compute.Subtract(ctx, opts, left, right) -} - -func arrayScalarKernel(b *testing.B, sz int, nullProp float64, op binaryOp, dt arrow.DataType) { - b.Run("array scalar", func(b *testing.B) { - var ( - mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - arraySize = int64(sz / dt.(arrow.FixedWidthDataType).Bytes()) - min int64 = 6 - max = min + 15 - sc, _ = scalar.MakeScalarParam(6, dt) - rhs compute.Datum = &compute.ScalarDatum{Value: sc} - rng = gen.NewRandomArrayGenerator(seed, mem) - ) - - lhs := rng.Numeric(dt.ID(), arraySize, min, max, nullProp) - b.Cleanup(func() { - lhs.Release() - }) - - var ( - res compute.Datum - err error - ctx = context.Background() - left = &compute.ArrayDatum{Value: lhs.Data()} - ) - - b.SetBytes(arraySize) - b.ResetTimer() - for n := 0; n < b.N; n++ { - res, err = op(ctx, 
left, rhs) - b.StopTimer() - if err != nil { - b.Fatal(err) - } - res.Release() - b.StartTimer() - } - }) -} - -func arrayArrayKernel(b *testing.B, sz int, nullProp float64, op binaryOp, dt arrow.DataType) { - b.Run("array array", func(b *testing.B) { - var ( - mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - arraySize = int64(sz / dt.(arrow.FixedWidthDataType).Bytes()) - rmin int64 = 1 - rmax = rmin + 6 // 7 - lmin = rmax + 1 // 8 - lmax = lmin + 6 // 14 - rng = gen.NewRandomArrayGenerator(seed, mem) - ) - - lhs := rng.Numeric(dt.ID(), arraySize, lmin, lmax, nullProp) - rhs := rng.Numeric(dt.ID(), arraySize, rmin, rmax, nullProp) - b.Cleanup(func() { - lhs.Release() - rhs.Release() - }) - var ( - res compute.Datum - err error - ctx = context.Background() - left = &compute.ArrayDatum{Value: lhs.Data()} - right = &compute.ArrayDatum{Value: rhs.Data()} - ) - - b.SetBytes(arraySize) - b.ResetTimer() - for n := 0; n < b.N; n++ { - res, err = op(ctx, left, right) - b.StopTimer() - if err != nil { - b.Fatal(err) - } - res.Release() - b.StartTimer() - } - }) -} - -func BenchmarkScalarArithmetic(b *testing.B) { - args := []struct { - sz int - nullProb float64 - }{ - {CpuCacheSizes[2], 0}, - {CpuCacheSizes[2], 0.5}, - {CpuCacheSizes[2], 1}, - } - - testfns := []struct { - name string - op binaryOp - }{ - {"Add", Add}, - {"AddUnchecked", AddUnchecked}, - {"Subtract", Subtract}, - {"SubtractUnchecked", SubtractUnchecked}, - } - - for _, dt := range numericTypes { - b.Run(dt.String(), func(b *testing.B) { - for _, benchArgs := range args { - b.Run(fmt.Sprintf("sz=%d/nullprob=%.2f", benchArgs.sz, benchArgs.nullProb), func(b *testing.B) { - for _, tfn := range testfns { - b.Run(tfn.name, func(b *testing.B) { - arrayArrayKernel(b, benchArgs.sz, benchArgs.nullProb, tfn.op, dt) - arrayScalarKernel(b, benchArgs.sz, benchArgs.nullProb, tfn.op, dt) - }) - } - }) - } - }) - } -} diff --git a/go/arrow/compute/cast.go b/go/arrow/compute/cast.go deleted file mode 100644 index 
6ef6fdddd16ff..0000000000000 --- a/go/arrow/compute/cast.go +++ /dev/null @@ -1,587 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute - -import ( - "context" - "fmt" - "sync" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" -) - -var ( - castTable map[arrow.Type]*castFunction - castInit sync.Once - - castDoc = FunctionDoc{ - Summary: "cast values to another data type", - Description: "Behavior when values wouldn't fit in the target type\ncan be controlled through CastOptions.", - ArgNames: []string{"input"}, - OptionsType: "CastOptions", - OptionsRequired: true, - } - castMetaFunc = NewMetaFunction("cast", Unary(), castDoc, - func(ctx context.Context, fo FunctionOptions, d ...Datum) (Datum, error) { - castOpts := fo.(*CastOptions) - if castOpts == nil || castOpts.ToType == nil { - return nil, fmt.Errorf("%w: cast requires that options be passed with a ToType", arrow.ErrInvalid) - } - - if arrow.TypeEqual(d[0].(ArrayLikeDatum).Type(), castOpts.ToType) { - return 
NewDatum(d[0]), nil - } - - fn, err := getCastFunction(castOpts.ToType) - if err != nil { - return nil, fmt.Errorf("%w from %s", err, d[0].(ArrayLikeDatum).Type()) - } - - return fn.Execute(ctx, fo, d...) - }) -) - -func RegisterScalarCast(reg FunctionRegistry) { - reg.AddFunction(castMetaFunc, false) -} - -type castFunction struct { - ScalarFunction - - inIDs []arrow.Type - out arrow.Type -} - -func newCastFunction(name string, outType arrow.Type) *castFunction { - return &castFunction{ - ScalarFunction: *NewScalarFunction(name, Unary(), EmptyFuncDoc), - out: outType, - inIDs: make([]arrow.Type, 0, 1), - } -} - -func (cf *castFunction) AddTypeCast(in arrow.Type, kernel exec.ScalarKernel) error { - kernel.Init = exec.OptionsInit[kernels.CastState] - if err := cf.AddKernel(kernel); err != nil { - return err - } - cf.inIDs = append(cf.inIDs, in) - return nil -} - -func (cf *castFunction) AddNewTypeCast(inID arrow.Type, inTypes []exec.InputType, out exec.OutputType, - ex exec.ArrayKernelExec, nullHandle exec.NullHandling, memAlloc exec.MemAlloc) error { - - kn := exec.NewScalarKernel(inTypes, out, ex, nil) - kn.NullHandling = nullHandle - kn.MemAlloc = memAlloc - return cf.AddTypeCast(inID, kn) -} - -func (cf *castFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error) { - if err := cf.checkArity(len(vals)); err != nil { - return nil, err - } - - candidates := make([]*exec.ScalarKernel, 0, 1) - for i := range cf.kernels { - if cf.kernels[i].Signature.MatchesInputs(vals) { - candidates = append(candidates, &cf.kernels[i]) - } - } - - if len(candidates) == 0 { - return nil, fmt.Errorf("%w: unsupported cast from %s to %s using function %s", - arrow.ErrNotImplemented, vals[0], cf.out, cf.name) - } - - if len(candidates) == 1 { - // one match! - return candidates[0], nil - } - - // in this situation we may have both an EXACT type and - // a SAME_TYPE_ID match. 
So we will see if there is an exact - // match among the candidates and if not, we just return the - // first one - for _, k := range candidates { - arg0 := k.Signature.InputTypes[0] - if arg0.Kind == exec.InputExact { - // found one! - return k, nil - } - } - - // just return some kernel that matches since we didn't find an exact - return candidates[0], nil -} - -func unpackDictionary(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - var ( - dictArr = batch.Values[0].Array.MakeArray().(*array.Dictionary) - opts = ctx.State.(kernels.CastState) - dictType = dictArr.DataType().(*arrow.DictionaryType) - toType = opts.ToType - ) - defer dictArr.Release() - - if !arrow.TypeEqual(toType, dictType) && !CanCast(dictType, toType) { - return fmt.Errorf("%w: cast type %s incompatible with dictionary type %s", - arrow.ErrInvalid, toType, dictType) - } - - unpacked, err := TakeArray(ctx.Ctx, dictArr.Dictionary(), dictArr.Indices()) - if err != nil { - return err - } - defer unpacked.Release() - - if !arrow.TypeEqual(dictType, toType) { - unpacked, err = CastArray(ctx.Ctx, unpacked, &opts) - if err != nil { - return err - } - defer unpacked.Release() - } - - out.TakeOwnership(unpacked.Data()) - return nil -} - -func CastFromExtension(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - opts := ctx.State.(kernels.CastState) - - arr := batch.Values[0].Array.MakeArray().(array.ExtensionArray) - defer arr.Release() - - castOpts := CastOptions(opts) - result, err := CastArray(ctx.Ctx, arr.Storage(), &castOpts) - if err != nil { - return err - } - defer result.Release() - - out.TakeOwnership(result.Data()) - return nil -} - -func CastList[SrcOffsetT, DestOffsetT int32 | int64](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - var ( - opts = ctx.State.(kernels.CastState) - childType = out.Type.(arrow.NestedType).Fields()[0].Type - input = &batch.Values[0].Array - offsets = exec.GetSpanOffsets[SrcOffsetT](input, 1) 
- isDowncast = kernels.SizeOf[SrcOffsetT]() > kernels.SizeOf[DestOffsetT]() - ) - - out.Buffers[0] = input.Buffers[0] - out.Buffers[1] = input.Buffers[1] - - if input.Offset != 0 && len(input.Buffers[0].Buf) > 0 { - out.Buffers[0].WrapBuffer(ctx.AllocateBitmap(input.Len)) - bitutil.CopyBitmap(input.Buffers[0].Buf, int(input.Offset), int(input.Len), - out.Buffers[0].Buf, 0) - } - - // Handle list offsets - // Several cases possible: - // - The source offset is non-zero, in which case we slice the - // underlying values and shift the list offsets (regardless of - // their respective types) - // - the source offset is zero but the source and destination types - // have different list offset types, in which case we cast the offsets - // - otherwise we simply keep the original offsets - if isDowncast { - if offsets[input.Len] > SrcOffsetT(kernels.MaxOf[DestOffsetT]()) { - return fmt.Errorf("%w: array of type %s too large to convert to %s", - arrow.ErrInvalid, input.Type, out.Type) - } - } - - values := input.Children[0].MakeArray() - defer values.Release() - - if input.Offset != 0 { - out.Buffers[1].WrapBuffer( - ctx.Allocate(out.Type.(arrow.OffsetsDataType). - OffsetTypeTraits().BytesRequired(int(input.Len) + 1))) - - shiftedOffsets := exec.GetSpanOffsets[DestOffsetT](out, 1) - for i := 0; i < int(input.Len)+1; i++ { - shiftedOffsets[i] = DestOffsetT(offsets[i] - offsets[0]) - } - - values = array.NewSlice(values, int64(offsets[0]), int64(offsets[input.Len])) - defer values.Release() - } else if kernels.SizeOf[SrcOffsetT]() != kernels.SizeOf[DestOffsetT]() { - out.Buffers[1].WrapBuffer(ctx.Allocate(out.Type.(arrow.OffsetsDataType). 
- OffsetTypeTraits().BytesRequired(int(input.Len) + 1))) - - kernels.DoStaticCast(exec.GetSpanOffsets[SrcOffsetT](input, 1), - exec.GetSpanOffsets[DestOffsetT](out, 1)) - } - - // handle values - opts.ToType = childType - - castedValues, err := CastArray(ctx.Ctx, values, &opts) - if err != nil { - return err - } - defer castedValues.Release() - - out.Children = make([]exec.ArraySpan, 1) - out.Children[0].SetMembers(castedValues.Data()) - for i, b := range out.Children[0].Buffers { - if b.Owner != nil && b.Owner != values.Data().Buffers()[i] { - b.Owner.Retain() - b.SelfAlloc = true - } - } - return nil -} - -func CastStruct(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - var ( - opts = ctx.State.(kernels.CastState) - inType = batch.Values[0].Array.Type.(*arrow.StructType) - outType = out.Type.(*arrow.StructType) - inFieldCount = inType.NumFields() - outFieldCount = outType.NumFields() - ) - - fieldsToSelect := make([]int, outFieldCount) - for i := range fieldsToSelect { - fieldsToSelect[i] = -1 - } - - outFieldIndex := 0 - for inFieldIndex := 0; inFieldIndex < inFieldCount && outFieldIndex < outFieldCount; inFieldIndex++ { - inField := inType.Field(inFieldIndex) - outField := outType.Field(outFieldIndex) - if inField.Name == outField.Name { - if inField.Nullable && !outField.Nullable { - return fmt.Errorf("%w: cannot cast nullable field to non-nullable field: %s %s", - arrow.ErrType, inType, outType) - } - fieldsToSelect[outFieldIndex] = inFieldIndex - outFieldIndex++ - } - } - - if outFieldIndex < outFieldCount { - return fmt.Errorf("%w: struct fields don't match or are in the wrong order: Input: %s Output: %s", - arrow.ErrType, inType, outType) - } - - input := &batch.Values[0].Array - if len(input.Buffers[0].Buf) > 0 { - out.Buffers[0].WrapBuffer(ctx.AllocateBitmap(input.Len)) - bitutil.CopyBitmap(input.Buffers[0].Buf, int(input.Offset), int(input.Len), - out.Buffers[0].Buf, 0) - } - - out.Children = make([]exec.ArraySpan, 
outFieldCount) - for outFieldIndex, idx := range fieldsToSelect { - values := input.Children[idx].MakeArray() - defer values.Release() - values = array.NewSlice(values, input.Offset, input.Len) - defer values.Release() - - opts.ToType = outType.Field(outFieldIndex).Type - castedValues, err := CastArray(ctx.Ctx, values, &opts) - if err != nil { - return err - } - defer castedValues.Release() - - out.Children[outFieldIndex].TakeOwnership(castedValues.Data()) - } - return nil -} - -func addListCast[SrcOffsetT, DestOffsetT int32 | int64](fn *castFunction, inType arrow.Type) error { - kernel := exec.NewScalarKernel([]exec.InputType{exec.NewIDInput(inType)}, - kernels.OutputTargetType, CastList[SrcOffsetT, DestOffsetT], nil) - kernel.NullHandling = exec.NullComputedNoPrealloc - kernel.MemAlloc = exec.MemNoPrealloc - return fn.AddTypeCast(inType, kernel) -} - -func addStructToStructCast(fn *castFunction) error { - kernel := exec.NewScalarKernel([]exec.InputType{exec.NewIDInput(arrow.STRUCT)}, - kernels.OutputTargetType, CastStruct, nil) - kernel.NullHandling = exec.NullComputedNoPrealloc - return fn.AddTypeCast(arrow.STRUCT, kernel) -} - -func addCastFuncs(fn []*castFunction) { - for _, f := range fn { - f.AddNewTypeCast(arrow.EXTENSION, []exec.InputType{exec.NewIDInput(arrow.EXTENSION)}, - f.kernels[0].Signature.OutType, CastFromExtension, - exec.NullComputedNoPrealloc, exec.MemNoPrealloc) - castTable[f.out] = f - } -} - -func initCastTable() { - castTable = make(map[arrow.Type]*castFunction) - addCastFuncs(getBooleanCasts()) - addCastFuncs(getNumericCasts()) - addCastFuncs(getBinaryLikeCasts()) - addCastFuncs(getTemporalCasts()) - addCastFuncs(getNestedCasts()) - - nullToExt := newCastFunction("cast_extension", arrow.EXTENSION) - nullToExt.AddNewTypeCast(arrow.NULL, []exec.InputType{exec.NewExactInput(arrow.Null)}, - kernels.OutputTargetType, kernels.CastFromNull, exec.NullComputedNoPrealloc, exec.MemNoPrealloc) - castTable[arrow.EXTENSION] = nullToExt -} - -func 
getCastFunction(to arrow.DataType) (*castFunction, error) { - castInit.Do(initCastTable) - - fn, ok := castTable[to.ID()] - if ok { - return fn, nil - } - - return nil, fmt.Errorf("%w: unsupported cast to %s", arrow.ErrNotImplemented, to) -} - -func getNestedCasts() []*castFunction { - out := make([]*castFunction, 0) - - addKernels := func(fn *castFunction, kernels []exec.ScalarKernel) { - for _, k := range kernels { - if err := fn.AddTypeCast(k.Signature.InputTypes[0].MatchID(), k); err != nil { - panic(err) - } - } - } - - castLists := newCastFunction("cast_list", arrow.LIST) - addKernels(castLists, kernels.GetCommonCastKernels(arrow.LIST, kernels.OutputTargetType)) - if err := addListCast[int32, int32](castLists, arrow.LIST); err != nil { - panic(err) - } - if err := addListCast[int64, int32](castLists, arrow.LARGE_LIST); err != nil { - panic(err) - } - out = append(out, castLists) - - castLargeLists := newCastFunction("cast_large_list", arrow.LARGE_LIST) - addKernels(castLargeLists, kernels.GetCommonCastKernels(arrow.LARGE_LIST, kernels.OutputTargetType)) - if err := addListCast[int32, int64](castLargeLists, arrow.LIST); err != nil { - panic(err) - } - if err := addListCast[int64, int64](castLargeLists, arrow.LARGE_LIST); err != nil { - panic(err) - } - out = append(out, castLargeLists) - - castFsl := newCastFunction("cast_fixed_size_list", arrow.FIXED_SIZE_LIST) - addKernels(castFsl, kernels.GetCommonCastKernels(arrow.FIXED_SIZE_LIST, kernels.OutputTargetType)) - out = append(out, castFsl) - - castStruct := newCastFunction("cast_struct", arrow.STRUCT) - addKernels(castStruct, kernels.GetCommonCastKernels(arrow.STRUCT, kernels.OutputTargetType)) - if err := addStructToStructCast(castStruct); err != nil { - panic(err) - } - out = append(out, castStruct) - - return out -} - -func getBooleanCasts() []*castFunction { - fn := newCastFunction("cast_boolean", arrow.BOOL) - kns := kernels.GetBooleanCastKernels() - - for _, k := range kns { - if err := 
fn.AddTypeCast(k.Signature.InputTypes[0].Type.ID(), k); err != nil { - panic(err) - } - } - - return []*castFunction{fn} -} - -func getTemporalCasts() []*castFunction { - output := make([]*castFunction, 0) - addFn := func(name string, id arrow.Type, kernels []exec.ScalarKernel) { - fn := newCastFunction(name, id) - for _, k := range kernels { - if err := fn.AddTypeCast(k.Signature.InputTypes[0].MatchID(), k); err != nil { - panic(err) - } - } - fn.AddNewTypeCast(arrow.DICTIONARY, []exec.InputType{exec.NewIDInput(arrow.DICTIONARY)}, - kernels[0].Signature.OutType, unpackDictionary, exec.NullComputedNoPrealloc, exec.MemNoPrealloc) - output = append(output, fn) - } - - addFn("cast_timestamp", arrow.TIMESTAMP, kernels.GetTimestampCastKernels()) - addFn("cast_date32", arrow.DATE32, kernels.GetDate32CastKernels()) - addFn("cast_date64", arrow.DATE64, kernels.GetDate64CastKernels()) - addFn("cast_time32", arrow.TIME32, kernels.GetTime32CastKernels()) - addFn("cast_time64", arrow.TIME64, kernels.GetTime64CastKernels()) - addFn("cast_duration", arrow.DURATION, kernels.GetDurationCastKernels()) - addFn("cast_month_day_nano_interval", arrow.INTERVAL_MONTH_DAY_NANO, kernels.GetIntervalCastKernels()) - return output -} - -func getNumericCasts() []*castFunction { - out := make([]*castFunction, 0) - - getFn := func(name string, ty arrow.Type, kns []exec.ScalarKernel) *castFunction { - fn := newCastFunction(name, ty) - for _, k := range kns { - if err := fn.AddTypeCast(k.Signature.InputTypes[0].MatchID(), k); err != nil { - panic(err) - } - } - - fn.AddNewTypeCast(arrow.DICTIONARY, []exec.InputType{exec.NewIDInput(arrow.DICTIONARY)}, - kns[0].Signature.OutType, unpackDictionary, exec.NullComputedNoPrealloc, exec.MemNoPrealloc) - - return fn - } - - out = append(out, getFn("cast_int8", arrow.INT8, kernels.GetCastToInteger[int8](arrow.PrimitiveTypes.Int8))) - out = append(out, getFn("cast_int16", arrow.INT16, kernels.GetCastToInteger[int8](arrow.PrimitiveTypes.Int16))) - - castInt32 
:= getFn("cast_int32", arrow.INT32, kernels.GetCastToInteger[int32](arrow.PrimitiveTypes.Int32)) - castInt32.AddTypeCast(arrow.DATE32, - kernels.GetZeroCastKernel(arrow.DATE32, - exec.NewExactInput(arrow.FixedWidthTypes.Date32), - exec.NewOutputType(arrow.PrimitiveTypes.Int32))) - castInt32.AddTypeCast(arrow.TIME32, - kernels.GetZeroCastKernel(arrow.TIME32, - exec.NewIDInput(arrow.TIME32), exec.NewOutputType(arrow.PrimitiveTypes.Int32))) - out = append(out, castInt32) - - castInt64 := getFn("cast_int64", arrow.INT64, kernels.GetCastToInteger[int64](arrow.PrimitiveTypes.Int64)) - castInt64.AddTypeCast(arrow.DATE64, - kernels.GetZeroCastKernel(arrow.DATE64, - exec.NewIDInput(arrow.DATE64), - exec.NewOutputType(arrow.PrimitiveTypes.Int64))) - castInt64.AddTypeCast(arrow.TIME64, - kernels.GetZeroCastKernel(arrow.TIME64, - exec.NewIDInput(arrow.TIME64), - exec.NewOutputType(arrow.PrimitiveTypes.Int64))) - castInt64.AddTypeCast(arrow.DURATION, - kernels.GetZeroCastKernel(arrow.DURATION, - exec.NewIDInput(arrow.DURATION), - exec.NewOutputType(arrow.PrimitiveTypes.Int64))) - castInt64.AddTypeCast(arrow.TIMESTAMP, - kernels.GetZeroCastKernel(arrow.TIMESTAMP, - exec.NewIDInput(arrow.TIMESTAMP), - exec.NewOutputType(arrow.PrimitiveTypes.Int64))) - out = append(out, castInt64) - - out = append(out, getFn("cast_uint8", arrow.UINT8, kernels.GetCastToInteger[uint8](arrow.PrimitiveTypes.Uint8))) - out = append(out, getFn("cast_uint16", arrow.UINT16, kernels.GetCastToInteger[uint16](arrow.PrimitiveTypes.Uint16))) - out = append(out, getFn("cast_uint32", arrow.UINT32, kernels.GetCastToInteger[uint32](arrow.PrimitiveTypes.Uint32))) - out = append(out, getFn("cast_uint64", arrow.UINT64, kernels.GetCastToInteger[uint64](arrow.PrimitiveTypes.Uint64))) - - out = append(out, getFn("cast_half_float", arrow.FLOAT16, kernels.GetCommonCastKernels(arrow.FLOAT16, exec.NewOutputType(arrow.FixedWidthTypes.Float16)))) - out = append(out, getFn("cast_float", arrow.FLOAT32, 
kernels.GetCastToFloating[float32](arrow.PrimitiveTypes.Float32))) - out = append(out, getFn("cast_double", arrow.FLOAT64, kernels.GetCastToFloating[float64](arrow.PrimitiveTypes.Float64))) - - // cast to decimal128 - out = append(out, getFn("cast_decimal", arrow.DECIMAL128, kernels.GetCastToDecimal128())) - // cast to decimal256 - out = append(out, getFn("cast_decimal256", arrow.DECIMAL256, kernels.GetCastToDecimal256())) - return out -} - -func getBinaryLikeCasts() []*castFunction { - out := make([]*castFunction, 0) - - addFn := func(name string, ty arrow.Type, kns []exec.ScalarKernel) { - fn := newCastFunction(name, ty) - for _, k := range kns { - if err := fn.AddTypeCast(k.Signature.InputTypes[0].MatchID(), k); err != nil { - panic(err) - } - } - - fn.AddNewTypeCast(arrow.DICTIONARY, []exec.InputType{exec.NewIDInput(arrow.DICTIONARY)}, - kns[0].Signature.OutType, unpackDictionary, exec.NullComputedNoPrealloc, exec.MemNoPrealloc) - - out = append(out, fn) - } - - addFn("cast_binary", arrow.BINARY, kernels.GetToBinaryKernels(arrow.BinaryTypes.Binary)) - addFn("cast_large_binary", arrow.LARGE_BINARY, kernels.GetToBinaryKernels(arrow.BinaryTypes.LargeBinary)) - addFn("cast_string", arrow.STRING, kernels.GetToBinaryKernels(arrow.BinaryTypes.String)) - addFn("cast_large_string", arrow.LARGE_STRING, kernels.GetToBinaryKernels(arrow.BinaryTypes.LargeString)) - addFn("cast_fixed_sized_binary", arrow.FIXED_SIZE_BINARY, kernels.GetFsbCastKernels()) - return out -} - -// CastDatum is a convenience function for casting a Datum to another type. -// It is equivalent to calling CallFunction(ctx, "cast", opts, Datum) and -// should work for Scalar, Array or ChunkedArray Datums. -func CastDatum(ctx context.Context, val Datum, opts *CastOptions) (Datum, error) { - return CallFunction(ctx, "cast", opts, val) -} - -// CastArray is a convenience function for casting an Array to another type. 
-// It is equivalent to constructing a Datum for the array and using -// CallFunction(ctx, "cast", ...). -func CastArray(ctx context.Context, val arrow.Array, opts *CastOptions) (arrow.Array, error) { - d := NewDatum(val) - defer d.Release() - - out, err := CastDatum(ctx, d, opts) - if err != nil { - return nil, err - } - - defer out.Release() - return out.(*ArrayDatum).MakeArray(), nil -} - -// CastToType is a convenience function equivalent to calling -// CastArray(ctx, val, compute.SafeCastOptions(toType)) -func CastToType(ctx context.Context, val arrow.Array, toType arrow.DataType) (arrow.Array, error) { - return CastArray(ctx, val, SafeCastOptions(toType)) -} - -// CanCast returns true if there is an implementation for casting an array -// or scalar value from the specified DataType to the other data type. -func CanCast(from, to arrow.DataType) bool { - fn, err := getCastFunction(to) - if err != nil { - return false - } - - for _, id := range fn.inIDs { - if from.ID() == id { - return true - } - } - return false -} diff --git a/go/arrow/compute/cast_test.go b/go/arrow/compute/cast_test.go deleted file mode 100644 index db6098225dda8..0000000000000 --- a/go/arrow/compute/cast_test.go +++ /dev/null @@ -1,2867 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute_test - -import ( - "context" - "fmt" - "math" - "strconv" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/apache/arrow/go/v18/internal/types" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" -) - -func getScalars(inputs []compute.Datum, idx int) []scalar.Scalar { - out := make([]scalar.Scalar, len(inputs)) - for i, in := range inputs { - if in.Kind() == compute.KindArray { - arr := in.(*compute.ArrayDatum).MakeArray() - defer arr.Release() - out[i], _ = scalar.GetScalar(arr, idx) - } else { - out[i] = in.(*compute.ScalarDatum).Value - } - } - return out -} - -func getDatums[T any](inputs []T) []compute.Datum { - out := make([]compute.Datum, len(inputs)) - for i, in := range inputs { - out[i] = compute.NewDatum(in) - } - return out -} - -func assertArraysEqual(t *testing.T, expected, actual arrow.Array, opts ...array.EqualOption) bool { - return assert.Truef(t, array.ApproxEqual(expected, actual, opts...), "expected: %s\ngot: %s", expected, actual) -} - -func assertDatumsEqual(t *testing.T, expected, actual compute.Datum, opts []array.EqualOption, scalarOpts []scalar.EqualOption) { - require.Equal(t, expected.Kind(), actual.Kind()) - - switch expected.Kind() { - case compute.KindScalar: - want := expected.(*compute.ScalarDatum).Value - got := actual.(*compute.ScalarDatum).Value - assert.Truef(t, scalar.ApproxEquals(want, 
got, scalarOpts...), "expected: %s\ngot: %s", want, got) - case compute.KindArray: - want := expected.(*compute.ArrayDatum).MakeArray() - got := actual.(*compute.ArrayDatum).MakeArray() - assertArraysEqual(t, want, got, opts...) - want.Release() - got.Release() - case compute.KindChunked: - want := expected.(*compute.ChunkedDatum).Value - got := actual.(*compute.ChunkedDatum).Value - assert.Truef(t, array.ChunkedEqual(want, got), "expected: %s\ngot: %s", want, got) - default: - assert.Truef(t, actual.Equals(expected), "expected: %s\ngot: %s", expected, actual) - } -} - -func checkScalarNonRecursive(t *testing.T, funcName string, inputs []compute.Datum, expected compute.Datum, opts compute.FunctionOptions) { - out, err := compute.CallFunction(context.Background(), funcName, opts, inputs...) - assert.NoError(t, err) - defer out.Release() - assertDatumsEqual(t, expected, out, nil, nil) -} - -func checkScalarWithScalars(t *testing.T, funcName string, inputs []scalar.Scalar, expected scalar.Scalar, opts compute.FunctionOptions) { - datums := getDatums(inputs) - defer func() { - for _, s := range inputs { - if r, ok := s.(scalar.Releasable); ok { - r.Release() - } - } - for _, d := range datums { - d.Release() - } - }() - out, err := compute.CallFunction(context.Background(), funcName, opts, datums...) 
- assert.NoError(t, err) - defer out.Release() - if !scalar.Equals(out.(*compute.ScalarDatum).Value, expected) { - var b strings.Builder - b.WriteString(funcName + "(") - for i, in := range inputs { - if i != 0 { - b.WriteByte(',') - } - b.WriteString(in.String()) - } - b.WriteByte(')') - b.WriteString(" = " + out.(*compute.ScalarDatum).Value.String()) - b.WriteString(" != " + expected.String()) - - if !arrow.TypeEqual(out.(*compute.ScalarDatum).Type(), expected.DataType()) { - fmt.Fprintf(&b, " (types differed: %s vs %s)", - out.(*compute.ScalarDatum).Type(), expected.DataType()) - } - t.Fatal(b.String()) - } -} - -func checkScalar(t *testing.T, funcName string, inputs []compute.Datum, expected compute.Datum, opts compute.FunctionOptions) { - checkScalarNonRecursive(t, funcName, inputs, expected, opts) - - if expected.Kind() == compute.KindScalar { - return - } - - exp := expected.(*compute.ArrayDatum).MakeArray() - defer exp.Release() - - // check for at least 1 array, and make sure the others are of equal len - hasArray := false - for _, in := range inputs { - if in.Kind() == compute.KindArray { - assert.EqualValues(t, exp.Len(), in.(*compute.ArrayDatum).Len()) - hasArray = true - } - } - - require.True(t, hasArray) - - // check all the input scalars - for i := 0; i < exp.Len(); i++ { - e, _ := scalar.GetScalar(exp, i) - checkScalarWithScalars(t, funcName, getScalars(inputs, i), e, opts) - if r, ok := e.(scalar.Releasable); ok { - r.Release() - } - } -} - -func assertBufferSame(t *testing.T, left, right arrow.Array, idx int) { - assert.Same(t, left.Data().Buffers()[idx], right.Data().Buffers()[idx]) -} - -func checkScalarUnary(t *testing.T, funcName string, input compute.Datum, exp compute.Datum, opt compute.FunctionOptions) { - checkScalar(t, funcName, []compute.Datum{input}, exp, opt) -} - -func checkCast(t *testing.T, input arrow.Array, exp arrow.Array, opts compute.CastOptions) { - opts.ToType = exp.DataType() - in, out := compute.NewDatum(input), 
compute.NewDatum(exp) - defer in.Release() - defer out.Release() - checkScalarUnary(t, "cast", in, out, &opts) -} - -func checkCastFails(t *testing.T, input arrow.Array, opt compute.CastOptions) { - _, err := compute.CastArray(context.Background(), input, &opt) - assert.ErrorIs(t, err, arrow.ErrInvalid) - - // for scalars, check that at least one of the input fails - // since many of the tests contain a mix of passing and failing values. - // in some cases we will want to check more precisely - nfail := 0 - for i := 0; i < input.Len(); i++ { - sc, _ := scalar.GetScalar(input, i) - if r, ok := sc.(scalar.Releasable); ok { - defer r.Release() - } - d := compute.NewDatum(sc) - defer d.Release() - out, err := compute.CastDatum(context.Background(), d, &opt) - if err != nil { - nfail++ - } else { - out.Release() - } - } - assert.Greater(t, nfail, 0) -} - -func checkCastZeroCopy(t *testing.T, input arrow.Array, toType arrow.DataType, opts *compute.CastOptions) { - opts.ToType = toType - out, err := compute.CastArray(context.Background(), input, opts) - assert.NoError(t, err) - defer out.Release() - - assert.Len(t, out.Data().Buffers(), len(input.Data().Buffers())) - for i := range out.Data().Buffers() { - assertBufferSame(t, out, input, i) - } -} - -var ( - signedIntTypes = []arrow.DataType{ - arrow.PrimitiveTypes.Int8, - arrow.PrimitiveTypes.Int16, - arrow.PrimitiveTypes.Int32, - arrow.PrimitiveTypes.Int64, - } - unsignedIntTypes = []arrow.DataType{ - arrow.PrimitiveTypes.Uint8, - arrow.PrimitiveTypes.Uint16, - arrow.PrimitiveTypes.Uint32, - arrow.PrimitiveTypes.Uint64, - } - integerTypes = append(signedIntTypes, unsignedIntTypes...) - floatingTypes = []arrow.DataType{ - arrow.PrimitiveTypes.Float32, - arrow.PrimitiveTypes.Float64, - } - numericTypes = append(integerTypes, floatingTypes...) 
- baseBinaryTypes = []arrow.DataType{ - arrow.BinaryTypes.Binary, - arrow.BinaryTypes.LargeBinary, - arrow.BinaryTypes.String, - arrow.BinaryTypes.LargeString, - } - dictIndexTypes = integerTypes -) - -type CastSuite struct { - suite.Suite - - mem *memory.CheckedAllocator -} - -func (c *CastSuite) allocateEmptyBitmap(len int) *memory.Buffer { - buf := memory.NewResizableBuffer(c.mem) - buf.Resize(int(bitutil.BytesForBits(int64(len)))) - return buf -} - -func (c *CastSuite) maskArrayWithNullsAt(input arrow.Array, toMask []int) arrow.Array { - masked := input.Data().(*array.Data).Copy() - defer masked.Release() - if masked.Buffers()[0] != nil { - masked.Buffers()[0].Release() - } - masked.Buffers()[0] = c.allocateEmptyBitmap(input.Len()) - masked.SetNullN(array.UnknownNullCount) - - if original := input.NullBitmapBytes(); len(original) > 0 { - bitutil.CopyBitmap(original, input.Data().Offset(), input.Len(), masked.Buffers()[0].Bytes(), 0) - } else { - bitutil.SetBitsTo(masked.Buffers()[0].Bytes(), 0, int64(input.Len()), true) - } - - for _, i := range toMask { - bitutil.SetBitTo(masked.Buffers()[0].Bytes(), i, false) - } - - return array.MakeFromData(masked) -} - -func (c *CastSuite) invalidUtf8Arr(dt arrow.DataType) arrow.Array { - bldr := array.NewBinaryBuilder(c.mem, dt.(arrow.BinaryDataType)) - defer bldr.Release() - - bldr.AppendValues([][]byte{ - []byte("Hi"), - []byte("olá mundo"), - []byte("你好世界"), - []byte(""), - []byte("\xa0\xa1"), // invalid utf8! 
- }, nil) - - return bldr.NewArray() -} - -type binaryBuilderAppend interface { - array.Builder - AppendValues([][]byte, []bool) -} - -func (c *CastSuite) fixedSizeInvalidUtf8(dt arrow.DataType) arrow.Array { - var bldr binaryBuilderAppend - if dt.ID() == arrow.FIXED_SIZE_BINARY { - c.Require().Equal(3, dt.(*arrow.FixedSizeBinaryType).ByteWidth) - bldr = array.NewFixedSizeBinaryBuilder(c.mem, dt.(*arrow.FixedSizeBinaryType)) - } else { - bldr = array.NewBinaryBuilder(c.mem, dt.(arrow.BinaryDataType)) - } - - defer bldr.Release() - - bldr.AppendValues([][]byte{ - []byte("Hi!"), - []byte("lá"), - []byte("你"), - []byte(" "), - []byte("\xa0\xa1\xa2"), // invalid utf8! - }, nil) - - return bldr.NewArray() -} - -func (c *CastSuite) SetupTest() { - c.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) -} - -func (c *CastSuite) TearDownTest() { - c.mem.AssertSize(c.T(), 0) -} - -func (c *CastSuite) TestCanCast() { - expectCanCast := func(from arrow.DataType, toSet []arrow.DataType, expected bool) { - for _, to := range toSet { - c.Equalf(expected, compute.CanCast(from, to), "CanCast from: %s, to: %s, expected: %t", - from, to, expected) - } - } - - canCast := func(from arrow.DataType, toSet []arrow.DataType) { - expectCanCast(from, toSet, true) - } - - cannotCast := func(from arrow.DataType, toSet []arrow.DataType) { - expectCanCast(from, toSet, false) - } - - canCast(arrow.Null, []arrow.DataType{arrow.FixedWidthTypes.Boolean}) - canCast(arrow.Null, numericTypes) - canCast(arrow.Null, baseBinaryTypes) - canCast(arrow.Null, []arrow.DataType{ - arrow.FixedWidthTypes.Date32, arrow.FixedWidthTypes.Date64, arrow.FixedWidthTypes.Time32ms, arrow.FixedWidthTypes.Timestamp_s, - }) - cannotCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.Null}, []arrow.DataType{arrow.Null}) - - canCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{arrow.FixedWidthTypes.Boolean}) - canCast(arrow.FixedWidthTypes.Boolean, numericTypes) - 
canCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) - cannotCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: arrow.FixedWidthTypes.Boolean}, []arrow.DataType{arrow.FixedWidthTypes.Boolean}) - - cannotCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{arrow.Null}) - cannotCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary}) - cannotCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{ - arrow.FixedWidthTypes.Date32, arrow.FixedWidthTypes.Date64, arrow.FixedWidthTypes.Time32ms, arrow.FixedWidthTypes.Timestamp_s}) - - for _, from := range numericTypes { - canCast(from, []arrow.DataType{arrow.FixedWidthTypes.Boolean}) - canCast(from, numericTypes) - canCast(from, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) - canCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: from}, []arrow.DataType{from}) - - cannotCast(from, []arrow.DataType{arrow.Null}) - } - - for _, from := range baseBinaryTypes { - canCast(from, []arrow.DataType{arrow.FixedWidthTypes.Boolean}) - canCast(from, numericTypes) - canCast(from, baseBinaryTypes) - canCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int64, ValueType: from}, []arrow.DataType{from}) - - // any cast which is valid for the dictionary is valid for the dictionary array - canCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint32, ValueType: from}, baseBinaryTypes) - canCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: from}, baseBinaryTypes) - - cannotCast(from, []arrow.DataType{arrow.Null}) - } - - canCast(arrow.BinaryTypes.String, []arrow.DataType{arrow.FixedWidthTypes.Timestamp_ms}) - canCast(arrow.BinaryTypes.LargeString, []arrow.DataType{arrow.FixedWidthTypes.Timestamp_ns}) - // no formatting supported - cannotCast(arrow.FixedWidthTypes.Timestamp_us, 
[]arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary}) - - canCast(&arrow.FixedSizeBinaryType{ByteWidth: 3}, []arrow.DataType{ - arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary, arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString, - &arrow.FixedSizeBinaryType{ByteWidth: 3}}) - - arrow.RegisterExtensionType(types.NewSmallintType()) - defer arrow.UnregisterExtensionType("smallint") - canCast(types.NewSmallintType(), []arrow.DataType{arrow.PrimitiveTypes.Int16}) - canCast(types.NewSmallintType(), numericTypes) // any cast which is valid for storage is supported - canCast(arrow.Null, []arrow.DataType{types.NewSmallintType()}) - - canCast(arrow.FixedWidthTypes.Date32, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) - canCast(arrow.FixedWidthTypes.Date64, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) - canCast(arrow.FixedWidthTypes.Timestamp_ns, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) - canCast(arrow.FixedWidthTypes.Timestamp_us, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) - canCast(arrow.FixedWidthTypes.Time32ms, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) - canCast(arrow.FixedWidthTypes.Time64ns, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) -} - -func (c *CastSuite) checkCastFails(dt arrow.DataType, input string, opts *compute.CastOptions) { - inArr, _, _ := array.FromJSON(c.mem, dt, strings.NewReader(input), array.WithUseNumber()) - defer inArr.Release() - - checkCastFails(c.T(), inArr, *opts) -} - -func (c *CastSuite) checkCastOpts(dtIn, dtOut arrow.DataType, inJSON, outJSON string, opts compute.CastOptions) { - inArr, _, _ := array.FromJSON(c.mem, dtIn, strings.NewReader(inJSON), array.WithUseNumber()) - outArr, _, _ := array.FromJSON(c.mem, dtOut, strings.NewReader(outJSON), array.WithUseNumber()) - defer inArr.Release() - defer outArr.Release() - - 
checkCast(c.T(), inArr, outArr, opts) -} - -func (c *CastSuite) checkCast(dtIn, dtOut arrow.DataType, inJSON, outJSON string) { - c.checkCastOpts(dtIn, dtOut, inJSON, outJSON, *compute.DefaultCastOptions(true)) -} - -func (c *CastSuite) checkCastArr(in arrow.Array, dtOut arrow.DataType, json string, opts compute.CastOptions) { - outArr, _, _ := array.FromJSON(c.mem, dtOut, strings.NewReader(json), array.WithUseNumber()) - defer outArr.Release() - checkCast(c.T(), in, outArr, opts) -} - -func (c *CastSuite) checkCastExp(dtIn arrow.DataType, inJSON string, exp arrow.Array) { - inArr, _, _ := array.FromJSON(c.mem, dtIn, strings.NewReader(inJSON), array.WithUseNumber()) - defer inArr.Release() - checkCast(c.T(), inArr, exp, *compute.DefaultCastOptions(true)) -} - -func (c *CastSuite) TestNumericToBool() { - for _, dt := range numericTypes { - c.checkCast(dt, arrow.FixedWidthTypes.Boolean, - `[0, null, 127, 1, 0]`, `[false, null, true, true, false]`) - } - - // check negative numbers - for _, dt := range []arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Float64} { - c.checkCast(dt, arrow.FixedWidthTypes.Boolean, - `[0, null, 127, -1, 0]`, `[false, null, true, true, false]`) - } -} - -func (c *CastSuite) StringToBool() { - for _, dt := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.checkCast(dt, arrow.FixedWidthTypes.Boolean, - `["False", null, "true", "True", "false"]`, `[false, null, true, true, false]`) - - c.checkCast(dt, arrow.FixedWidthTypes.Boolean, - `["0", null, "1", "1", "0"]`, `[false, null, true, true, false]`) - - opts := compute.NewCastOptions(arrow.FixedWidthTypes.Boolean, true) - c.checkCastFails(dt, `["false "]`, opts) - c.checkCastFails(dt, `["T"]`, opts) - } -} - -func (c *CastSuite) TestToIntUpcast() { - c.checkCast(arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int32, - `[0, null, 127, -1, 0]`, `[0, null, 127, -1, 0]`) - - c.checkCast(arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Int16, - `[0, 
100, 200, 255, 0]`, `[0, 100, 200, 255, 0]`) -} - -func (c *CastSuite) TestToIntDowncastSafe() { - // int16 to uint8 no overflow/underflow - c.checkCast(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint8, - `[0, null, 200, 1, 2]`, `[0, null, 200, 1, 2]`) - - // int16 to uint8, overflow - c.checkCastFails(arrow.PrimitiveTypes.Int16, `[0, null, 256, 0, 0]`, - compute.NewCastOptions(arrow.PrimitiveTypes.Uint8, true)) - // and underflow - c.checkCastFails(arrow.PrimitiveTypes.Int16, `[0, null, -1, 0, 0]`, - compute.NewCastOptions(arrow.PrimitiveTypes.Uint8, true)) - - // int32 to int16, no overflow/underflow - c.checkCast(arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int16, - `[0, null, 2000, 1, 2]`, `[0, null, 2000, 1, 2]`) - - // int32 to int16, overflow - c.checkCastFails(arrow.PrimitiveTypes.Int32, `[0, null, 2000, 70000, 2]`, - compute.NewCastOptions(arrow.PrimitiveTypes.Int16, true)) - - // and underflow - c.checkCastFails(arrow.PrimitiveTypes.Int32, `[0, null, 2000, -70000, 2]`, - compute.NewCastOptions(arrow.PrimitiveTypes.Int16, true)) - - c.checkCastFails(arrow.PrimitiveTypes.Int32, `[0, null, 2000, -70000, 2]`, - compute.NewCastOptions(arrow.PrimitiveTypes.Uint8, true)) - -} - -func (c *CastSuite) TestIntegerSignedToUnsigned() { - i32s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[-2147483648, null, -1, 65535, 2147483647]`)) - defer i32s.Release() - - // same width - checkCastFails(c.T(), i32s, *compute.NewCastOptions(arrow.PrimitiveTypes.Uint32, true)) - // wider - checkCastFails(c.T(), i32s, *compute.NewCastOptions(arrow.PrimitiveTypes.Uint64, true)) - // narrower - checkCastFails(c.T(), i32s, *compute.NewCastOptions(arrow.PrimitiveTypes.Uint16, true)) - - var options compute.CastOptions - options.AllowIntOverflow = true - - u32s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Uint32, - strings.NewReader(`[2147483648, null, 4294967295, 65535, 2147483647]`)) - defer u32s.Release() - checkCast(c.T(), i32s, u32s, 
options) - - u64s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Uint64, - strings.NewReader(`[18446744071562067968, null, 18446744073709551615, 65535, 2147483647]`), - array.WithUseNumber()) // have to use WithUseNumber so it doesn't lose precision converting to float64 - defer u64s.Release() - checkCast(c.T(), i32s, u64s, options) - - // fail because of overflow, instead of underflow - i32s, _, _ = array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, null, 0, 65536, 2147483647]`)) - defer i32s.Release() - checkCastFails(c.T(), i32s, *compute.NewCastOptions(arrow.PrimitiveTypes.Uint16, true)) - - u16s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Uint16, strings.NewReader(`[0, null, 0, 0, 65535]`)) - defer u16s.Release() - checkCast(c.T(), i32s, u16s, options) -} - -func (c *CastSuite) TestIntegerUnsignedToSigned() { - u32s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Uint32, strings.NewReader(`[4294967295, null, 0, 32768]`)) - defer u32s.Release() - // same width - checkCastFails(c.T(), u32s, *compute.SafeCastOptions(arrow.PrimitiveTypes.Int32)) - - // narrower - checkCastFails(c.T(), u32s, *compute.SafeCastOptions(arrow.PrimitiveTypes.Int16)) - sl := array.NewSlice(u32s, 1, int64(u32s.Len())) - defer sl.Release() - checkCastFails(c.T(), sl, *compute.SafeCastOptions(arrow.PrimitiveTypes.Int16)) - - var opts compute.CastOptions - opts.AllowIntOverflow = true - c.checkCastArr(u32s, arrow.PrimitiveTypes.Int32, `[-1, null, 0, 32768]`, opts) - c.checkCastArr(u32s, arrow.PrimitiveTypes.Int64, `[4294967295, null, 0, 32768]`, opts) - c.checkCastArr(u32s, arrow.PrimitiveTypes.Int16, `[-1, null, 0, -32768]`, opts) -} - -func (c *CastSuite) TestToIntDowncastUnsafe() { - opts := compute.CastOptions{AllowIntOverflow: true} - c.checkCastOpts(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint8, - `[0, null, 200, 1, 2]`, `[0, null, 200, 1, 2]`, opts) - - c.checkCastOpts(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint8, - `[0, null, 
256, 1, 2, -1]`, `[0, null, 0, 1, 2, 255]`, opts) - - c.checkCastOpts(arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int16, - `[0, null, 2000, 1, 2, -1]`, `[0, null, 2000, 1, 2, -1]`, opts) - - c.checkCastOpts(arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int16, - `[0, null, 2000, 70000, -70000]`, `[0, null, 2000, 4464, -4464]`, opts) -} - -func (c *CastSuite) TestFloatingToInt() { - for _, from := range []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float64} { - for _, to := range []arrow.DataType{arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int64} { - // float to int no truncation - c.checkCast(from, to, `[1.0, null, 0.0, -1.0, 5.0]`, `[1, null, 0, -1, 5]`) - - // float to int truncate error - opts := compute.SafeCastOptions(to) - c.checkCastFails(from, `[1.5, 0.0, null, 0.5, -1.5, 5.5]`, opts) - - // float to int truncate allowed - opts.AllowFloatTruncate = true - c.checkCastOpts(from, to, `[1.5, 0.0, null, 0.5, -1.5, 5.5]`, `[1, 0, null, 0, -1, 5]`, *opts) - } - } -} - -func (c *CastSuite) TestIntToFloating() { - for _, from := range []arrow.DataType{arrow.PrimitiveTypes.Uint32, arrow.PrimitiveTypes.Int32} { - two24 := `[16777216, 16777217]` - c.checkCastFails(from, two24, compute.SafeCastOptions(arrow.PrimitiveTypes.Float32)) - one24 := `[16777216]` - c.checkCast(from, arrow.PrimitiveTypes.Float32, one24, one24) - } - - i64s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int64, - strings.NewReader(`[-9223372036854775808, -9223372036854775807, 0, 9223372036854775806, 9223372036854775807]`), - array.WithUseNumber()) - defer i64s.Release() - - checkCastFails(c.T(), i64s, *compute.SafeCastOptions(arrow.PrimitiveTypes.Float64)) - masked := c.maskArrayWithNullsAt(i64s, []int{0, 1, 3, 4}) - defer masked.Release() - c.checkCastArr(masked, arrow.PrimitiveTypes.Float64, `[null, null, 0, null, null]`, *compute.DefaultCastOptions(true)) - - c.checkCastFails(arrow.PrimitiveTypes.Uint64, `[9007199254740992, 9007199254740993]`, 
compute.SafeCastOptions(arrow.PrimitiveTypes.Float64)) -} - -func (c *CastSuite) TestDecimal128ToInt() { - opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Int64) - - c.Run("no overflow no truncate", func() { - for _, allowIntOverflow := range []bool{false, true} { - c.Run(fmt.Sprintf("int_overflow=%t", allowIntOverflow), func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run(fmt.Sprintf("dec_truncate=%t", allowDecTruncate), func() { - opts.AllowIntOverflow = allowIntOverflow - opts.AllowDecimalTruncate = allowDecTruncate - - noOverflowNoTrunc, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`["02.0000000000", "-11.0000000000", "22.0000000000", "-121.000000000", null]`)) - - c.checkCastArr(noOverflowNoTrunc, arrow.PrimitiveTypes.Int64, `[2, -11, 22, -121, null]`, *opts) - noOverflowNoTrunc.Release() - }) - } - }) - } - }) - - c.Run("truncate no overflow", func() { - for _, allowIntOverflow := range []bool{false, true} { - c.Run("allow overflow"+strconv.FormatBool(allowIntOverflow), func() { - opts.AllowIntOverflow = allowIntOverflow - truncNoOverflow, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`["02.1000000000", "-11.0000004500", "22.0000004500", "-121.1210000000", null]`)) - - opts.AllowDecimalTruncate = true - c.checkCastArr(truncNoOverflow, arrow.PrimitiveTypes.Int64, `[2, -11, 22, -121, null]`, *opts) - - opts.AllowDecimalTruncate = false - checkCastFails(c.T(), truncNoOverflow, *opts) - truncNoOverflow.Release() - }) - } - }) - - c.Run("overflow no truncate", func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("allow truncate "+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - overflowNoTrunc, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`[ - "12345678901234567890000.0000000000", - 
"99999999999999999999999.0000000000", - null]`), array.WithUseNumber()) - defer overflowNoTrunc.Release() - opts.AllowIntOverflow = true - c.checkCastArr(overflowNoTrunc, arrow.PrimitiveTypes.Int64, - // 12345678901234567890000 % 2**64, 99999999999999999999999 % 2**64 - `[4807115922877858896, 200376420520689663, null]`, *opts) - - opts.AllowIntOverflow = false - checkCastFails(c.T(), overflowNoTrunc, *opts) - }) - } - }) - - c.Run("overflow and truncate", func() { - for _, allowIntOverFlow := range []bool{false, true} { - c.Run("allow overflow = "+strconv.FormatBool(allowIntOverFlow), func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("allow truncate = "+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowIntOverflow = allowIntOverFlow - opts.AllowDecimalTruncate = allowDecTruncate - - overflowAndTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`[ - "12345678901234567890000.0045345000", - "99999999999999999999999.0000344300", - null]`), array.WithUseNumber()) - defer overflowAndTruncate.Release() - if opts.AllowIntOverflow && opts.AllowDecimalTruncate { - c.checkCastArr(overflowAndTruncate, arrow.PrimitiveTypes.Int64, - // 12345678901234567890000 % 2**64, 99999999999999999999999 % 2**64 - `[4807115922877858896, 200376420520689663, null]`, *opts) - } else { - checkCastFails(c.T(), overflowAndTruncate, *opts) - } - }) - } - }) - } - }) - - c.Run("negative scale", func() { - bldr := array.NewDecimal128Builder(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: -4}) - defer bldr.Release() - - var err error - for _, d := range []decimal128.Num{decimal128.FromU64(1234567890000), decimal128.FromI64(-120000)} { - d, err = d.Rescale(0, -4) - c.Require().NoError(err) - bldr.Append(d) - } - negScale := bldr.NewArray() - defer negScale.Release() - - opts.AllowIntOverflow = true - opts.AllowDecimalTruncate = true - c.checkCastArr(negScale, arrow.PrimitiveTypes.Int64, `[1234567890000, 
-120000]`, *opts) - }) -} - -func (c *CastSuite) TestDecimal256ToInt() { - opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Int64) - - c.Run("no overflow no truncate", func() { - for _, allowIntOverflow := range []bool{false, true} { - c.Run(fmt.Sprintf("int_overflow=%t", allowIntOverflow), func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run(fmt.Sprintf("dec_truncate=%t", allowDecTruncate), func() { - opts.AllowIntOverflow = allowIntOverflow - opts.AllowDecimalTruncate = allowDecTruncate - - noOverflowNoTrunc, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 40, Scale: 10}, - strings.NewReader(`["02.0000000000", "-11.0000000000", "22.0000000000", "-121.000000000", null]`)) - - c.checkCastArr(noOverflowNoTrunc, arrow.PrimitiveTypes.Int64, `[2, -11, 22, -121, null]`, *opts) - noOverflowNoTrunc.Release() - }) - } - }) - } - }) - - c.Run("truncate no overflow", func() { - for _, allowIntOverflow := range []bool{false, true} { - c.Run("allow overflow"+strconv.FormatBool(allowIntOverflow), func() { - opts.AllowIntOverflow = allowIntOverflow - truncNoOverflow, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 40, Scale: 10}, - strings.NewReader(`["02.1000000000", "-11.0000004500", "22.0000004500", "-121.1210000000", null]`)) - - opts.AllowDecimalTruncate = true - c.checkCastArr(truncNoOverflow, arrow.PrimitiveTypes.Int64, `[2, -11, 22, -121, null]`, *opts) - - opts.AllowDecimalTruncate = false - checkCastFails(c.T(), truncNoOverflow, *opts) - truncNoOverflow.Release() - }) - } - }) - - c.Run("overflow no truncate", func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("allow truncate "+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - overflowNoTrunc, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 40, Scale: 10}, - strings.NewReader(`[ - "1234567890123456789000000.0000000000", - "9999999999999999999999999.0000000000", - null]`), 
array.WithUseNumber()) - defer overflowNoTrunc.Release() - opts.AllowIntOverflow = true - c.checkCastArr(overflowNoTrunc, arrow.PrimitiveTypes.Int64, - // 1234567890123456789000000 % 2**64, 9999999999999999999999999 % 2**64 - `[1096246371337547584, 1590897978359414783, null]`, *opts) - - opts.AllowIntOverflow = false - checkCastFails(c.T(), overflowNoTrunc, *opts) - }) - } - }) - - c.Run("overflow and truncate", func() { - for _, allowIntOverFlow := range []bool{false, true} { - c.Run("allow overflow = "+strconv.FormatBool(allowIntOverFlow), func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("allow truncate = "+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowIntOverflow = allowIntOverFlow - opts.AllowDecimalTruncate = allowDecTruncate - - overflowAndTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 40, Scale: 10}, - strings.NewReader(`[ - "1234567890123456789000000.0045345000", - "9999999999999999999999999.0000344300", - null]`), array.WithUseNumber()) - defer overflowAndTruncate.Release() - if opts.AllowIntOverflow && opts.AllowDecimalTruncate { - c.checkCastArr(overflowAndTruncate, arrow.PrimitiveTypes.Int64, - // 1234567890123456789000000 % 2**64, 9999999999999999999999999 % 2**64 - `[1096246371337547584, 1590897978359414783, null]`, *opts) - } else { - checkCastFails(c.T(), overflowAndTruncate, *opts) - } - }) - } - }) - } - }) - - c.Run("negative scale", func() { - bldr := array.NewDecimal256Builder(c.mem, &arrow.Decimal256Type{Precision: 40, Scale: -4}) - defer bldr.Release() - - var err error - for _, d := range []decimal256.Num{decimal256.FromU64(1234567890000), decimal256.FromI64(-120000)} { - d, err = d.Rescale(0, -4) - c.Require().NoError(err) - bldr.Append(d) - } - negScale := bldr.NewArray() - defer negScale.Release() - - opts.AllowIntOverflow = true - opts.AllowDecimalTruncate = true - c.checkCastArr(negScale, arrow.PrimitiveTypes.Int64, `[1234567890000, -120000]`, *opts) - }) -} - -func (c 
*CastSuite) TestIntegerToDecimal() { - for _, decType := range []arrow.DataType{&arrow.Decimal128Type{Precision: 22, Scale: 2}, &arrow.Decimal256Type{Precision: 22, Scale: 2}} { - c.Run(decType.String(), func() { - for _, intType := range integerTypes { - c.Run(intType.String(), func() { - c.checkCast(intType, decType, `[0, 7, null, 100, 99]`, `["0.00", "7.00", null, "100.00", "99.00"]`) - }) - } - }) - } - - c.Run("extreme value", func() { - for _, dt := range []arrow.DataType{&arrow.Decimal128Type{Precision: 19, Scale: 0}, &arrow.Decimal256Type{Precision: 19, Scale: 0}} { - c.Run(dt.String(), func() { - c.checkCast(arrow.PrimitiveTypes.Int64, dt, - `[-9223372036854775808, 9223372036854775807]`, `["-9223372036854775808", "9223372036854775807"]`) - }) - } - for _, dt := range []arrow.DataType{&arrow.Decimal128Type{Precision: 20, Scale: 0}, &arrow.Decimal256Type{Precision: 20, Scale: 0}} { - c.Run(dt.String(), func() { - c.checkCast(arrow.PrimitiveTypes.Uint64, dt, - `[0, 18446744073709551615]`, `["0", "18446744073709551615"]`) - }) - } - }) - - c.Run("insufficient output precision", func() { - var opts compute.CastOptions - opts.ToType = &arrow.Decimal128Type{Precision: 5, Scale: 3} - c.checkCastFails(arrow.PrimitiveTypes.Int8, `[0]`, &opts) - - opts.ToType = &arrow.Decimal256Type{Precision: 76, Scale: 67} - c.checkCastFails(arrow.PrimitiveTypes.Int32, `[0]`, &opts) - }) -} - -func (c *CastSuite) TestDecimal128ToDecimal128() { - var opts compute.CastOptions - - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - noTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`["02.0000000000", "30.0000000000", "22.0000000000", "-121.0000000000", null]`)) - expected, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 10}, - strings.NewReader(`["02.", "30.", "22.", "-121.", 
null]`)) - - defer noTruncate.Release() - defer expected.Release() - - checkCast(c.T(), noTruncate, expected, opts) - checkCast(c.T(), expected, noTruncate, opts) - }) - } - - c.Run("same scale diff precision", func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - d52, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 5, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - - defer d52.Release() - defer d42.Release() - - checkCast(c.T(), d52, d42, opts) - checkCast(c.T(), d42, d52, opts) - }) - } - }) - - c.Run("rescale leads to trunc", func() { - dP38S10, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`["-02.1234567890", "30.1234567890", null]`)) - dP28S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 0}, - strings.NewReader(`["-02.", "30.", null]`)) - dP38S10RoundTripped, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`["-02.0000000000", "30.0000000000", null]`)) - defer func() { - dP38S10.Release() - dP28S0.Release() - dP38S10RoundTripped.Release() - }() - - opts.AllowDecimalTruncate = true - checkCast(c.T(), dP38S10, dP28S0, opts) - checkCast(c.T(), dP28S0, dP38S10RoundTripped, opts) - - opts.AllowDecimalTruncate = false - opts.ToType = dP28S0.DataType() - checkCastFails(c.T(), dP38S10, opts) - checkCast(c.T(), dP28S0, dP38S10RoundTripped, opts) - }) - - c.Run("precision loss without rescale = trunc", func() { - d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34"]`)) - defer d42.Release() - for _, dt := range []arrow.DataType{ - &arrow.Decimal128Type{Precision: 3, Scale: 2}, - 
&arrow.Decimal128Type{Precision: 4, Scale: 3}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}} { - - opts.AllowDecimalTruncate = true - opts.ToType = dt - out, err := compute.CastArray(context.Background(), d42, &opts) - out.Release() - c.NoError(err) - - opts.AllowDecimalTruncate = false - opts.ToType = dt - checkCastFails(c.T(), d42, opts) - } - }) -} - -func (c *CastSuite) TestDecimal256ToDecimal256() { - var opts compute.CastOptions - - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - noTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10}, - strings.NewReader(`["02.0000000000", "30.0000000000", "22.0000000000", "-121.0000000000", null]`)) - expected, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 28, Scale: 10}, - strings.NewReader(`["02.", "30.", "22.", "-121.", null]`)) - - defer noTruncate.Release() - defer expected.Release() - - checkCast(c.T(), noTruncate, expected, opts) - checkCast(c.T(), expected, noTruncate, opts) - }) - } - - c.Run("same scale diff precision", func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - d52, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 5, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - - defer d52.Release() - defer d42.Release() - - checkCast(c.T(), d52, d42, opts) - checkCast(c.T(), d42, d52, opts) - }) - } - }) - - c.Run("rescale leads to trunc", func() { - dP38S10, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10}, - strings.NewReader(`["-02.1234567890", "30.1234567890", null]`)) - dP28S0, _, _ := array.FromJSON(c.mem, 
&arrow.Decimal256Type{Precision: 28, Scale: 0}, - strings.NewReader(`["-02.", "30.", null]`)) - dP38S10RoundTripped, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10}, - strings.NewReader(`["-02.0000000000", "30.0000000000", null]`)) - defer func() { - dP38S10.Release() - dP28S0.Release() - dP38S10RoundTripped.Release() - }() - - opts.AllowDecimalTruncate = true - checkCast(c.T(), dP38S10, dP28S0, opts) - checkCast(c.T(), dP28S0, dP38S10RoundTripped, opts) - - opts.AllowDecimalTruncate = false - opts.ToType = dP28S0.DataType() - checkCastFails(c.T(), dP38S10, opts) - checkCast(c.T(), dP28S0, dP38S10RoundTripped, opts) - }) - - c.Run("precision loss without rescale = trunc", func() { - d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34"]`)) - defer d42.Release() - for _, dt := range []arrow.DataType{ - &arrow.Decimal256Type{Precision: 3, Scale: 2}, - &arrow.Decimal256Type{Precision: 4, Scale: 3}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}} { - - opts.AllowDecimalTruncate = true - opts.ToType = dt - out, err := compute.CastArray(context.Background(), d42, &opts) - out.Release() - c.NoError(err) - - opts.AllowDecimalTruncate = false - opts.ToType = dt - checkCastFails(c.T(), d42, opts) - } - }) -} - -func (c *CastSuite) TestDecimal128ToDecimal256() { - var opts compute.CastOptions - - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - noTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`["02.0000000000", "30.0000000000", "22.0000000000", "-121.0000000000", null]`)) - expected, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 28, Scale: 10}, - strings.NewReader(`["02.", "30.", "22.", "-121.", null]`)) - - defer noTruncate.Release() - defer expected.Release() - - checkCast(c.T(), 
noTruncate, expected, opts) - }) - } - - c.Run("same scale diff precision", func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - d52, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 5, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - d402, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 40, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - - defer d52.Release() - defer d42.Release() - defer d402.Release() - - checkCast(c.T(), d52, d42, opts) - checkCast(c.T(), d52, d402, opts) - }) - } - }) - - c.Run("rescale leads to trunc", func() { - d128P38S10, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`["-02.1234567890", "30.1234567890", null]`)) - d128P28S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 0}, - strings.NewReader(`["-02.", "30.", null]`)) - d256P28S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 28, Scale: 0}, - strings.NewReader(`["-02.", "30.", null]`)) - d256P38S10RoundTripped, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10}, - strings.NewReader(`["-02.0000000000", "30.0000000000", null]`)) - defer func() { - d128P38S10.Release() - d128P28S0.Release() - d256P28S0.Release() - d256P38S10RoundTripped.Release() - }() - - opts.AllowDecimalTruncate = true - checkCast(c.T(), d128P38S10, d256P28S0, opts) - checkCast(c.T(), d128P28S0, d256P38S10RoundTripped, opts) - - opts.AllowDecimalTruncate = false - opts.ToType = d256P28S0.DataType() - checkCastFails(c.T(), d128P38S10, opts) - checkCast(c.T(), d128P28S0, d256P38S10RoundTripped, opts) - }) - - c.Run("precision loss without rescale = trunc", func() { - d128P4S2, _, _ := 
array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34"]`)) - defer d128P4S2.Release() - for _, dt := range []arrow.DataType{ - &arrow.Decimal256Type{Precision: 3, Scale: 2}, - &arrow.Decimal256Type{Precision: 4, Scale: 3}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}} { - - opts.AllowDecimalTruncate = true - opts.ToType = dt - out, err := compute.CastArray(context.Background(), d128P4S2, &opts) - out.Release() - c.NoError(err) - - opts.AllowDecimalTruncate = false - opts.ToType = dt - checkCastFails(c.T(), d128P4S2, opts) - } - }) -} - -func (c *CastSuite) TestDecimal256ToDecimal128() { - var opts compute.CastOptions - - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - noTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 42, Scale: 10}, - strings.NewReader(`["02.0000000000", "30.0000000000", "22.0000000000", "-121.0000000000", null]`)) - expected, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 0}, - strings.NewReader(`["02.", "30.", "22.", "-121.", null]`)) - - defer noTruncate.Release() - defer expected.Release() - - checkCast(c.T(), noTruncate, expected, opts) - checkCast(c.T(), expected, noTruncate, opts) - }) - } - - c.Run("same scale diff precision", func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - dP42S2, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 42, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - - defer dP42S2.Release() - defer d42.Release() - - checkCast(c.T(), dP42S2, d42, opts) - checkCast(c.T(), d42, dP42S2, opts) - }) - } - }) - - c.Run("rescale 
leads to trunc", func() { - d256P52S10, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 52, Scale: 10}, - strings.NewReader(`["-02.1234567890", "30.1234567890", null]`)) - d256P42S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 42, Scale: 0}, - strings.NewReader(`["-02.", "30.", null]`)) - d128P28S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 0}, - strings.NewReader(`["-02.", "30.", null]`)) - d128P38S10RoundTripped, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10}, - strings.NewReader(`["-02.0000000000", "30.0000000000", null]`)) - defer func() { - d256P52S10.Release() - d256P42S0.Release() - d128P28S0.Release() - d128P38S10RoundTripped.Release() - }() - - opts.AllowDecimalTruncate = true - checkCast(c.T(), d256P52S10, d128P28S0, opts) - checkCast(c.T(), d256P42S0, d128P38S10RoundTripped, opts) - - opts.AllowDecimalTruncate = false - opts.ToType = d128P28S0.DataType() - checkCastFails(c.T(), d256P52S10, opts) - checkCast(c.T(), d256P42S0, d128P38S10RoundTripped, opts) - }) - - c.Run("precision loss without rescale = trunc", func() { - d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34"]`)) - defer d42.Release() - for _, dt := range []arrow.DataType{ - &arrow.Decimal128Type{Precision: 3, Scale: 2}, - &arrow.Decimal128Type{Precision: 4, Scale: 3}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}} { - - opts.AllowDecimalTruncate = true - opts.ToType = dt - out, err := compute.CastArray(context.Background(), d42, &opts) - out.Release() - c.NoError(err) - - opts.AllowDecimalTruncate = false - opts.ToType = dt - checkCastFails(c.T(), d42, opts) - } - }) -} - -func (c *CastSuite) TestFloatingToDecimal() { - for _, fltType := range []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float64} { - c.Run("from "+fltType.String(), func() { - for _, decType := range 
[]arrow.DataType{&arrow.Decimal128Type{Precision: 5, Scale: 2}, &arrow.Decimal256Type{Precision: 5, Scale: 2}} { - c.Run("to "+decType.String(), func() { - c.checkCast(fltType, decType, - `[0.0, null, 123.45, 123.456, 999.994]`, `["0.00", null, "123.45", "123.46", "999.99"]`) - - c.Run("overflow", func() { - opts := compute.CastOptions{ToType: decType} - c.checkCastFails(fltType, `[999.996]`, &opts) - - opts.AllowDecimalTruncate = true - c.checkCastOpts(fltType, decType, `[0.0, null, 999.996, 123.45, 999.994]`, - `["0.00", null, "0.00", "123.45", "999.99"]`, opts) - }) - }) - } - }) - } - - dec128 := func(prec, scale int32) arrow.DataType { - return &arrow.Decimal128Type{Precision: prec, Scale: scale} - } - dec256 := func(prec, scale int32) arrow.DataType { - return &arrow.Decimal256Type{Precision: prec, Scale: scale} - } - - type decFunc func(int32, int32) arrow.DataType - - for _, decType := range []decFunc{dec128, dec256} { - // 2**64 + 2**41 (exactly representable as a float) - c.checkCast(arrow.PrimitiveTypes.Float32, decType(20, 0), - `[1.8446746e+19, -1.8446746e+19]`, - `[18446746272732807168, -18446746272732807168]`) - - c.checkCast(arrow.PrimitiveTypes.Float64, decType(20, 0), - `[1.8446744073709556e+19, -1.8446744073709556e+19]`, - `[18446744073709555712, -18446744073709555712]`) - - c.checkCast(arrow.PrimitiveTypes.Float32, decType(20, 4), - `[1.8446746e+15, -1.8446746e+15]`, - `[1844674627273280.7168, -1844674627273280.7168]`) - - c.checkCast(arrow.PrimitiveTypes.Float64, decType(20, 4), - `[1.8446744073709556e+15, -1.8446744073709556e+15]`, - `[1844674407370955.5712, -1844674407370955.5712]`) - } -} - -func (c *CastSuite) TestDecimalToFloating() { - for _, flt := range []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float64} { - c.Run(flt.String(), func() { - for _, dec := range []arrow.DataType{&arrow.Decimal128Type{Precision: 5, Scale: 2}, &arrow.Decimal256Type{Precision: 5, Scale: 2}} { - c.Run(dec.String(), func() { - 
c.checkCast(dec, flt, `["0.00", null, "123.45", "999.99"]`,
					`[0.0, null, 123.45, 999.99]`)
			})
		}
	})
}

// TestDateToString verifies date32/date64 -> (large)string formatting:
// epoch day 0 / epoch ms for one day map to ISO dates.
func (c *CastSuite) TestDateToString() {
	for _, strType := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} {
		c.checkCast(arrow.FixedWidthTypes.Date32, strType, `[0, null]`, `["1970-01-01", null]`)
		c.checkCast(arrow.FixedWidthTypes.Date64, strType, `[86400000, null]`, `["1970-01-02", null]`)
	}
}

// TestTimeToString verifies time32/time64 -> (large)string formatting at
// second and nanosecond resolution.
func (c *CastSuite) TestTimeToString() {
	for _, strType := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} {
		c.checkCast(arrow.FixedWidthTypes.Time32s, strType, `[1, 62]`, `["00:00:01", "00:01:02"]`)
		c.checkCast(arrow.FixedWidthTypes.Time64ns, strType, `[0, 1]`, `["00:00:00.000000000", "00:00:00.000000001"]`)
	}
}

// TestTimestampToString verifies zoneless timestamp -> (large)string
// formatting for every time unit; without a zone no offset/Z suffix is
// produced.
func (c *CastSuite) TestTimestampToString() {
	cases := []struct {
		unit     arrow.TimeUnit
		input    string
		expected string
	}{
		{arrow.Second, `[-30610224000, -5364662400]`, `["1000-01-01 00:00:00", "1800-01-01 00:00:00"]`},
		{arrow.Millisecond, `[-30610224000000, -5364662400000]`, `["1000-01-01 00:00:00.000", "1800-01-01 00:00:00.000"]`},
		{arrow.Microsecond, `[-30610224000000000, -5364662400000000]`, `["1000-01-01 00:00:00.000000", "1800-01-01 00:00:00.000000"]`},
		{arrow.Nanosecond, `[-596933876543210988, 349837323456789012]`, `["1951-02-01 01:02:03.456789012", "1981-02-01 01:02:03.456789012"]`},
	}
	for _, strType := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} {
		for _, tc := range cases {
			c.checkCast(&arrow.TimestampType{Unit: tc.unit}, strType, tc.input, tc.expected)
		}
	}
}

// TestTimestampWithZoneToString verifies that zoned timestamps are rendered
// in local time with the UTC offset appended ("Z" for the UTC types).
func (c *CastSuite) TestTimestampWithZoneToString() {
	for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} {
		c.checkCast(arrow.FixedWidthTypes.Timestamp_s, stype,
			`[-30610224000, -5364662400]`, `["1000-01-01 00:00:00Z", "1800-01-01 00:00:00Z"]`)

		c.checkCast(&arrow.TimestampType{Unit: arrow.Second,
TimeZone: "America/Phoenix"}, stype, - `[-34226955, 1456767743]`, `["1968-11-30 13:30:45-0700", "2016-02-29 10:42:23-0700"]`) - - c.checkCast(&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "America/Phoenix"}, stype, - `[-34226955877, 1456767743456]`, `["1968-11-30 13:30:44.123-0700", "2016-02-29 10:42:23.456-0700"]`) - - c.checkCast(&arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: "America/Phoenix"}, stype, - `[-34226955877000, 1456767743456789]`, `["1968-11-30 13:30:44.123000-0700", "2016-02-29 10:42:23.456789-0700"]`) - - c.checkCast(&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "America/Phoenix"}, stype, - `[-34226955876543211, 1456767743456789246]`, `["1968-11-30 13:30:44.123456789-0700", "2016-02-29 10:42:23.456789246-0700"]`) - } -} - -func (c *CastSuite) assertBinaryZeroCopy(lhs, rhs arrow.Array) { - // null bitmap and data buffers are always zero-copied - assertBufferSame(c.T(), lhs, rhs, 0) - assertBufferSame(c.T(), lhs, rhs, 2) - - lOffsetByteWidth := lhs.DataType().Layout().Buffers[1].ByteWidth - rOffsetByteWidth := rhs.DataType().Layout().Buffers[1].ByteWidth - if lOffsetByteWidth == rOffsetByteWidth { - assertBufferSame(c.T(), lhs, rhs, 1) - return - } - - offsets := make([]arrow.Array, 0, 2) - for _, arr := range []arrow.Array{lhs, rhs} { - length := arr.Len() - buffer := arr.Data().Buffers()[1] - - byteWidth := arr.DataType().Layout().Buffers[1].ByteWidth - switch byteWidth { - case 4: - data := array.NewData(arrow.PrimitiveTypes.Int32, length, []*memory.Buffer{nil, buffer}, nil, 0, 0) - defer data.Release() - i32 := array.NewInt32Data(data) - i64, err := compute.CastArray(context.Background(), i32, compute.SafeCastOptions(arrow.PrimitiveTypes.Int64)) - c.Require().NoError(err) - i32.Release() - defer i64.Release() - offsets = append(offsets, i64) - default: - data := array.NewData(arrow.PrimitiveTypes.Int64, length, []*memory.Buffer{nil, buffer}, nil, 0, 0) - defer data.Release() - i64 := array.NewInt64Data(data) - defer 
i64.Release() - offsets = append(offsets, i64) - } - } - c.Truef(array.Equal(offsets[0], offsets[1]), "lhs: %s\nrhs: %s", offsets[0], offsets[1]) -} - -func (c *CastSuite) TestBinaryToString() { - for _, btype := range []arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary} { - c.Run(btype.String(), func() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.Run(stype.String(), func() { - // empty -> empty always works - c.checkCast(btype, stype, `[]`, `[]`) - - invalidUtf8 := c.invalidUtf8Arr(btype) - defer invalidUtf8.Release() - - invalidutf8Str := c.invalidUtf8Arr(stype) - defer invalidutf8Str.Release() - - // invalid utf8 masked by a null bit is not an error - masked := c.maskArrayWithNullsAt(invalidUtf8, []int{4}) - expMasked := c.maskArrayWithNullsAt(invalidutf8Str, []int{4}) - defer masked.Release() - defer expMasked.Release() - - checkCast(c.T(), masked, expMasked, *compute.SafeCastOptions(stype)) - - opts := compute.SafeCastOptions(stype) - checkCastFails(c.T(), invalidUtf8, *opts) - - // override utf8 check - opts.AllowInvalidUtf8 = true - strs, err := compute.CastArray(context.Background(), invalidUtf8, opts) - c.NoError(err) - defer strs.Release() - c.assertBinaryZeroCopy(invalidUtf8, strs) - }) - } - }) - } - - c.Run("fixed size binary", func() { - fromType := &arrow.FixedSizeBinaryType{ByteWidth: 3} - invalidUtf8Arr := c.fixedSizeInvalidUtf8(fromType) - defer invalidUtf8Arr.Release() - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.Run(stype.String(), func() { - c.checkCast(fromType, stype, `[]`, `[]`) - - // invalid utf-8 masked by a null bit is not an error - strInvalidUtf8 := c.fixedSizeInvalidUtf8(stype) - defer strInvalidUtf8.Release() - - masked := c.maskArrayWithNullsAt(invalidUtf8Arr, []int{4}) - expMasked := c.maskArrayWithNullsAt(strInvalidUtf8, []int{4}) - defer masked.Release() - defer expMasked.Release() - - 
checkCast(c.T(), masked, expMasked, *compute.SafeCastOptions(stype))

				opts := compute.SafeCastOptions(stype)
				checkCastFails(c.T(), invalidUtf8Arr, *opts)

				// override the utf8 check and cast anyway
				opts.AllowInvalidUtf8 = true
				strs, err := compute.CastArray(context.Background(), invalidUtf8Arr, opts)
				c.NoError(err)
				defer strs.Release()

				// null buffer is not always the same if input is sliced
				assertBufferSame(c.T(), invalidUtf8Arr, strs, 0)

				c.Same(invalidUtf8Arr.Data().Buffers()[1], strs.Data().Buffers()[2])
			})
		}
	})
}

// TestBinaryOrStringToBinary exercises casts from each base binary/string
// type (and fixed-size binary) to the variable-length binary types. Binary
// targets perform no utf-8 validation, so invalid utf-8 must never fail
// here, and the offsets/data buffers should be shared (zero-copy).
func (c *CastSuite) TestBinaryOrStringToBinary() {
	for _, fromType := range baseBinaryTypes {
		c.Run(fromType.String(), func() {
			for _, toType := range []arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary} {
				c.Run(toType.String(), func() {
					// empty -> empty always works
					c.checkCast(fromType, toType, `[]`, `[]`)

					invalidUtf8 := c.invalidUtf8Arr(fromType)
					defer invalidUtf8.Release()

					// invalid utf-8 is not an error for binary
					out, err := compute.CastToType(context.Background(), invalidUtf8, toType)
					c.NoError(err)
					defer out.Release()
					c.assertBinaryZeroCopy(invalidUtf8, out)

					// invalid utf-8 masked by a null bit is also not an error
					invalidutf8Bin := c.invalidUtf8Arr(toType)
					defer invalidutf8Bin.Release()

					masked := c.maskArrayWithNullsAt(invalidUtf8, []int{4})
					expMasked := c.maskArrayWithNullsAt(invalidutf8Bin, []int{4})
					defer masked.Release()
					defer expMasked.Release()

					checkCast(c.T(), masked, expMasked, *compute.SafeCastOptions(toType))
				})
			}
		})
	}

	c.Run("fixed size binary", func() {
		fromType := &arrow.FixedSizeBinaryType{ByteWidth: 3}
		invalidUtf8Arr := c.fixedSizeInvalidUtf8(fromType)
		defer invalidUtf8Arr.Release()

		// identity cast is fine; a different byte width must fail
		checkCast(c.T(), invalidUtf8Arr, invalidUtf8Arr, *compute.DefaultCastOptions(true))
		checkCastFails(c.T(), invalidUtf8Arr, *compute.SafeCastOptions(&arrow.FixedSizeBinaryType{ByteWidth:
5})) - for _, toType := range []arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary} { - c.Run(toType.String(), func() { - c.checkCast(fromType, toType, `[]`, `[]`) - - out, err := compute.CastToType(context.Background(), invalidUtf8Arr, toType) - c.NoError(err) - defer out.Release() - assertBufferSame(c.T(), invalidUtf8Arr, out, 0) - - c.Same(invalidUtf8Arr.Data().Buffers()[1], out.Data().Buffers()[2]) - }) - } - }) -} - -func (c *CastSuite) TestStringToString() { - for _, fromType := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.Run("from "+fromType.String(), func() { - for _, toType := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.Run("to "+toType.String(), func() { - c.checkCast(fromType, toType, `[]`, `[]`) - - invalidUtf8 := c.invalidUtf8Arr(fromType) - defer invalidUtf8.Release() - - invalidutf8Str := c.invalidUtf8Arr(toType) - defer invalidutf8Str.Release() - - // invalid utf8 masked by a null bit is not an error - masked := c.maskArrayWithNullsAt(invalidUtf8, []int{4}) - expMasked := c.maskArrayWithNullsAt(invalidutf8Str, []int{4}) - defer masked.Release() - defer expMasked.Release() - - checkCast(c.T(), masked, expMasked, *compute.SafeCastOptions(toType)) - - opts := compute.SafeCastOptions(toType) - // override utf8 check - opts.AllowInvalidUtf8 = true - // utf-8 is not checked by cast when the origin (utf-8) guarantees utf-8 - strs, err := compute.CastArray(context.Background(), invalidUtf8, opts) - c.NoError(err) - defer strs.Release() - c.assertBinaryZeroCopy(invalidUtf8, strs) - }) - } - }) - } -} - -func (c *CastSuite) TestStringToInt() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - for _, dt := range signedIntTypes { - c.checkCast(stype, dt, - `["0", null, "127", "-1", "0", "0x0", "0x7F"]`, - `[0, null, 127, -1, 0, 0, 127]`) - } - - c.checkCast(stype, arrow.PrimitiveTypes.Int32, - 
`["2147483647", null, "-2147483648", "0", "0X0", "0x7FFFFFFF", "-0X1", "-0x10000000"]`, - `[2147483647, null, -2147483648, 0, 0, 2147483647, -1, -268435456]`) - - c.checkCast(stype, arrow.PrimitiveTypes.Int64, - `["9223372036854775807", null, "-9223372036854775808", "0", "0x0", "0x7FFFFFFFFFFFFFFf", "-0x0FFFFFFFFFFFFFFF"]`, - `[9223372036854775807, null, -9223372036854775808, 0, 0, 9223372036854775807, -1152921504606846975]`) - - for _, dt := range unsignedIntTypes { - c.checkCast(stype, dt, `["0", null, "127", "255", "0", "0x0", "0xff", "0X7f"]`, - `[0, null, 127, 255, 0, 0, 255, 127]`) - } - - c.checkCast(stype, arrow.PrimitiveTypes.Uint32, - `["2147483647", null, "4294967295", "0", "0x0", "0x7FFFFFFf", "0xFFFFFFFF"]`, - `[2147483647, null, 4294967295, 0, 0, 2147483647, 4294967295]`) - - c.checkCast(stype, arrow.PrimitiveTypes.Uint64, - `["9223372036854775807", null, "18446744073709551615", "0", "0x0", "0x7FFFFFFFFFFFFFFf", "0xfFFFFFFFFFFFFFFf"]`, - `[9223372036854775807, null, 18446744073709551615, 0, 0, 9223372036854775807, 18446744073709551615]`) - - for _, notInt8 := range []string{"z", "12 z", "128", "-129", "0.5", "0x", "0xfff", "-0xf0"} { - c.checkCastFails(stype, `["`+notInt8+`"]`, compute.SafeCastOptions(arrow.PrimitiveTypes.Int8)) - } - - for _, notUint8 := range []string{"256", "-1", "0.5", "0x", "0x3wa", "0x123"} { - c.checkCastFails(stype, `["`+notUint8+`"]`, compute.SafeCastOptions(arrow.PrimitiveTypes.Uint8)) - } - } -} - -func (c *CastSuite) TestStringToFloating() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - for _, dt := range []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float64} { - c.checkCast(stype, dt, `["0.1", null, "127.3", "1e3", "200.4", "0.5"]`, - `[0.1, null, 127.3, 1000, 200.4, 0.5]`) - - for _, notFloat := range []string{"z"} { - c.checkCastFails(stype, `["`+notFloat+`"]`, compute.SafeCastOptions(dt)) - } - } - } -} - -func (c *CastSuite) 
TestUnsupportedInputType() { - // casting to a supported target type, but with an unsupported - // input for that target type. - arr, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 3]`)) - defer arr.Release() - - toType := arrow.ListOf(arrow.BinaryTypes.String) - _, err := compute.CastToType(context.Background(), arr, toType) - c.ErrorIs(err, arrow.ErrNotImplemented) - c.ErrorContains(err, "function 'cast_list' has no kernel matching input types (int32)") - - // test calling through the generic kernel API - datum := compute.NewDatum(arr) - defer datum.Release() - _, err = compute.CallFunction(context.Background(), "cast", compute.SafeCastOptions(toType), datum) - c.ErrorIs(err, arrow.ErrNotImplemented) - c.ErrorContains(err, "function 'cast_list' has no kernel matching input types (int32)") -} - -func (c *CastSuite) TestUnsupportedTargetType() { - arr, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 3]`)) - defer arr.Release() - - toType := arrow.DenseUnionOf([]arrow.Field{{Name: "a", Type: arrow.PrimitiveTypes.Int32}}, []arrow.UnionTypeCode{0}) - _, err := compute.CastToType(context.Background(), arr, toType) - c.ErrorIs(err, arrow.ErrNotImplemented) - c.ErrorContains(err, "unsupported cast to dense_union from int32") - - // test calling through the generic kernel API - datum := compute.NewDatum(arr) - defer datum.Release() - _, err = compute.CallFunction(context.Background(), "cast", compute.SafeCastOptions(toType), datum) - c.ErrorIs(err, arrow.ErrNotImplemented) - c.ErrorContains(err, "unsupported cast to dense_union from int32") -} - -func (c *CastSuite) checkCastSelfZeroCopy(dt arrow.DataType, json string) { - arr, _, _ := array.FromJSON(c.mem, dt, strings.NewReader(json)) - defer arr.Release() - - checkCastZeroCopy(c.T(), arr, dt, compute.NewCastOptions(dt, true)) -} - -func (c *CastSuite) checkCastZeroCopy(from arrow.DataType, json string, to arrow.DataType) { - arr, _, _ := 
array.FromJSON(c.mem, from, strings.NewReader(json)) - defer arr.Release() - checkCastZeroCopy(c.T(), arr, to, compute.NewCastOptions(to, true)) -} - -func (c *CastSuite) TestTimestampToTimestamp() { - tests := []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Timestamp_ms}, - {arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Timestamp_us}, - {arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Timestamp_ns}, - } - - var opts compute.CastOptions - for _, tt := range tests { - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - c.checkCast(tt.coarse, tt.fine, `[0, null, 200, 1, 2]`, `[0, null, 200000, 1000, 2000]`) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, `[0, null, 200456, 1123, 2456]`, &opts) - - // with truncation allowed, divide/truncate - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, `[0, null, 200456, 1123, 2456]`, `[0, null, 200, 1, 2]`, opts) - }) - } - - tests = []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Timestamp_ns}, - } - - for _, tt := range tests { - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - c.checkCast(tt.coarse, tt.fine, `[0, null, 200, 1, 2]`, `[0, null, 200000000000, 1000000000, 2000000000]`) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, `[0, null, 200456000000, 1123000000, 2456000000]`, &opts) - - // with truncation allowed, divide/truncate - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, `[0, null, 200456000000, 1123000000, 2456000000]`, `[0, null, 200, 1, 2]`, opts) - }) - } -} - -func (c *CastSuite) TestTimestampZeroCopy() { - for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Timestamp_s /*, arrow.PrimitiveTypes.Int64*/} { - c.checkCastZeroCopy(arrow.FixedWidthTypes.Timestamp_s, `[0, null, 2000, 1000, 0]`, dt) - } - - 
c.checkCastZeroCopy(arrow.PrimitiveTypes.Int64, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Timestamp_s) -} - -func (c *CastSuite) TestTimestampToTimestampMultiplyOverflow() { - opts := compute.CastOptions{ToType: arrow.FixedWidthTypes.Timestamp_ns} - // 1000-01-01, 1800-01-01, 2000-01-01, 2300-01-01, 3000-01-01 - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_s, `[-30610224000, -5364662400, 946684800, 10413792000, 32503680000]`, &opts) -} - -var ( - timestampJSON = `["1970-01-01T00:00:59.123456789","2000-02-29T23:23:23.999999999", - "1899-01-01T00:59:20.001001001","2033-05-18T03:33:20.000000000", - "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002", - "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132", - "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163", - "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45", - "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null]` - timestampSecondsJSON = `["1970-01-01T00:00:59","2000-02-29T23:23:23", - "1899-01-01T00:59:20","2033-05-18T03:33:20", - "2020-01-01T01:05:05", "2019-12-31T02:10:10", - "2019-12-30T03:15:15", "2009-12-31T04:20:20", - "2010-01-01T05:25:25", "2010-01-03T06:30:30", - "2010-01-04T07:35:35", "2006-01-01T08:40:40", - "2005-12-31T09:45:45", "2008-12-28", "2008-12-29", - "2012-01-01 01:02:03", null]` - timestampExtremeJSON = `["1677-09-20T00:00:59.123456", "2262-04-13T23:23:23.999999"]` -) - -func (c *CastSuite) TestTimestampToDate() { - stamps, _, _ := array.FromJSON(c.mem, arrow.FixedWidthTypes.Timestamp_ns, strings.NewReader(timestampJSON)) - defer stamps.Release() - date32, _, _ := array.FromJSON(c.mem, arrow.FixedWidthTypes.Date32, - strings.NewReader(`[ - 0, 11016, -25932, 23148, - 18262, 18261, 18260, 14609, - 14610, 14612, 14613, 13149, - 13148, 14241, 14242, 15340, null - ]`)) - defer date32.Release() - date64, _, _ := array.FromJSON(c.mem, arrow.FixedWidthTypes.Date64, - strings.NewReader(`[ - 0, 951782400000, -2240524800000, 1999987200000, - 1577836800000, 
1577750400000, 1577664000000, 1262217600000, - 1262304000000, 1262476800000, 1262563200000, 1136073600000, - 1135987200000, 1230422400000, 1230508800000, 1325376000000, null]`), array.WithUseNumber()) - defer date64.Release() - - checkCast(c.T(), stamps, date32, *compute.DefaultCastOptions(true)) - checkCast(c.T(), stamps, date64, *compute.DefaultCastOptions(true)) - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Date32, - timestampExtremeJSON, `[-106753, 106753]`) - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Date64, - timestampExtremeJSON, `[-9223459200000, 9223459200000]`) - for _, u := range []arrow.TimeUnit{arrow.Second, arrow.Microsecond, arrow.Millisecond, arrow.Nanosecond} { - dt := &arrow.TimestampType{Unit: u} - c.checkCastExp(dt, timestampSecondsJSON, date32) - c.checkCastExp(dt, timestampSecondsJSON, date64) - } -} - -func (c *CastSuite) TestZonedTimestampToDate() { - c.Run("Pacific/Marquesas", func() { - dt := &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "Pacific/Marquesas"} - c.checkCast(dt, arrow.FixedWidthTypes.Date32, - timestampJSON, `[-1, 11016, -25933, 23147, - 18261, 18260, 18259, 14608, - 14609, 14611, 14612, 13148, - 13148, 14240, 14241, 15339, null]`) - c.checkCast(dt, arrow.FixedWidthTypes.Date64, timestampJSON, - `[-86400000, 951782400000, -2240611200000, 1999900800000, - 1577750400000, 1577664000000, 1577577600000, 1262131200000, - 1262217600000, 1262390400000, 1262476800000, 1135987200000, - 1135987200000, 1230336000000, 1230422400000, 1325289600000, null]`) - }) - - for _, u := range []arrow.TimeUnit{arrow.Second, arrow.Millisecond, arrow.Microsecond, arrow.Nanosecond} { - dt := &arrow.TimestampType{Unit: u, TimeZone: "Australia/Broken_Hill"} - c.checkCast(dt, arrow.FixedWidthTypes.Date32, timestampSecondsJSON, `[ - 0, 11017, -25932, 23148, - 18262, 18261, 18260, 14609, - 14610, 14612, 14613, 13149, - 13148, 14241, 14242, 15340, null]`) - c.checkCast(dt, 
arrow.FixedWidthTypes.Date64, timestampSecondsJSON, `[ - 0, 951868800000, -2240524800000, 1999987200000, 1577836800000, - 1577750400000, 1577664000000, 1262217600000, 1262304000000, - 1262476800000, 1262563200000, 1136073600000, 1135987200000, - 1230422400000, 1230508800000, 1325376000000, null]`) - } - - // invalid timezones - for _, u := range []arrow.TimeUnit{arrow.Second, arrow.Millisecond, arrow.Microsecond, arrow.Nanosecond} { - dt := &arrow.TimestampType{Unit: u, TimeZone: "Mars/Mariner_Valley"} - c.checkCastFails(dt, timestampSecondsJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Date32, false)) - c.checkCastFails(dt, timestampSecondsJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Date64, false)) - } -} - -func (c *CastSuite) TestTimestampToTime() { - c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time64ns, - timestampJSON, `[ - 59123456789, 84203999999999, 3560001001001, 12800000000000, - 3905001000000, 7810002000000, 11715003000000, 15620004132000, - 19525005321000, 23430006163000, 27335000000000, 31240000000000, - 35145000000000, 0, 0, 3723000000000, null]`) - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ns, timestampJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Time64us, true)) - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time64us, - timestampExtremeJSON, `[59123456, 84203999999]`) - - timesSec := `[59, 84203, 3560, 12800, - 3905, 7810, 11715, 15620, - 19525, 23430, 27335, 31240, - 35145, 0, 0, 3723, null]` - timesMs := `[59000, 84203000, 3560000, 12800000, - 3905000, 7810000, 11715000, 15620000, - 19525000, 23430000, 27335000, 31240000, - 35145000, 0, 0, 3723000, null]` - timesUs := `[59000000, 84203000000, 3560000000, 12800000000, - 3905000000, 7810000000, 11715000000, 15620000000, - 19525000000, 23430000000, 27335000000, 31240000000, - 35145000000, 0, 0, 3723000000, null]` - timesNs := `[59000000000, 84203000000000, 3560000000000, 12800000000000, - 3905000000000, 7810000000000, 
11715000000000, 15620000000000, - 19525000000000, 23430000000000, 27335000000000, 31240000000000, - 35145000000000, 0, 0, 3723000000000, null]` - - c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time32s, - timestampSecondsJSON, timesSec) - c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time32ms, - timestampSecondsJSON, timesMs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time32s, - timestampSecondsJSON, timesSec) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time32ms, - timestampSecondsJSON, timesMs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time64us, - timestampSecondsJSON, timesUs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time64ns, - timestampSecondsJSON, timesNs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time32ms, - timestampSecondsJSON, timesMs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time32s, - timestampSecondsJSON, timesSec) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time64us, - timestampSecondsJSON, timesUs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time64ns, - timestampSecondsJSON, timesNs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time32ms, - timestampSecondsJSON, timesMs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time32s, - timestampSecondsJSON, timesSec) - - trunc := compute.CastOptions{AllowTimeTruncate: true} - - timestampsUS := `["1970-01-01T00:00:59.123456","2000-02-29T23:23:23.999999", - "1899-01-01T00:59:20.001001","2033-05-18T03:33:20.000000", - "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002", - "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132", - "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163", - "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45", - "2008-12-28", 
"2008-12-29", "2012-01-01 01:02:03", null]` - timestampsMS := `["1970-01-01T00:00:59.123","2000-02-29T23:23:23.999", - "1899-01-01T00:59:20.001","2033-05-18T03:33:20.000", - "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002", - "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004", - "2010-01-01T05:25:25.005", "2010-01-03T06:30:30.006", - "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45", - "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null]` - - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ns, timestampJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Time64us, true)) - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ns, timestampJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Time32ms, true)) - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ns, timestampJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Time32s, true)) - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_us, timestampsUS, compute.NewCastOptions(arrow.FixedWidthTypes.Time32ms, true)) - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_us, timestampsUS, compute.NewCastOptions(arrow.FixedWidthTypes.Time32s, true)) - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ms, timestampsMS, compute.NewCastOptions(arrow.FixedWidthTypes.Time32s, true)) - - timesNsUs := `[59123456, 84203999999, 3560001001, 12800000000, - 3905001000, 7810002000, 11715003000, 15620004132, - 19525005321, 23430006163, 27335000000, 31240000000, - 35145000000, 0, 0, 3723000000, null]` - timesNsMs := `[59123, 84203999, 3560001, 12800000, - 3905001, 7810002, 11715003, 15620004, - 19525005, 23430006, 27335000, 31240000, - 35145000, 0, 0, 3723000, null]` - timesUsNs := `[59123456000, 84203999999000, 3560001001000, 12800000000000, - 3905001000000, 7810002000000, 11715003000000, 15620004132000, - 19525005321000, 23430006163000, 27335000000000, 31240000000000, - 35145000000000, 0, 0, 3723000000000, null]` - timesMsNs := `[59123000000, 84203999000000, 3560001000000, 12800000000000, - 3905001000000, 
7810002000000, 11715003000000, 15620004000000, - 19525005000000, 23430006000000, 27335000000000, 31240000000000, - 35145000000000, 0, 0, 3723000000000, null]` - timesMsUs := `[59123000, 84203999000, 3560001000, 12800000000, - 3905001000, 7810002000, 11715003000, 15620004000, - 19525005000, 23430006000, 27335000000, 31240000000, - 35145000000, 0, 0, 3723000000, null]` - - c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time64us, timestampJSON, timesNsUs, trunc) - c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time32ms, timestampJSON, timesNsMs, trunc) - c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time32s, timestampJSON, timesSec, trunc) - c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time32ms, timestampsUS, timesNsMs, trunc) - c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time32s, timestampsUS, timesSec, trunc) - c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time32s, timestampsMS, timesSec, trunc) - - // upscaling tests - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time64ns, timestampsUS, timesUsNs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time64ns, timestampsMS, timesMsNs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time64us, timestampsMS, timesMsUs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time64ns, timestampSecondsJSON, timesNs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time64us, timestampSecondsJSON, timesUs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time32ms, timestampSecondsJSON, timesMs) - - // invalid timezones - for _, u := range []arrow.TimeUnit{arrow.Second, arrow.Millisecond, arrow.Microsecond, arrow.Nanosecond} { - dt := &arrow.TimestampType{Unit: u, TimeZone: "Mars/Mariner_Valley"} - switch u { - case arrow.Second, 
arrow.Millisecond: - c.checkCastFails(dt, timestampSecondsJSON, compute.NewCastOptions(&arrow.Time32Type{Unit: u}, false)) - default: - c.checkCastFails(dt, timestampSecondsJSON, compute.NewCastOptions(&arrow.Time64Type{Unit: u}, false)) - } - } -} - -func (c *CastSuite) TestZonedTimestampToTime() { - c.checkCast(&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "Pacific/Marquesas"}, - arrow.FixedWidthTypes.Time64ns, timestampJSON, `[52259123456789, 50003999999999, 56480001001001, 65000000000000, - 56105001000000, 60010002000000, 63915003000000, 67820004132000, - 71725005321000, 75630006163000, 79535000000000, 83440000000000, - 945000000000, 52200000000000, 52200000000000, 55923000000000, null]`) - - timesSec := `[ - 34259, 35603, 35960, 47000, - 41705, 45610, 49515, 53420, - 57325, 61230, 65135, 69040, - 72945, 37800, 37800, 41523, null - ]` - timesMs := `[ - 34259000, 35603000, 35960000, 47000000, - 41705000, 45610000, 49515000, 53420000, - 57325000, 61230000, 65135000, 69040000, - 72945000, 37800000, 37800000, 41523000, null - ]` - timesUs := `[ - 34259000000, 35603000000, 35960000000, 47000000000, - 41705000000, 45610000000, 49515000000, 53420000000, - 57325000000, 61230000000, 65135000000, 69040000000, - 72945000000, 37800000000, 37800000000, 41523000000, null - ]` - timesNs := `[ - 34259000000000, 35603000000000, 35960000000000, 47000000000000, - 41705000000000, 45610000000000, 49515000000000, 53420000000000, - 57325000000000, 61230000000000, 65135000000000, 69040000000000, - 72945000000000, 37800000000000, 37800000000000, 41523000000000, null - ]` - - c.checkCast(&arrow.TimestampType{Unit: arrow.Second, TimeZone: "Australia/Broken_Hill"}, - arrow.FixedWidthTypes.Time32s, timestampSecondsJSON, timesSec) - c.checkCast(&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "Australia/Broken_Hill"}, - arrow.FixedWidthTypes.Time32ms, timestampSecondsJSON, timesMs) - c.checkCast(&arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: 
"Australia/Broken_Hill"}, - arrow.FixedWidthTypes.Time64us, timestampSecondsJSON, timesUs) - c.checkCast(&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "Australia/Broken_Hill"}, - arrow.FixedWidthTypes.Time64ns, timestampSecondsJSON, timesNs) -} - -func (c *CastSuite) TestTimeToTime() { - var opts compute.CastOptions - - tests := []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Time32s, arrow.FixedWidthTypes.Time32ms}, - {arrow.FixedWidthTypes.Time32ms, arrow.FixedWidthTypes.Time64us}, - {arrow.FixedWidthTypes.Time64us, arrow.FixedWidthTypes.Time64ns}, - } - - for _, tt := range tests { - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - coarse := `[0, null, 200, 1, 2]` - promoted := `[0, null, 200000, 1000, 2000]` - willBeTruncated := `[0, null, 200456, 1123, 2456]` - - c.checkCast(tt.coarse, tt.fine, coarse, promoted) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, willBeTruncated, &opts) - - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) - }) - } - - tests = []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Time32s, arrow.FixedWidthTypes.Time64us}, - {arrow.FixedWidthTypes.Time32ms, arrow.FixedWidthTypes.Time64ns}, - } - - for _, tt := range tests { - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - coarse := `[0, null, 200, 1, 2]` - promoted := `[0, null, 200000000, 1000000, 2000000]` - willBeTruncated := `[0, null, 200456000, 1123000, 2456000]` - - c.checkCast(tt.coarse, tt.fine, coarse, promoted) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, willBeTruncated, &opts) - - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) - }) - } - - tests = []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Time32s, arrow.FixedWidthTypes.Time64ns}, - } - - for _, tt := range tests 
{ - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - coarse := `[0, null, 200, 1, 2]` - promoted := `[0, null, 200000000000, 1000000000, 2000000000]` - willBeTruncated := `[0, null, 200456000000, 1123000000, 2456000000]` - - c.checkCast(tt.coarse, tt.fine, coarse, promoted) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, willBeTruncated, &opts) - - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) - }) - } -} - -func (c *CastSuite) TestTimeZeroCopy() { - for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Time32s /*, arrow.PrimitiveTypes.Int32*/} { - c.checkCastZeroCopy(arrow.FixedWidthTypes.Time32s, `[0, null, 2000, 1000, 0]`, dt) - } - c.checkCastZeroCopy(arrow.PrimitiveTypes.Int32, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Time32s) - - for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Time64us /*, arrow.PrimitiveTypes.Int64*/} { - c.checkCastZeroCopy(arrow.FixedWidthTypes.Time64us, `[0, null, 2000, 1000, 0]`, dt) - } - c.checkCastZeroCopy(arrow.PrimitiveTypes.Int64, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Time64us) -} - -func (c *CastSuite) TestDateToDate() { - day32 := `[0, null, 100, 1, 10]` - day64 := `[0, null, 8640000000, 86400000, 864000000]` - - // multiply promotion - c.checkCast(arrow.FixedWidthTypes.Date32, arrow.FixedWidthTypes.Date64, day32, day64) - // no truncation - c.checkCast(arrow.FixedWidthTypes.Date64, arrow.FixedWidthTypes.Date32, day64, day32) - - day64WillBeTruncated := `[0, null, 8640000123, 86400456, 864000789]` - - opts := compute.CastOptions{ToType: arrow.FixedWidthTypes.Date32} - c.checkCastFails(arrow.FixedWidthTypes.Date64, day64WillBeTruncated, &opts) - - opts.AllowTimeTruncate = true - c.checkCastOpts(arrow.FixedWidthTypes.Date64, arrow.FixedWidthTypes.Date32, - day64WillBeTruncated, day32, opts) -} - -func (c *CastSuite) TestDateZeroCopy() { - for _, dt := range 
[]arrow.DataType{arrow.FixedWidthTypes.Date32 /*, arrow.PrimitiveTypes.Int32*/} { - c.checkCastZeroCopy(arrow.FixedWidthTypes.Date32, `[0, null, 2000, 1000, 0]`, dt) - } - c.checkCastZeroCopy(arrow.PrimitiveTypes.Int32, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Date32) - - for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Date64 /*, arrow.PrimitiveTypes.Int64*/} { - c.checkCastZeroCopy(arrow.FixedWidthTypes.Date64, `[0, null, 172800000, 86400000, 0]`, dt) - } - c.checkCastZeroCopy(arrow.PrimitiveTypes.Int64, `[0, null, 172800000, 86400000, 0]`, arrow.FixedWidthTypes.Date64) -} - -func (c *CastSuite) TestDurationToDuration() { - var opts compute.CastOptions - - tests := []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Duration_s, arrow.FixedWidthTypes.Duration_ms}, - {arrow.FixedWidthTypes.Duration_ms, arrow.FixedWidthTypes.Duration_us}, - {arrow.FixedWidthTypes.Duration_us, arrow.FixedWidthTypes.Duration_ns}, - } - - for _, tt := range tests { - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - coarse := `[0, null, 200, 1, 2]` - promoted := `[0, null, 200000, 1000, 2000]` - willBeTruncated := `[0, null, 200456, 1123, 2456]` - - c.checkCast(tt.coarse, tt.fine, coarse, promoted) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, willBeTruncated, &opts) - - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) - }) - } - - tests = []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Duration_s, arrow.FixedWidthTypes.Duration_us}, - {arrow.FixedWidthTypes.Duration_ms, arrow.FixedWidthTypes.Duration_ns}, - } - - for _, tt := range tests { - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - coarse := `[0, null, 200, 1, 2]` - promoted := `[0, null, 200000000, 1000000, 2000000]` - willBeTruncated := `[0, null, 200456000, 1123000, 2456000]` - - c.checkCast(tt.coarse, tt.fine, coarse, 
promoted) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, willBeTruncated, &opts) - - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) - }) - } - - tests = []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Duration_s, arrow.FixedWidthTypes.Duration_ns}, - } - - for _, tt := range tests { - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - coarse := `[0, null, 200, 1, 2]` - promoted := `[0, null, 200000000000, 1000000000, 2000000000]` - willBeTruncated := `[0, null, 200456000000, 1123000000, 2456000000]` - - c.checkCast(tt.coarse, tt.fine, coarse, promoted) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, willBeTruncated, &opts) - - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) - }) - } -} - -func (c *CastSuite) TestDurationZeroCopy() { - for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Duration_s /*, arrow.PrimitiveTypes.Int64*/} { - c.checkCastZeroCopy(arrow.FixedWidthTypes.Duration_s, `[0, null, 2000, 1000, 0]`, dt) - } - c.checkCastZeroCopy(arrow.PrimitiveTypes.Int64, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Duration_s) -} - -func (c *CastSuite) TestDurationToDurationMultiplyOverflow() { - opts := compute.CastOptions{ToType: arrow.FixedWidthTypes.Duration_ns} - c.checkCastFails(arrow.FixedWidthTypes.Duration_s, `[10000000000, 1, 2, 3, 10000000000]`, &opts) -} - -func (c *CastSuite) TestStringToTimestamp() { - for _, dt := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.checkCast(dt, &arrow.TimestampType{Unit: arrow.Second}, `["1970-01-01", null, "2000-02-29"]`, `[0, null, 951782400]`) - c.checkCast(dt, &arrow.TimestampType{Unit: arrow.Microsecond}, `["1970-01-01", null, "2000-02-29"]`, `[0, null, 951782400000000]`) - - for _, u := range []arrow.TimeUnit{arrow.Second, 
arrow.Millisecond, arrow.Microsecond, arrow.Nanosecond} { - for _, notTS := range []string{"", "xxx"} { - opts := compute.NewCastOptions(&arrow.TimestampType{Unit: u}, true) - c.checkCastFails(dt, `["`+notTS+`"]`, opts) - } - } - - zoned, _, _ := array.FromJSON(c.mem, dt, strings.NewReader(`["2020-02-29T00:00:00Z", "2020-03-02T10:11:12+0102"]`)) - defer zoned.Release() - mixed, _, _ := array.FromJSON(c.mem, dt, strings.NewReader(`["2020-03-02T10:11:12+0102", "2020-02-29T00:00:00"]`)) - defer mixed.Release() - - c.checkCastArr(zoned, &arrow.TimestampType{Unit: arrow.Second, TimeZone: "UTC"}, `[1582934400, 1583140152]`, *compute.DefaultCastOptions(true)) - - // timestamp with zone offset should not parse as naive - checkCastFails(c.T(), zoned, *compute.NewCastOptions(&arrow.TimestampType{Unit: arrow.Second}, true)) - - // mixed zoned/unzoned should not parse as naive - checkCastFails(c.T(), mixed, *compute.NewCastOptions(&arrow.TimestampType{Unit: arrow.Second}, true)) - - // timestamp with zone offset can parse as any time zone (since they're unambiguous) - c.checkCastArr(zoned, arrow.FixedWidthTypes.Timestamp_s, `[1582934400, 1583140152]`, *compute.DefaultCastOptions(true)) - c.checkCastArr(zoned, &arrow.TimestampType{Unit: arrow.Second, TimeZone: "America/Phoenix"}, `[1582934400, 1583140152]`, *compute.DefaultCastOptions(true)) - } -} - -func (c *CastSuite) TestIntToString() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.Run(stype.String(), func() { - c.checkCast(arrow.PrimitiveTypes.Int8, stype, - `[0, 1, 127, -128, null]`, `["0", "1", "127", "-128", null]`) - - c.checkCast(arrow.PrimitiveTypes.Uint8, stype, - `[0, 1, 255, null]`, `["0", "1", "255", null]`) - - c.checkCast(arrow.PrimitiveTypes.Int16, stype, - `[0, 1, 32767, -32768, null]`, `["0", "1", "32767", "-32768", null]`) - - c.checkCast(arrow.PrimitiveTypes.Uint16, stype, - `[0, 1, 65535, null]`, `["0", "1", "65535", null]`) - - 
c.checkCast(arrow.PrimitiveTypes.Int32, stype, - `[0, 1, 2147483647, -2147483648, null]`, - `["0", "1", "2147483647", "-2147483648", null]`) - - c.checkCast(arrow.PrimitiveTypes.Uint32, stype, - `[0, 1, 4294967295, null]`, `["0", "1", "4294967295", null]`) - - c.checkCast(arrow.PrimitiveTypes.Int64, stype, - `[0, 1, 9223372036854775807, -9223372036854775808, null]`, - `["0", "1", "9223372036854775807", "-9223372036854775808", null]`) - - c.checkCast(arrow.PrimitiveTypes.Uint64, stype, - `[0, 1, 18446744073709551615, null]`, `["0", "1", "18446744073709551615", null]`) - }) - } -} - -func (c *CastSuite) TestFloatingToString() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.Run(stype.String(), func() { - bldr := array.NewFloat32Builder(c.mem) - defer bldr.Release() - bldr.AppendValues([]float32{ - 0, float32(math.Copysign(0, -1)), 1.5, float32(math.Inf(-1)), - float32(math.Inf(0)), float32(math.NaN())}, nil) - bldr.AppendNull() - arr := bldr.NewArray() - defer arr.Release() - - bldr64 := array.NewFloat64Builder(c.mem) - defer bldr64.Release() - bldr64.AppendValues([]float64{ - 0, math.Copysign(0, -1), 1.5, math.Inf(-1), math.Inf(0), math.NaN()}, nil) - bldr64.AppendNull() - arr64 := bldr64.NewArray() - defer arr64.Release() - - c.checkCastArr(arr, stype, `["0", "-0", "1.5", "-Inf", "+Inf", "NaN", null]`, *compute.DefaultCastOptions(true)) - - c.checkCastArr(arr64, stype, `["0", "-0", "1.5", "-Inf", "+Inf", "NaN", null]`, *compute.DefaultCastOptions(true)) - }) - } -} - -func (c *CastSuite) TestBooleanToString() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.Run(stype.String(), func() { - c.checkCast(arrow.FixedWidthTypes.Boolean, stype, - `[true, true, false, null]`, `["true", "true", "false", null]`) - }) - } -} - -func (c *CastSuite) TestIdentityCasts() { - c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Boolean, `[false, true, null, false]`) - - 
c.checkCastSelfZeroCopy(arrow.Null, `[null, null, null]`) - for _, typ := range numericTypes { - c.checkCastSelfZeroCopy(typ, `[1, 2, null, 4]`) - } - - // ["foo", "bar"] base64 encoded for binary - c.checkCastSelfZeroCopy(arrow.BinaryTypes.Binary, `["Zm9v", "YmFy"]`) - c.checkCastSelfZeroCopy(arrow.BinaryTypes.String, `["foo", "bar"]`) - c.checkCastSelfZeroCopy(&arrow.FixedSizeBinaryType{ByteWidth: 3}, `["Zm9v", "YmFy"]`) - - c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Time32ms, `[1, 2, 3, 4]`) - c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Time64us, `[1, 2, 3, 4]`) - c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Date32, `[1, 2, 3, 4]`) - c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Date64, `[86400000, 0]`) - c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Timestamp_s, `[1, 2, 3, 4]`) - - c.checkCastSelfZeroCopy(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.PrimitiveTypes.Int8}, - `[1, 2, 3, 1, null, 3]`) -} - -func (c *CastSuite) TestListToPrimitive() { - arr, _, _ := array.FromJSON(c.mem, arrow.ListOf(arrow.PrimitiveTypes.Int8), strings.NewReader(`[[1, 2], [3, 4]]`)) - defer arr.Release() - - _, err := compute.CastToType(context.Background(), arr, arrow.PrimitiveTypes.Uint8) - c.ErrorIs(err, arrow.ErrNotImplemented) -} - -type makeList func(arrow.DataType) arrow.DataType - -var listFactories = []makeList{ - func(dt arrow.DataType) arrow.DataType { return arrow.ListOf(dt) }, - func(dt arrow.DataType) arrow.DataType { return arrow.LargeListOf(dt) }, -} - -func (c *CastSuite) checkListToList(valTypes []arrow.DataType, jsonData string) { - for _, makeSrc := range listFactories { - for _, makeDest := range listFactories { - for _, srcValueType := range valTypes { - for _, dstValueType := range valTypes { - srcType := makeSrc(srcValueType) - dstType := makeDest(dstValueType) - c.Run(fmt.Sprintf("from %s to %s", srcType, dstType), func() { - c.checkCast(srcType, dstType, jsonData, jsonData) - }) - } - } - } - } -} - -func (c *CastSuite) 
TestListToList() { - c.checkListToList([]arrow.DataType{arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Int64}, - `[[0], [1], null, [2, 3, 4], [5, 6], null, [], [7], [8, 9]]`) -} - -func (c *CastSuite) TestListToListNoNulls() { - c.checkListToList([]arrow.DataType{arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Int64}, - `[[0], [1], [2, 3, 4], [5, 6], [], [7], [8, 9]]`) -} - -func (c *CastSuite) TestListToListOptionsPassthru() { - for _, makeSrc := range listFactories { - for _, makeDest := range listFactories { - opts := compute.SafeCastOptions(makeDest(arrow.PrimitiveTypes.Int16)) - c.checkCastFails(makeSrc(arrow.PrimitiveTypes.Int32), `[[87654321]]`, opts) - - opts.AllowIntOverflow = true - c.checkCastOpts(makeSrc(arrow.PrimitiveTypes.Int32), makeDest(arrow.PrimitiveTypes.Int16), - `[[87654321]]`, `[[32689]]`, *opts) - } - } -} - -func (c *CastSuite) checkStructToStruct(types []arrow.DataType) { - for _, srcType := range types { - c.Run(srcType.String(), func() { - for _, destType := range types { - c.Run(destType.String(), func() { - fieldNames := []string{"a", "b"} - a1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[1, 2, 3, 4, null]`)) - b1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[null, 7, 8, 9, 0]`)) - a2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[1, 2, 3, 4, null]`)) - b2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[null, 7, 8, 9, 0]`)) - src, _ := array.NewStructArray([]arrow.Array{a1, b1}, fieldNames) - dest, _ := array.NewStructArray([]arrow.Array{a2, b2}, fieldNames) - defer func() { - a1.Release() - b1.Release() - a2.Release() - b2.Release() - src.Release() - dest.Release() - }() - - checkCast(c.T(), src, dest, *compute.DefaultCastOptions(true)) - c.Run("with nulls", func() { - nullBitmap := memory.NewBufferBytes([]byte{10}) - srcNullData := src.Data().(*array.Data).Copy() - srcNullData.Buffers()[0] = nullBitmap - 
srcNullData.SetNullN(3) - defer srcNullData.Release() - destNullData := dest.Data().(*array.Data).Copy() - destNullData.Buffers()[0] = nullBitmap - destNullData.SetNullN(3) - defer destNullData.Release() - - srcNulls := array.NewStructData(srcNullData) - destNulls := array.NewStructData(destNullData) - defer srcNulls.Release() - defer destNulls.Release() - - checkCast(c.T(), srcNulls, destNulls, *compute.DefaultCastOptions(true)) - }) - }) - } - }) - } -} - -func (c *CastSuite) checkStructToStructSubset(types []arrow.DataType) { - for _, srcType := range types { - c.Run(srcType.String(), func() { - for _, destType := range types { - c.Run(destType.String(), func() { - fieldNames := []string{"a", "b", "c", "d", "e"} - - a1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[1, 2, 5]`)) - defer a1.Release() - b1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[3, 4, 7]`)) - defer b1.Release() - c1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[9, 11, 44]`)) - defer c1.Release() - d1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[6, 51, 49]`)) - defer d1.Release() - e1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[19, 17, 74]`)) - defer e1.Release() - - a2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[1, 2, 5]`)) - defer a2.Release() - b2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[3, 4, 7]`)) - defer b2.Release() - c2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[9, 11, 44]`)) - defer c2.Release() - d2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[6, 51, 49]`)) - defer d2.Release() - e2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[19, 17, 74]`)) - defer e2.Release() - - src, _ := array.NewStructArray([]arrow.Array{a1, b1, c1, d1, e1}, fieldNames) - defer src.Release() - dest1, _ := array.NewStructArray([]arrow.Array{a2}, []string{"a"}) - defer dest1.Release() - - opts := *compute.DefaultCastOptions(true) - checkCast(c.T(), src, 
dest1, opts) - - dest2, _ := array.NewStructArray([]arrow.Array{b2, c2}, []string{"b", "c"}) - defer dest2.Release() - checkCast(c.T(), src, dest2, opts) - - dest3, _ := array.NewStructArray([]arrow.Array{c2, d2, e2}, []string{"c", "d", "e"}) - defer dest3.Release() - checkCast(c.T(), src, dest3, opts) - - dest4, _ := array.NewStructArray([]arrow.Array{a2, b2, c2, e2}, []string{"a", "b", "c", "e"}) - defer dest4.Release() - checkCast(c.T(), src, dest4, opts) - - dest5, _ := array.NewStructArray([]arrow.Array{a2, b2, c2, d2, e2}, []string{"a", "b", "c", "d", "e"}) - defer dest5.Release() - checkCast(c.T(), src, dest5, opts) - - // field does not exist - dest6 := arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "d", Type: arrow.PrimitiveTypes.Int16, Nullable: true}, - arrow.Field{Name: "f", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - ) - options6 := compute.SafeCastOptions(dest6) - _, err := compute.CastArray(context.TODO(), src, options6) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "struct fields don't match or are in the wrong order") - - // fields in wrong order - dest7 := arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int16, Nullable: true}, - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - ) - options7 := compute.SafeCastOptions(dest7) - _, err = compute.CastArray(context.TODO(), src, options7) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "struct fields don't match or are in the wrong order") - }) - } - }) - } -} - -func (c *CastSuite) checkStructToStructSubsetWithNulls(types []arrow.DataType) { - for _, srcType := range types { - c.Run(srcType.String(), func() { - for _, destType := range types { - c.Run(destType.String(), func() { - fieldNames := []string{"a", "b", "c", "d", "e"} - - a1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[1, 
2, 5]`)) - defer a1.Release() - b1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[3, null, 7]`)) - defer b1.Release() - c1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[9, 11, 44]`)) - defer c1.Release() - d1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[6, 51, null]`)) - defer d1.Release() - e1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[null, 17, 74]`)) - defer e1.Release() - - a2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[1, 2, 5]`)) - defer a2.Release() - b2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[3, null, 7]`)) - defer b2.Release() - c2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[9, 11, 44]`)) - defer c2.Release() - d2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[6, 51, null]`)) - defer d2.Release() - e2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[null, 17, 74]`)) - defer e2.Release() - - // 0, 1, 0 - nullBitmap := memory.NewBufferBytes([]byte{2}) - srcNull, _ := array.NewStructArrayWithNulls([]arrow.Array{a1, b1, c1, d1, e1}, fieldNames, nullBitmap, 2, 0) - defer srcNull.Release() - - dest1Null, _ := array.NewStructArrayWithNulls([]arrow.Array{a2}, []string{"a"}, nullBitmap, -1, 0) - defer dest1Null.Release() - opts := compute.DefaultCastOptions(true) - checkCast(c.T(), srcNull, dest1Null, *opts) - - dest2Null, _ := array.NewStructArrayWithNulls([]arrow.Array{b2, c2}, []string{"b", "c"}, nullBitmap, -1, 0) - defer dest2Null.Release() - checkCast(c.T(), srcNull, dest2Null, *opts) - - dest3Null, _ := array.NewStructArrayWithNulls([]arrow.Array{a2, d2, e2}, []string{"a", "d", "e"}, nullBitmap, -1, 0) - defer dest3Null.Release() - checkCast(c.T(), srcNull, dest3Null, *opts) - - dest4Null, _ := array.NewStructArrayWithNulls([]arrow.Array{a2, b2, c2, e2}, []string{"a", "b", "c", "e"}, nullBitmap, -1, 0) - defer dest4Null.Release() - checkCast(c.T(), srcNull, dest4Null, *opts) - - dest5Null, _ := 
array.NewStructArrayWithNulls([]arrow.Array{a2, b2, c2, d2, e2}, []string{"a", "b", "c", "d", "e"}, nullBitmap, -1, 0) - defer dest5Null.Release() - checkCast(c.T(), srcNull, dest5Null, *opts) - - // field does not exist - dest6Null := arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "d", Type: arrow.PrimitiveTypes.Int16, Nullable: true}, - arrow.Field{Name: "f", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - ) - options6Null := compute.SafeCastOptions(dest6Null) - _, err := compute.CastArray(context.TODO(), srcNull, options6Null) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "struct fields don't match or are in the wrong order") - - // fields in wrong order - dest7Null := arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int16, Nullable: true}, - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - ) - options7Null := compute.SafeCastOptions(dest7Null) - _, err = compute.CastArray(context.TODO(), srcNull, options7Null) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "struct fields don't match or are in the wrong order") - }) - } - }) - } -} - -func (c *CastSuite) TestStructToSameSizedAndNamedStruct() { - c.checkStructToStruct(numericTypes) -} - -func (c *CastSuite) TestStructToStructSubset() { - c.checkStructToStructSubset(numericTypes) -} - -func (c *CastSuite) TestStructToStructSubsetWithNulls() { - c.checkStructToStructSubsetWithNulls(numericTypes) -} - -func (c *CastSuite) TestStructToSameSizedButDifferentNamedStruct() { - fieldNames := []string{"a", "b"} - a, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[1, 2]`)) - defer a.Release() - b, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[3, 4]`)) - defer b.Release() - - src, _ := array.NewStructArray([]arrow.Array{a, b}, fieldNames) - defer src.Release() - 
- dest := arrow.StructOf( - arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "d", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - ) - opts := compute.SafeCastOptions(dest) - _, err := compute.CastArray(context.TODO(), src, opts) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "struct fields don't match or are in the wrong order") -} - -func (c *CastSuite) TestStructToBiggerStruct() { - fieldNames := []string{"a", "b"} - a, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[1, 2]`)) - defer a.Release() - b, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[3, 4]`)) - defer b.Release() - - src, _ := array.NewStructArray([]arrow.Array{a, b}, fieldNames) - defer src.Release() - - dest := arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - ) - opts := compute.SafeCastOptions(dest) - _, err := compute.CastArray(context.TODO(), src, opts) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "struct fields don't match or are in the wrong order") -} - -func (c *CastSuite) TestStructToDifferentNullabilityStruct() { - c.Run("non-nullable to nullable", func() { - fieldsSrcNonNullable := []arrow.Field{ - {Name: "a", Type: arrow.PrimitiveTypes.Int8}, - {Name: "b", Type: arrow.PrimitiveTypes.Int8}, - {Name: "c", Type: arrow.PrimitiveTypes.Int8}, - } - srcNonNull, _, err := array.FromJSON(c.mem, arrow.StructOf(fieldsSrcNonNullable...), - strings.NewReader(`[ - {"a": 11, "b": 32, "c": 95}, - {"a": 23, "b": 46, "c": 11}, - {"a": 56, "b": 37, "c": 44} - ]`)) - c.Require().NoError(err) - defer srcNonNull.Release() - - fieldsDest1Nullable := []arrow.Field{ - {Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - {Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - 
{Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - } - destNullable, _, err := array.FromJSON(c.mem, arrow.StructOf(fieldsDest1Nullable...), - strings.NewReader(`[ - {"a": 11, "b": 32, "c": 95}, - {"a": 23, "b": 46, "c": 11}, - {"a": 56, "b": 37, "c": 44} - ]`)) - c.Require().NoError(err) - defer destNullable.Release() - - checkCast(c.T(), srcNonNull, destNullable, *compute.DefaultCastOptions(true)) - - fieldsDest2Nullable := []arrow.Field{ - {Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - {Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - } - - data := array.NewData(arrow.StructOf(fieldsDest2Nullable...), destNullable.Len(), destNullable.Data().Buffers(), - []arrow.ArrayData{destNullable.Data().Children()[0], destNullable.Data().Children()[2]}, - destNullable.NullN(), 0) - defer data.Release() - dest2Nullable := array.NewStructData(data) - defer dest2Nullable.Release() - checkCast(c.T(), srcNonNull, dest2Nullable, *compute.DefaultCastOptions(true)) - - fieldsDest3Nullable := []arrow.Field{ - {Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - } - - data = array.NewData(arrow.StructOf(fieldsDest3Nullable...), destNullable.Len(), destNullable.Data().Buffers(), - []arrow.ArrayData{destNullable.Data().Children()[1]}, destNullable.NullN(), 0) - defer data.Release() - dest3Nullable := array.NewStructData(data) - defer dest3Nullable.Release() - checkCast(c.T(), srcNonNull, dest3Nullable, *compute.DefaultCastOptions(true)) - }) - c.Run("nullable to non-nullable", func() { - fieldsSrcNullable := []arrow.Field{ - {Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "b", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "c", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - } - srcNullable, _, err := array.FromJSON(c.mem, arrow.StructOf(fieldsSrcNullable...), - strings.NewReader(`[ - {"a": 1, "b": 3, "c": 9}, - {"a": null, "b": 4, "c": 11}, - {"a": 5, "b": null, "c": 44} - ]`)) - 
c.Require().NoError(err) - defer srcNullable.Release() - - fieldsDest1NonNullable := []arrow.Field{ - {Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, - {Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, - {Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, - } - dest1NonNullable := arrow.StructOf(fieldsDest1NonNullable...) - options1NoNullable := compute.SafeCastOptions(dest1NonNullable) - _, err = compute.CastArray(context.TODO(), srcNullable, options1NoNullable) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "cannot cast nullable field to non-nullable field") - - fieldsDest2NonNullable := []arrow.Field{ - {Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, - {Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, - } - dest2NonNullable := arrow.StructOf(fieldsDest2NonNullable...) - options2NoNullable := compute.SafeCastOptions(dest2NonNullable) - _, err = compute.CastArray(context.TODO(), srcNullable, options2NoNullable) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "cannot cast nullable field to non-nullable field") - - fieldsDest3NonNullable := []arrow.Field{ - {Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, - } - dest3NonNullable := arrow.StructOf(fieldsDest3NonNullable...) 
- options3NoNullable := compute.SafeCastOptions(dest3NonNullable) - _, err = compute.CastArray(context.TODO(), srcNullable, options3NoNullable) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "cannot cast nullable field to non-nullable field") - }) -} - -func (c *CastSuite) smallIntArrayFromJSON(data string) arrow.Array { - arr, _, _ := array.FromJSON(c.mem, types.NewSmallintType(), strings.NewReader(data)) - return arr -} - -func (c *CastSuite) TestExtensionTypeToIntDowncast() { - smallint := types.NewSmallintType() - arrow.RegisterExtensionType(smallint) - defer arrow.UnregisterExtensionType("smallint") - - c.Run("smallint(int16) to int16", func() { - arr := c.smallIntArrayFromJSON(`[0, 100, 200, 1, 2]`) - defer arr.Release() - - checkCastZeroCopy(c.T(), arr, arrow.PrimitiveTypes.Int16, compute.DefaultCastOptions(true)) - - c.checkCast(smallint, arrow.PrimitiveTypes.Uint8, - `[0, 100, 200, 1, 2]`, `[0, 100, 200, 1, 2]`) - }) - - c.Run("smallint(int16) to uint8 with overflow", func() { - opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Uint8) - c.checkCastFails(smallint, `[0, null, 256, 1, 3]`, opts) - - opts.AllowIntOverflow = true - c.checkCastOpts(smallint, arrow.PrimitiveTypes.Uint8, - `[0, null, 256, 1, 3]`, `[0, null, 0, 1, 3]`, *opts) - }) - - c.Run("smallint(int16) to uint8 with underflow", func() { - opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Uint8) - c.checkCastFails(smallint, `[0, null, -1, 1, 3]`, opts) - - opts.AllowIntOverflow = true - c.checkCastOpts(smallint, arrow.PrimitiveTypes.Uint8, - `[0, null, -1, 1, 3]`, `[0, null, 255, 1, 3]`, *opts) - }) -} - -func (c *CastSuite) TestNoOutBitmapIfIsAllValid() { - a, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[1]`)) - defer a.Release() - - opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Int32) - result, err := compute.CastArray(context.Background(), a, opts) - c.NoError(err) - c.NotNil(a.Data().Buffers()[0]) - c.Nil(result.Data().Buffers()[0]) -} - 
-func (c *CastSuite) TestFromDictionary() { - ctx := compute.WithAllocator(context.Background(), c.mem) - - dictionaries := []arrow.Array{} - - for _, ty := range numericTypes { - a, _, _ := array.FromJSON(c.mem, ty, strings.NewReader(`[23, 12, 45, 12, null]`)) - defer a.Release() - dictionaries = append(dictionaries, a) - } - - for _, ty := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - a, _, _ := array.FromJSON(c.mem, ty, strings.NewReader(`["foo", "bar", "baz", "foo", null]`)) - defer a.Release() - dictionaries = append(dictionaries, a) - } - - for _, d := range dictionaries { - for _, ty := range dictIndexTypes { - indices, _, _ := array.FromJSON(c.mem, ty, strings.NewReader(`[4, 0, 1, 2, 0, 4, null, 2]`)) - - expected, err := compute.Take(ctx, compute.TakeOptions{}, &compute.ArrayDatum{d.Data()}, &compute.ArrayDatum{indices.Data()}) - c.Require().NoError(err) - exp := expected.(*compute.ArrayDatum).MakeArray() - - dictArr := array.NewDictionaryArray(&arrow.DictionaryType{IndexType: ty, ValueType: d.DataType()}, indices, d) - checkCast(c.T(), dictArr, exp, *compute.SafeCastOptions(d.DataType())) - - indices.Release() - expected.Release() - exp.Release() - dictArr.Release() - return - } - } -} - -func TestCasts(t *testing.T) { - suite.Run(t, new(CastSuite)) -} - -const rngseed = 0x94378165 - -func benchmarkNumericCast(b *testing.B, fromType, toType arrow.DataType, opts compute.CastOptions, size, min, max int64, nullprob float64) { - rng := gen.NewRandomArrayGenerator(rngseed, memory.DefaultAllocator) - arr := rng.Numeric(fromType.ID(), size, min, max, nullprob) - var ( - err error - out compute.Datum - ctx = context.Background() - input = compute.NewDatum(arr.Data()) - ) - - b.Cleanup(func() { - arr.Release() - input.Release() - }) - - opts.ToType = toType - b.ResetTimer() - b.SetBytes(size * int64(fromType.(arrow.FixedWidthDataType).Bytes())) - for i := 0; i < b.N; i++ { - out, err = compute.CastDatum(ctx, input, &opts) - if 
err != nil { - b.Fatal(err) - } - out.Release() - } -} - -func benchmarkFloatingToIntegerCast(b *testing.B, fromType, toType arrow.DataType, opts compute.CastOptions, size, min, max int64, nullprob float64) { - rng := gen.NewRandomArrayGenerator(rngseed, memory.DefaultAllocator) - arr := rng.Numeric(toType.ID(), size, min, max, nullprob) - asFloat, err := compute.CastToType(context.Background(), arr, fromType) - if err != nil { - b.Fatal(err) - } - arr.Release() - - var ( - out compute.Datum - ctx = context.Background() - input = compute.NewDatum(asFloat.Data()) - ) - - b.Cleanup(func() { - asFloat.Release() - input.Release() - }) - - opts.ToType = toType - b.ResetTimer() - b.SetBytes(size * int64(fromType.(arrow.FixedWidthDataType).Bytes())) - for i := 0; i < b.N; i++ { - out, err = compute.CastDatum(ctx, input, &opts) - if err != nil { - b.Fatal(err) - } - out.Release() - } -} - -func BenchmarkCasting(b *testing.B) { - type benchfn func(b *testing.B, fromType, toType arrow.DataType, opts compute.CastOptions, size, min, max int64, nullprob float64) - - tests := []struct { - from, to arrow.DataType - min, max int64 - safe bool - fn benchfn - }{ - {arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int32, math.MinInt32, math.MaxInt32, true, benchmarkNumericCast}, - {arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int32, math.MinInt32, math.MaxInt32, false, benchmarkNumericCast}, - {arrow.PrimitiveTypes.Uint32, arrow.PrimitiveTypes.Int32, 0, math.MaxInt32, true, benchmarkNumericCast}, - {arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Float64, 0, 1000, true, benchmarkNumericCast}, - {arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Float64, 0, 1000, false, benchmarkNumericCast}, - {arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Int32, -1000, 1000, true, benchmarkFloatingToIntegerCast}, - {arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Int32, -1000, 1000, false, benchmarkFloatingToIntegerCast}, - } - - for _, tt := range tests { - for _, sz := range 
[]int64{int64(CpuCacheSizes[1]) /* L2 Cache Size */} { - for _, nullProb := range []float64{0, 0.1, 0.5, 0.9, 1} { - arraySize := sz / int64(tt.from.(arrow.FixedWidthDataType).Bytes()) - opts := compute.DefaultCastOptions(tt.safe) - b.Run(fmt.Sprintf("sz=%d/nullprob=%.2f/from=%s/to=%s/safe=%t", arraySize, nullProb, tt.from, tt.to, tt.safe), func(b *testing.B) { - tt.fn(b, tt.from, tt.to, *opts, arraySize, tt.min, tt.max, nullProb) - }) - } - } - } -} diff --git a/go/arrow/compute/datum.go b/go/arrow/compute/datum.go deleted file mode 100644 index 9619fe09610de..0000000000000 --- a/go/arrow/compute/datum.go +++ /dev/null @@ -1,305 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package compute - -import ( - "fmt" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/scalar" -) - -//go:generate go run golang.org/x/tools/cmd/stringer -type=DatumKind -linecomment - -// DatumKind is an enum used for denoting which kind of type a datum is encapsulating -type DatumKind int - -const ( - KindNone DatumKind = iota // none - KindScalar // scalar - KindArray // array - KindChunked // chunked_array - KindRecord // record_batch - KindTable // table -) - -const UnknownLength int64 = -1 - -// DatumIsValue returns true if the datum passed is a Scalar, Array -// or ChunkedArray type (e.g. it contains a specific value not a -// group of values) -func DatumIsValue(d Datum) bool { - switch d.Kind() { - case KindScalar, KindArray, KindChunked: - return true - } - return false -} - -// Datum is a variant interface for wrapping the various Arrow data structures -// for now the various Datum types just hold a Value which is the type they -// are wrapping, but it might make sense in the future for those types -// to actually be aliases or embed their types instead. Not sure yet. -type Datum interface { - fmt.Stringer - Kind() DatumKind - Len() int64 - Equals(Datum) bool - Release() - - data() any -} - -// ArrayLikeDatum is an interface for treating a Datum similarly to an Array, -// so that it is easy to differentiate between Record/Table/Collection and Scalar, -// Array/ChunkedArray for ease of use. Chunks will return an empty slice for Scalar, -// a slice with 1 element for Array, and the slice of chunks for a chunked array. -type ArrayLikeDatum interface { - Datum - NullN() int64 - Type() arrow.DataType - Chunks() []arrow.Array -} - -// TableLikeDatum is an interface type for specifying either a RecordBatch or a -// Table as both contain a schema as opposed to just a single data type. 
-type TableLikeDatum interface { - Datum - Schema() *arrow.Schema -} - -// EmptyDatum is the null case, a Datum with nothing in it. -type EmptyDatum struct{} - -func (EmptyDatum) String() string { return "nullptr" } -func (EmptyDatum) Kind() DatumKind { return KindNone } -func (EmptyDatum) Len() int64 { return UnknownLength } -func (EmptyDatum) Release() {} -func (EmptyDatum) Equals(other Datum) bool { - _, ok := other.(EmptyDatum) - return ok -} -func (EmptyDatum) data() any { return nil } - -// ScalarDatum contains a scalar value -type ScalarDatum struct { - Value scalar.Scalar -} - -func (ScalarDatum) Kind() DatumKind { return KindScalar } -func (ScalarDatum) Len() int64 { return 1 } -func (ScalarDatum) Chunks() []arrow.Array { return nil } -func (d *ScalarDatum) Type() arrow.DataType { return d.Value.DataType() } -func (d *ScalarDatum) String() string { return d.Value.String() } -func (d *ScalarDatum) ToScalar() (scalar.Scalar, error) { - return d.Value, nil -} -func (d *ScalarDatum) data() any { return d.Value } -func (d *ScalarDatum) NullN() int64 { - if d.Value.IsValid() { - return 0 - } - return 1 -} - -type releasable interface { - Release() -} - -func (d *ScalarDatum) Release() { - if v, ok := d.Value.(releasable); ok { - v.Release() - } -} - -func (d *ScalarDatum) Equals(other Datum) bool { - if rhs, ok := other.(*ScalarDatum); ok { - return scalar.Equals(d.Value, rhs.Value) - } - return false -} - -// ArrayDatum references an array.Data object which can be used to create -// array instances from if needed. 
-type ArrayDatum struct { - Value arrow.ArrayData -} - -func (ArrayDatum) Kind() DatumKind { return KindArray } -func (d *ArrayDatum) Type() arrow.DataType { return d.Value.DataType() } -func (d *ArrayDatum) Len() int64 { return int64(d.Value.Len()) } -func (d *ArrayDatum) NullN() int64 { return int64(d.Value.NullN()) } -func (d *ArrayDatum) String() string { return fmt.Sprintf("Array:{%s}", d.Value.DataType()) } -func (d *ArrayDatum) MakeArray() arrow.Array { return array.MakeFromData(d.Value) } -func (d *ArrayDatum) Chunks() []arrow.Array { return []arrow.Array{d.MakeArray()} } -func (d *ArrayDatum) ToScalar() (scalar.Scalar, error) { - return scalar.NewListScalarData(d.Value), nil -} -func (d *ArrayDatum) Release() { - d.Value.Release() - d.Value = nil -} -func (d *ArrayDatum) data() any { return d.Value } -func (d *ArrayDatum) Equals(other Datum) bool { - rhs, ok := other.(*ArrayDatum) - if !ok { - return false - } - - left := d.MakeArray() - defer left.Release() - right := rhs.MakeArray() - defer right.Release() - - return array.Equal(left, right) -} - -// ChunkedDatum contains a chunked array for use with expressions and compute. 
-type ChunkedDatum struct { - Value *arrow.Chunked -} - -func (ChunkedDatum) Kind() DatumKind { return KindChunked } -func (d *ChunkedDatum) Type() arrow.DataType { return d.Value.DataType() } -func (d *ChunkedDatum) Len() int64 { return int64(d.Value.Len()) } -func (d *ChunkedDatum) NullN() int64 { return int64(d.Value.NullN()) } -func (d *ChunkedDatum) String() string { return fmt.Sprintf("Array:{%s}", d.Value.DataType()) } -func (d *ChunkedDatum) Chunks() []arrow.Array { return d.Value.Chunks() } -func (d *ChunkedDatum) data() any { return d.Value } -func (d *ChunkedDatum) Release() { - d.Value.Release() - d.Value = nil -} - -func (d *ChunkedDatum) Equals(other Datum) bool { - if rhs, ok := other.(*ChunkedDatum); ok { - return array.ChunkedEqual(d.Value, rhs.Value) - } - return false -} - -// RecordDatum contains an array.Record for passing a full record to an expression -// or to compute. -type RecordDatum struct { - Value arrow.Record -} - -func (RecordDatum) Kind() DatumKind { return KindRecord } -func (RecordDatum) String() string { return "RecordBatch" } -func (r *RecordDatum) Len() int64 { return r.Value.NumRows() } -func (r *RecordDatum) Schema() *arrow.Schema { return r.Value.Schema() } -func (r *RecordDatum) data() any { return r.Value } -func (r *RecordDatum) Release() { - r.Value.Release() - r.Value = nil -} - -func (r *RecordDatum) Equals(other Datum) bool { - if rhs, ok := other.(*RecordDatum); ok { - return array.RecordEqual(r.Value, rhs.Value) - } - return false -} - -// TableDatum contains a table so that multiple record batches can be worked with -// together as a single table for being passed to compute and expression handling. 
-type TableDatum struct { - Value arrow.Table -} - -func (TableDatum) Kind() DatumKind { return KindTable } -func (TableDatum) String() string { return "Table" } -func (d *TableDatum) Len() int64 { return d.Value.NumRows() } -func (d *TableDatum) Schema() *arrow.Schema { return d.Value.Schema() } -func (d *TableDatum) data() any { return d.Value } -func (d *TableDatum) Release() { - d.Value.Release() - d.Value = nil -} - -func (d *TableDatum) Equals(other Datum) bool { - if rhs, ok := other.(*TableDatum); ok { - return array.TableEqual(d.Value, rhs.Value) - } - return false -} - -// NewDatum will construct the appropriate Datum type based on what is passed in -// as the argument. -// -// An arrow.Array gets an ArrayDatum -// An array.Chunked gets a ChunkedDatum -// An array.Record gets a RecordDatum -// an array.Table gets a TableDatum -// a scalar.Scalar gets a ScalarDatum -// -// Anything else is passed to scalar.MakeScalar and receives a scalar -// datum of that appropriate type. -func NewDatum(value interface{}) Datum { - switch v := value.(type) { - case Datum: - return NewDatum(v.data()) - case arrow.Array: - v.Data().Retain() - return &ArrayDatum{v.Data()} - case scalar.Releasable: - v.Retain() - return NewDatumWithoutOwning(v) - case scalar.Scalar: - return &ScalarDatum{v} - default: - return &ScalarDatum{scalar.MakeScalar(value)} - } -} - -// NewDatumWithoutOwning is like NewDatum only it does not call Retain on -// the passed in value (if applicable). This means that if the resulting -// Datum should not have Release called on it and the original value needs -// to outlive the Datum. -// -// Only use this if you know what you're doing. 
For the most part this is -// just a convenience function.+- - -func NewDatumWithoutOwning(value interface{}) Datum { - switch v := value.(type) { - case arrow.Array: - return &ArrayDatum{v.Data()} - case arrow.ArrayData: - return &ArrayDatum{v} - case *arrow.Chunked: - return &ChunkedDatum{v} - case arrow.Record: - return &RecordDatum{v} - case arrow.Table: - return &TableDatum{v} - case scalar.Scalar: - return &ScalarDatum{v} - default: - return &ScalarDatum{scalar.MakeScalar(value)} - } -} - -var ( - _ ArrayLikeDatum = (*ScalarDatum)(nil) - _ ArrayLikeDatum = (*ArrayDatum)(nil) - _ ArrayLikeDatum = (*ChunkedDatum)(nil) - _ TableLikeDatum = (*RecordDatum)(nil) - _ TableLikeDatum = (*TableDatum)(nil) -) diff --git a/go/arrow/compute/datumkind_string.go b/go/arrow/compute/datumkind_string.go deleted file mode 100644 index 3603e5e495414..0000000000000 --- a/go/arrow/compute/datumkind_string.go +++ /dev/null @@ -1,30 +0,0 @@ -// Code generated by "stringer -type=DatumKind -linecomment"; DO NOT EDIT. - -//go:build go1.18 - -package compute - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. 
- var x [1]struct{} - _ = x[KindNone-0] - _ = x[KindScalar-1] - _ = x[KindArray-2] - _ = x[KindChunked-3] - _ = x[KindRecord-4] - _ = x[KindTable-5] -} - -const _DatumKind_name = "nonescalararraychunked_arrayrecord_batchtable" - -var _DatumKind_index = [...]uint8{0, 4, 10, 15, 28, 40, 45} - -func (i DatumKind) String() string { - if i < 0 || i >= DatumKind(len(_DatumKind_index)-1) { - return "DatumKind(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _DatumKind_name[_DatumKind_index[i]:_DatumKind_index[i+1]] -} diff --git a/go/arrow/compute/doc.go b/go/arrow/compute/doc.go deleted file mode 100644 index 7c763cb18d0ff..0000000000000 --- a/go/arrow/compute/doc.go +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package compute is a native-go implementation of an Acero-like -// arrow compute engine. It requires go1.18+ -// -// While consumers of Arrow that are able to use CGO could utilize the -// C Data API (using the cdata package) and could link against the -// acero library directly, there are consumers who cannot use CGO. This -// is an attempt to provide for those users, and in general create a -// native-go arrow compute engine. 
-// -// The overwhelming majority of things in this package require go1.18 as -// it utilizes generics. The files in this package and its sub-packages -// are all excluded from being built by go versions lower than 1.18 so -// that the larger Arrow module itself is still compatible with go1.17. -// -// Everything in this package should be considered Experimental for now. -package compute - -//go:generate stringer -type=FuncKind -linecomment diff --git a/go/arrow/compute/example_test.go b/go/arrow/compute/example_test.go deleted file mode 100644 index d427fb622d24a..0000000000000 --- a/go/arrow/compute/example_test.go +++ /dev/null @@ -1,91 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute_test - -import ( - "context" - "fmt" - "log" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -// This example demonstrates how to register a custom scalar function. 
-func Example_customFunction() { - pool := memory.NewGoAllocator() - - ctx := context.Background() - execCtx := compute.DefaultExecCtx() - ctx = compute.SetExecCtx(ctx, execCtx) - - add42 := compute.NewScalarFunction("add_42", compute.Arity{ - NArgs: 1, - }, compute.FunctionDoc{ - Summary: "Returns the input values plus 42", - ArgNames: []string{"input"}, - }) - - if err := add42.AddNewKernel( - []exec.InputType{ - // We accept a single argument (array) of Int8 type. - { - Kind: exec.InputExact, - Type: arrow.PrimitiveTypes.Int8, - }, - }, - // We'll return a single Int8 array. - exec.NewOutputType(arrow.PrimitiveTypes.Int8), - func(ctx *exec.KernelCtx, span *exec.ExecSpan, result *exec.ExecResult) error { - // The second buffer contains the values. Both for the input and the output arrays. - for i, x := range span.Values[0].Array.Buffers[1].Buf { - result.Buffers[1].Buf[i] = x + 42 - } - return nil - }, - nil, - ); err != nil { - log.Fatal(err) - } - execCtx.Registry.AddFunction(add42, true) - - inputArrayBuilder := array.NewInt8Builder(pool) - for i := 0; i < 16; i++ { - inputArrayBuilder.Append(int8(i)) - } - inputArray := inputArrayBuilder.NewArray() - - outputArrayDatum, err := compute.CallFunction( - compute.SetExecCtx(context.Background(), execCtx), - "add_42", - nil, - &compute.ArrayDatum{Value: inputArray.Data()}, - ) - if err != nil { - log.Fatal(err) - } - - fmt.Println(array.NewInt8Data(outputArrayDatum.(*compute.ArrayDatum).Value).Int8Values()) - - // Output: - // [42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57] -} diff --git a/go/arrow/compute/exec.go b/go/arrow/compute/exec.go deleted file mode 100644 index 1142297c1c396..0000000000000 --- a/go/arrow/compute/exec.go +++ /dev/null @@ -1,199 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute - -import ( - "context" - "fmt" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/internal/debug" -) - -func haveChunkedArray(values []Datum) bool { - for _, v := range values { - if v.Kind() == KindChunked { - return true - } - } - return false -} - -// ExecSpanFromBatch constructs and returns a new ExecSpan from the values -// inside of the ExecBatch which could be scalar or arrays. -// -// This is mostly used for tests but is also a convenience method for other -// cases. -func ExecSpanFromBatch(batch *ExecBatch) *exec.ExecSpan { - out := &exec.ExecSpan{Len: batch.Len, Values: make([]exec.ExecValue, len(batch.Values))} - for i, v := range batch.Values { - outVal := &out.Values[i] - if v.Kind() == KindScalar { - outVal.Scalar = v.(*ScalarDatum).Value - } else { - outVal.Array.SetMembers(v.(*ArrayDatum).Value) - outVal.Scalar = nil - } - } - return out -} - -// this is the primary driver of execution -func execInternal(ctx context.Context, fn Function, opts FunctionOptions, passedLen int64, args ...Datum) (result Datum, err error) { - if opts == nil { - if err = checkOptions(fn, opts); err != nil { - return - } - opts = fn.DefaultOptions() - } - - // we only allow Array, ChunkedArray, and Scalars for now. - // RecordBatch and Table datums are disallowed. 
- if err = checkAllIsValue(args); err != nil { - return - } - - inTypes := make([]arrow.DataType, len(args)) - for i, a := range args { - inTypes[i] = a.(ArrayLikeDatum).Type() - } - - var ( - k exec.Kernel - executor KernelExecutor - ) - - switch fn.Kind() { - case FuncScalar: - executor = scalarExecPool.Get().(*scalarExecutor) - defer func() { - executor.Clear() - scalarExecPool.Put(executor.(*scalarExecutor)) - }() - case FuncVector: - executor = vectorExecPool.Get().(*vectorExecutor) - defer func() { - executor.Clear() - vectorExecPool.Put(executor.(*vectorExecutor)) - }() - default: - return nil, fmt.Errorf("%w: direct execution of %s", arrow.ErrNotImplemented, fn.Kind()) - } - - if k, err = fn.DispatchBest(inTypes...); err != nil { - return - } - - var newArgs []Datum - // cast arguments if necessary - for i, arg := range args { - if !arrow.TypeEqual(inTypes[i], arg.(ArrayLikeDatum).Type()) { - if newArgs == nil { - newArgs = make([]Datum, len(args)) - copy(newArgs, args) - } - newArgs[i], err = CastDatum(ctx, arg, SafeCastOptions(inTypes[i])) - if err != nil { - return nil, err - } - defer newArgs[i].Release() - } - } - if newArgs != nil { - args = newArgs - } - - kctx := &exec.KernelCtx{Ctx: ctx, Kernel: k} - init := k.GetInitFn() - kinitArgs := exec.KernelInitArgs{Kernel: k, Inputs: inTypes, Options: opts} - if init != nil { - kctx.State, err = init(kctx, kinitArgs) - if err != nil { - return - } - } - - if err = executor.Init(kctx, kinitArgs); err != nil { - return - } - - input := ExecBatch{Values: args, Len: 0} - if input.NumValues() == 0 { - if passedLen != -1 { - input.Len = passedLen - } - } else { - inferred, allSame := inferBatchLength(input.Values) - input.Len = inferred - switch fn.Kind() { - case FuncScalar: - if passedLen != -1 && passedLen != inferred { - return nil, fmt.Errorf("%w: passed batch length for execution did not match actual length for scalar fn execution", - arrow.ErrInvalid) - } - case FuncVector: - vkernel := 
k.(*exec.VectorKernel) - if !(allSame || !vkernel.CanExecuteChunkWise) { - return nil, fmt.Errorf("%w: vector kernel arguments must all be the same length", arrow.ErrInvalid) - } - } - } - - ectx := GetExecCtx(ctx) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - ch := make(chan Datum, ectx.ExecChannelSize) - go func() { - defer close(ch) - if err = executor.Execute(ctx, &input, ch); err != nil { - cancel() - } - }() - - result = executor.WrapResults(ctx, ch, haveChunkedArray(input.Values)) - if err == nil { - debug.Assert(executor.CheckResultType(result) == nil, "invalid result type") - } - - if ctx.Err() == context.Canceled && result != nil { - result.Release() - } - - return -} - -// CallFunction is a one-shot invoker for all types of functions. -// -// It will perform kernel-dispatch, argument checking, iteration of -// ChunkedArray inputs and wrapping of outputs. -// -// To affect the execution options, you must call SetExecCtx and pass -// the resulting context in here. -func CallFunction(ctx context.Context, funcName string, opts FunctionOptions, args ...Datum) (Datum, error) { - ectx := GetExecCtx(ctx) - fn, ok := ectx.Registry.GetFunction(funcName) - if !ok { - return nil, fmt.Errorf("%w: function '%s' not found", arrow.ErrKey, funcName) - } - - return fn.Execute(ctx, opts, args...) -} diff --git a/go/arrow/compute/exec/hash_util.go b/go/arrow/compute/exec/hash_util.go deleted file mode 100644 index 0c8f7df5a3237..0000000000000 --- a/go/arrow/compute/exec/hash_util.go +++ /dev/null @@ -1,24 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package exec - -// ADAPTED FROM HASH UTILITIES FOR BOOST - -func HashCombine(seed, value uint64) uint64 { - seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2) - return seed -} diff --git a/go/arrow/compute/exec/kernel.go b/go/arrow/compute/exec/kernel.go deleted file mode 100644 index 600e52c681686..0000000000000 --- a/go/arrow/compute/exec/kernel.go +++ /dev/null @@ -1,695 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package exec - -import ( - "context" - "fmt" - "hash/maphash" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "golang.org/x/exp/slices" -) - -var hashSeed = maphash.MakeSeed() - -type ctxAllocKey struct{} - -// WithAllocator returns a new context with the provided allocator -// embedded into the context. -func WithAllocator(ctx context.Context, mem memory.Allocator) context.Context { - return context.WithValue(ctx, ctxAllocKey{}, mem) -} - -// GetAllocator retrieves the allocator from the context, or returns -// memory.DefaultAllocator if there was no allocator in the provided -// context. -func GetAllocator(ctx context.Context) memory.Allocator { - mem, ok := ctx.Value(ctxAllocKey{}).(memory.Allocator) - if !ok { - return memory.DefaultAllocator - } - return mem -} - -// Kernel defines the minimum interface required for the basic execution -// kernel. It will grow as the implementation requires. -type Kernel interface { - GetInitFn() KernelInitFn - GetSig() *KernelSignature -} - -// NonAggKernel builds on the base Kernel interface for -// non aggregate execution kernels. Specifically this will -// represent Scalar and Vector kernels. -type NonAggKernel interface { - Kernel - Exec(*KernelCtx, *ExecSpan, *ExecResult) error - GetNullHandling() NullHandling - GetMemAlloc() MemAlloc - CanFillSlices() bool -} - -// KernelCtx is a small struct holding the context for a kernel execution -// consisting of a pointer to the kernel, initialized state (if needed) -// and the context for this execution. 
-type KernelCtx struct { - Ctx context.Context - Kernel Kernel - State KernelState -} - -func (k *KernelCtx) Allocate(bufsize int) *memory.Buffer { - buf := memory.NewResizableBuffer(GetAllocator(k.Ctx)) - buf.Resize(bufsize) - return buf -} - -func (k *KernelCtx) AllocateBitmap(nbits int64) *memory.Buffer { - nbytes := bitutil.BytesForBits(nbits) - return k.Allocate(int(nbytes)) -} - -// TypeMatcher define an interface for matching Input or Output types -// for execution kernels. There are multiple implementations of this -// interface provided by this package. -type TypeMatcher interface { - fmt.Stringer - Matches(typ arrow.DataType) bool - Equals(other TypeMatcher) bool -} - -type sameTypeIDMatcher struct { - accepted arrow.Type -} - -func (s sameTypeIDMatcher) Matches(typ arrow.DataType) bool { return s.accepted == typ.ID() } -func (s sameTypeIDMatcher) Equals(other TypeMatcher) bool { - if s == other { - return true - } - - o, ok := other.(*sameTypeIDMatcher) - if !ok { - return false - } - - return s.accepted == o.accepted -} - -func (s sameTypeIDMatcher) String() string { - return "Type::" + s.accepted.String() -} - -// SameTypeID returns a type matcher which will match -// any DataType that uses the same arrow.Type ID as the one -// passed in here. 
-func SameTypeID(id arrow.Type) TypeMatcher { return &sameTypeIDMatcher{id} } - -type timeUnitMatcher struct { - id arrow.Type - unit arrow.TimeUnit -} - -func (s timeUnitMatcher) Matches(typ arrow.DataType) bool { - if typ.ID() != s.id { - return false - } - return s.unit == typ.(arrow.TemporalWithUnit).TimeUnit() -} - -func (s timeUnitMatcher) String() string { - return strings.ToLower(s.id.String()) + "(" + s.unit.String() + ")" -} - -func (s *timeUnitMatcher) Equals(other TypeMatcher) bool { - if s == other { - return true - } - - o, ok := other.(*timeUnitMatcher) - if !ok { - return false - } - return o.id == s.id && o.unit == s.unit -} - -// TimestampTypeUnit returns a TypeMatcher that will match only -// a Timestamp datatype with the specified TimeUnit. -func TimestampTypeUnit(unit arrow.TimeUnit) TypeMatcher { - return &timeUnitMatcher{arrow.TIMESTAMP, unit} -} - -// Time32TypeUnit returns a TypeMatcher that will match only -// a Time32 datatype with the specified TimeUnit. -func Time32TypeUnit(unit arrow.TimeUnit) TypeMatcher { - return &timeUnitMatcher{arrow.TIME32, unit} -} - -// Time64TypeUnit returns a TypeMatcher that will match only -// a Time64 datatype with the specified TimeUnit. -func Time64TypeUnit(unit arrow.TimeUnit) TypeMatcher { - return &timeUnitMatcher{arrow.TIME64, unit} -} - -// DurationTypeUnit returns a TypeMatcher that will match only -// a Duration datatype with the specified TimeUnit. 
-func DurationTypeUnit(unit arrow.TimeUnit) TypeMatcher { - return &timeUnitMatcher{arrow.DURATION, unit} -} - -type integerMatcher struct{} - -func (integerMatcher) String() string { return "integer" } -func (integerMatcher) Matches(typ arrow.DataType) bool { return arrow.IsInteger(typ.ID()) } -func (integerMatcher) Equals(other TypeMatcher) bool { - _, ok := other.(integerMatcher) - return ok -} - -type binaryLikeMatcher struct{} - -func (binaryLikeMatcher) String() string { return "binary-like" } -func (binaryLikeMatcher) Matches(typ arrow.DataType) bool { return arrow.IsBinaryLike(typ.ID()) } -func (binaryLikeMatcher) Equals(other TypeMatcher) bool { - _, ok := other.(binaryLikeMatcher) - return ok -} - -type largeBinaryLikeMatcher struct{} - -func (largeBinaryLikeMatcher) String() string { return "large-binary-like" } -func (largeBinaryLikeMatcher) Matches(typ arrow.DataType) bool { - return arrow.IsLargeBinaryLike(typ.ID()) -} -func (largeBinaryLikeMatcher) Equals(other TypeMatcher) bool { - _, ok := other.(largeBinaryLikeMatcher) - return ok -} - -type fsbLikeMatcher struct{} - -func (fsbLikeMatcher) String() string { return "fixed-size-binary-like" } -func (fsbLikeMatcher) Matches(typ arrow.DataType) bool { return arrow.IsFixedSizeBinary(typ.ID()) } -func (fsbLikeMatcher) Equals(other TypeMatcher) bool { - _, ok := other.(fsbLikeMatcher) - return ok -} - -// Integer returns a TypeMatcher which will match any integral type like int8 or uint16 -func Integer() TypeMatcher { return integerMatcher{} } - -// BinaryLike returns a TypeMatcher that will match Binary or String -func BinaryLike() TypeMatcher { return binaryLikeMatcher{} } - -// LargeBinaryLike returns a TypeMatcher which will match LargeBinary or LargeString -func LargeBinaryLike() TypeMatcher { return largeBinaryLikeMatcher{} } - -// FixedSizeBinaryLike returns a TypeMatcher that will match FixedSizeBinary -// or Decimal128/256 -func FixedSizeBinaryLike() TypeMatcher { return fsbLikeMatcher{} } - 
-type primitiveMatcher struct{} - -func (primitiveMatcher) String() string { return "primitive" } -func (primitiveMatcher) Matches(typ arrow.DataType) bool { return arrow.IsPrimitive(typ.ID()) } -func (primitiveMatcher) Equals(other TypeMatcher) bool { - _, ok := other.(primitiveMatcher) - return ok -} - -// Primitive returns a TypeMatcher that will match any type that arrow.IsPrimitive -// returns true for. -func Primitive() TypeMatcher { return primitiveMatcher{} } - -type reeMatcher struct { - runEndsMatcher TypeMatcher - encodedMatcher TypeMatcher -} - -func (r reeMatcher) Matches(typ arrow.DataType) bool { - if typ.ID() != arrow.RUN_END_ENCODED { - return false - } - - dt := typ.(*arrow.RunEndEncodedType) - return r.runEndsMatcher.Matches(dt.RunEnds()) && r.encodedMatcher.Matches(dt.Encoded()) -} - -func (r reeMatcher) Equals(other TypeMatcher) bool { - o, ok := other.(reeMatcher) - if !ok { - return false - } - return r.runEndsMatcher.Equals(o.runEndsMatcher) && r.encodedMatcher.Equals(o.encodedMatcher) -} - -func (r reeMatcher) String() string { - return "run_end_encoded(run_ends=" + r.runEndsMatcher.String() + ", values=" + r.encodedMatcher.String() + ")" -} - -// RunEndEncoded returns a matcher which matches a RunEndEncoded -// type whose encoded type is matched by the passed in matcher. -func RunEndEncoded(runEndsMatcher, encodedMatcher TypeMatcher) TypeMatcher { - return reeMatcher{ - runEndsMatcher: runEndsMatcher, - encodedMatcher: encodedMatcher} -} - -// InputKind is an enum representing the type of Input matching -// that will be done. Either accepting any type, an exact specific type -// or using a TypeMatcher. -type InputKind int8 - -const ( - InputAny InputKind = iota - InputExact - InputUseMatcher -) - -// InputType is used for type checking arguments passed to a kernel -// and stored within a KernelSignature. The type-checking rule can -// be supplied either with an exact DataType instance or a custom -// TypeMatcher. 
-type InputType struct { - Kind InputKind - Type arrow.DataType - Matcher TypeMatcher -} - -func NewExactInput(dt arrow.DataType) InputType { return InputType{Kind: InputExact, Type: dt} } -func NewMatchedInput(match TypeMatcher) InputType { - return InputType{Kind: InputUseMatcher, Matcher: match} -} -func NewIDInput(id arrow.Type) InputType { return NewMatchedInput(SameTypeID(id)) } - -func (it InputType) MatchID() arrow.Type { - switch it.Kind { - case InputExact: - return it.Type.ID() - case InputUseMatcher: - if idMatch, ok := it.Matcher.(*sameTypeIDMatcher); ok { - return idMatch.accepted - } - } - debug.Assert(false, "MatchID called on non-id matching InputType") - return -1 -} - -func (it InputType) String() string { - switch it.Kind { - case InputAny: - return "any" - case InputUseMatcher: - return it.Matcher.String() - case InputExact: - return it.Type.String() - } - return "" -} - -func (it *InputType) Equals(other *InputType) bool { - if it == other { - return true - } - - if it.Kind != other.Kind { - return false - } - - switch it.Kind { - case InputAny: - return true - case InputExact: - return arrow.TypeEqual(it.Type, other.Type) - case InputUseMatcher: - return it.Matcher.Equals(other.Matcher) - default: - return false - } -} - -func (it InputType) Hash() uint64 { - var h maphash.Hash - - h.SetSeed(hashSeed) - result := HashCombine(h.Sum64(), uint64(it.Kind)) - switch it.Kind { - case InputExact: - result = HashCombine(result, arrow.HashType(hashSeed, it.Type)) - } - return result -} - -func (it InputType) Matches(dt arrow.DataType) bool { - switch it.Kind { - case InputExact: - return arrow.TypeEqual(it.Type, dt) - case InputUseMatcher: - return it.Matcher.Matches(dt) - case InputAny: - return true - default: - debug.Assert(false, "invalid InputKind") - return true - } -} - -// ResolveKind defines the way that a particular OutputType resolves -// its type. 
Either it has a fixed type to resolve to or it contains -// a Resolver which will compute the resolved type based on -// the input types. -type ResolveKind int8 - -const ( - ResolveFixed ResolveKind = iota - ResolveComputed -) - -// TypeResolver is simply a function that takes a KernelCtx and a list of input types -// and returns the resolved type or an error. -type TypeResolver = func(*KernelCtx, []arrow.DataType) (arrow.DataType, error) - -type OutputType struct { - Kind ResolveKind - Type arrow.DataType - Resolver TypeResolver -} - -func NewOutputType(dt arrow.DataType) OutputType { - return OutputType{Kind: ResolveFixed, Type: dt} -} - -func NewComputedOutputType(resolver TypeResolver) OutputType { - return OutputType{Kind: ResolveComputed, Resolver: resolver} -} - -func (o OutputType) String() string { - if o.Kind == ResolveFixed { - return o.Type.String() - } - return "computed" -} - -func (o OutputType) Resolve(ctx *KernelCtx, types []arrow.DataType) (arrow.DataType, error) { - switch o.Kind { - case ResolveFixed: - return o.Type, nil - } - - return o.Resolver(ctx, types) -} - -// NullHandling is an enum representing how a particular Kernel -// wants the executor to handle nulls. -type NullHandling int8 - -const ( - // Compute the output validity bitmap by intersection the validity - // bitmaps of the arguments using bitwise-and operations. This means - // that values in the output are valid/non-null only if the corresponding - // values in all input arguments were valid/non-null. Kernels generally - // do not have to touch the bitmap afterwards, but a kernel's exec function - // is permitted to alter the bitmap after the null intersection is computed - // if necessary. - NullIntersection NullHandling = iota - // Kernel expects a pre-allocated buffer to write the result bitmap - // into. 
- NullComputedPrealloc - // Kernel will allocate and set the validity bitmap of the output - NullComputedNoPrealloc - // kernel output is never null and a validity bitmap doesn't need to - // be allocated - NullNoOutput -) - -// MemAlloc is the preference for preallocating memory of fixed-width -// type outputs during kernel execution. -type MemAlloc int8 - -const ( - // For data types that support pre-allocation (fixed-width), the - // kernel expects to be provided a pre-allocated buffer to write into. - // Non-fixed-width types must always allocate their own buffers. - // The allocation is made for the same length as the execution batch, - // so vector kernels yielding differently sized outputs should not - // use this. - // - // It is valid for the data to not be preallocated but the validity - // bitmap is (or is computed using intersection). - // - // For variable-size output types like Binary or String, or for nested - // types, this option has no effect. - MemPrealloc MemAlloc = iota - // The kernel is responsible for allocating its own data buffer - // for fixed-width output types. - MemNoPrealloc -) - -type KernelState any - -// KernelInitArgs are the arguments required to initialize an Kernel's -// state using the input types and any options. -type KernelInitArgs struct { - Kernel Kernel - Inputs []arrow.DataType - // Options are opaque and specific to the Kernel being initialized, - // may be nil if the kernel doesn't require options. - Options any -} - -// KernelInitFn is any function that receives a KernelCtx and initialization -// arguments and returns the initialized state or an error. -type KernelInitFn = func(*KernelCtx, KernelInitArgs) (KernelState, error) - -// KernelSignature holds the input and output types for a kernel. -// -// Variable argument functions with a minimum of N arguments should pass -// up to N input types to be used to validate for invocation. 
The first -// N-1 types will be matched against the first N-1 arguments and the last -// type will be matched against the remaining arguments. -type KernelSignature struct { - InputTypes []InputType - OutType OutputType - IsVarArgs bool - - // store the hashcode after it is computed so we don't - // need to recompute it - hashCode uint64 -} - -func (k KernelSignature) String() string { - var b strings.Builder - if k.IsVarArgs { - b.WriteString("varargs[") - } else { - b.WriteByte('(') - } - - for i, t := range k.InputTypes { - if i != 0 { - b.WriteString(", ") - } - b.WriteString(t.String()) - } - if k.IsVarArgs { - b.WriteString("*]") - } else { - b.WriteByte(')') - } - - b.WriteString(" -> ") - b.WriteString(k.OutType.String()) - return b.String() -} - -func (k KernelSignature) Equals(other KernelSignature) bool { - if k.IsVarArgs != other.IsVarArgs { - return false - } - - return slices.EqualFunc(k.InputTypes, other.InputTypes, func(e1, e2 InputType) bool { - return e1.Equals(&e2) - }) -} - -func (k *KernelSignature) Hash() uint64 { - if k.hashCode != 0 { - return k.hashCode - } - - var h maphash.Hash - h.SetSeed(hashSeed) - result := h.Sum64() - for _, typ := range k.InputTypes { - result = HashCombine(result, typ.Hash()) - } - k.hashCode = result - return result -} - -func (k KernelSignature) MatchesInputs(types []arrow.DataType) bool { - switch k.IsVarArgs { - case true: - // check that it has enough to match at least the non-vararg types - if len(types) < (len(k.InputTypes) - 1) { - return false - } - - for i, t := range types { - if !k.InputTypes[Min(i, len(k.InputTypes)-1)].Matches(t) { - return false - } - } - case false: - if len(types) != len(k.InputTypes) { - return false - } - for i, t := range types { - if !k.InputTypes[i].Matches(t) { - return false - } - } - } - return true -} - -// ArrayKernelExec is an alias definition for a kernel's execution function. -// -// This is used for both stateless and stateful kernels. 
If a kernel -// depends on some execution state, it can be accessed from the KernelCtx -// object, which also contains the context.Context object which can be -// used for shortcircuiting by checking context.Done / context.Err. -// This allows kernels to control handling timeouts or cancellation of -// computation. -type ArrayKernelExec = func(*KernelCtx, *ExecSpan, *ExecResult) error - -type kernel struct { - Init KernelInitFn - Signature *KernelSignature - Data KernelState - Parallelizable bool -} - -func (k kernel) GetInitFn() KernelInitFn { return k.Init } -func (k kernel) GetSig() *KernelSignature { return k.Signature } - -// A ScalarKernel is the kernel implementation for a Scalar Function. -// In addition to the members found in the base Kernel, it contains -// the null handling and memory pre-allocation preferences. -type ScalarKernel struct { - kernel - - ExecFn ArrayKernelExec - CanWriteIntoSlices bool - NullHandling NullHandling - MemAlloc MemAlloc -} - -// NewScalarKernel constructs a new kernel for scalar execution, constructing -// a KernelSignature with the provided input types and output type, and using -// the passed in execution implementation and initialization function. -func NewScalarKernel(in []InputType, out OutputType, exec ArrayKernelExec, init KernelInitFn) ScalarKernel { - return NewScalarKernelWithSig(&KernelSignature{ - InputTypes: in, - OutType: out, - }, exec, init) -} - -// NewScalarKernelWithSig is a convenience when you already have a signature -// to use for constructing a kernel. It's equivalent to passing the components -// of the signature (input and output types) to NewScalarKernel. 
-func NewScalarKernelWithSig(sig *KernelSignature, exec ArrayKernelExec, init KernelInitFn) ScalarKernel { - return ScalarKernel{ - kernel: kernel{Signature: sig, Init: init, Parallelizable: true}, - ExecFn: exec, - CanWriteIntoSlices: true, - NullHandling: NullIntersection, - MemAlloc: MemPrealloc, - } -} - -func (s *ScalarKernel) Exec(ctx *KernelCtx, sp *ExecSpan, out *ExecResult) error { - return s.ExecFn(ctx, sp, out) -} - -func (s ScalarKernel) GetNullHandling() NullHandling { return s.NullHandling } -func (s ScalarKernel) GetMemAlloc() MemAlloc { return s.MemAlloc } -func (s ScalarKernel) CanFillSlices() bool { return s.CanWriteIntoSlices } - -// ChunkedExec is the signature for executing a stateful vector kernel -// against a ChunkedArray input. It is optional -type ChunkedExec func(*KernelCtx, []*arrow.Chunked, *ExecResult) ([]*ExecResult, error) - -// FinalizeFunc is an optional finalizer function for any postprocessing -// that may need to be done on data before returning it -type FinalizeFunc func(*KernelCtx, []*ArraySpan) ([]*ArraySpan, error) - -// VectorKernel is a structure for implementations of vector functions. -// It can optionally contain a finalizer function, the null handling -// and memory pre-allocation preferences (different defaults from -// scalar kernels when using NewVectorKernel), and other execution related -// options. -type VectorKernel struct { - kernel - - ExecFn ArrayKernelExec - ExecChunked ChunkedExec - Finalize FinalizeFunc - NullHandling NullHandling - MemAlloc MemAlloc - CanWriteIntoSlices bool - CanExecuteChunkWise bool - OutputChunked bool -} - -// NewVectorKernel constructs a new kernel for execution of vector functions, -// which take into account more than just the individual scalar values -// of its input. Output of a vector kernel may be a different length -// than its inputs. 
-func NewVectorKernel(inTypes []InputType, outType OutputType, exec ArrayKernelExec, init KernelInitFn) VectorKernel { - return NewVectorKernelWithSig(&KernelSignature{ - InputTypes: inTypes, OutType: outType}, exec, init) -} - -// NewVectorKernelWithSig is a convenience function for creating a kernel -// when you already have a signature constructed. -func NewVectorKernelWithSig(sig *KernelSignature, exec ArrayKernelExec, init KernelInitFn) VectorKernel { - return VectorKernel{ - kernel: kernel{Signature: sig, Init: init, Parallelizable: true}, - ExecFn: exec, - CanWriteIntoSlices: true, - CanExecuteChunkWise: true, - OutputChunked: true, - NullHandling: NullComputedNoPrealloc, - MemAlloc: MemNoPrealloc, - } -} - -func (s *VectorKernel) Exec(ctx *KernelCtx, sp *ExecSpan, out *ExecResult) error { - return s.ExecFn(ctx, sp, out) -} - -func (s VectorKernel) GetNullHandling() NullHandling { return s.NullHandling } -func (s VectorKernel) GetMemAlloc() MemAlloc { return s.MemAlloc } -func (s VectorKernel) CanFillSlices() bool { return s.CanWriteIntoSlices } diff --git a/go/arrow/compute/exec/kernel_test.go b/go/arrow/compute/exec/kernel_test.go deleted file mode 100644 index 248bad323a307..0000000000000 --- a/go/arrow/compute/exec/kernel_test.go +++ /dev/null @@ -1,588 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package exec_test - -import ( - "fmt" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/stretchr/testify/assert" -) - -func TestTypeMatcherSameTypeID(t *testing.T) { - matcher := exec.SameTypeID(arrow.DECIMAL128) - assert.True(t, matcher.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 2})) - assert.False(t, matcher.Matches(arrow.PrimitiveTypes.Int8)) - - assert.Equal(t, "Type::DECIMAL128", matcher.String()) - - assert.True(t, matcher.Equals(matcher)) - assert.True(t, matcher.Equals(exec.SameTypeID(arrow.DECIMAL))) - assert.False(t, matcher.Equals(exec.SameTypeID(arrow.TIMESTAMP))) - assert.False(t, matcher.Equals(exec.Time32TypeUnit(arrow.Microsecond))) -} - -func TestTypeMatcherTimestampTypeUnit(t *testing.T) { - matcher := exec.TimestampTypeUnit(arrow.Millisecond) - matcher2 := exec.Time32TypeUnit(arrow.Millisecond) - matcher3 := exec.Time64TypeUnit(arrow.Microsecond) - matcher4 := exec.DurationTypeUnit(arrow.Microsecond) - - assert.True(t, matcher.Matches(arrow.FixedWidthTypes.Timestamp_ms)) - assert.True(t, matcher.Matches(&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "utc"})) - assert.False(t, matcher.Matches(arrow.FixedWidthTypes.Timestamp_s)) - assert.False(t, matcher.Matches(arrow.FixedWidthTypes.Time32ms)) - assert.True(t, matcher2.Matches(arrow.FixedWidthTypes.Time32ms)) - - assert.True(t, matcher3.Matches(arrow.FixedWidthTypes.Time64us)) - assert.False(t, matcher3.Matches(arrow.FixedWidthTypes.Time64ns)) - assert.True(t, matcher4.Matches(arrow.FixedWidthTypes.Duration_us)) - assert.False(t, matcher4.Matches(arrow.FixedWidthTypes.Duration_ms)) - - // 
check String() representation - assert.Equal(t, "timestamp(s)", exec.TimestampTypeUnit(arrow.Second).String()) - assert.Equal(t, "timestamp(ms)", exec.TimestampTypeUnit(arrow.Millisecond).String()) - assert.Equal(t, "timestamp(us)", exec.TimestampTypeUnit(arrow.Microsecond).String()) - assert.Equal(t, "timestamp(ns)", exec.TimestampTypeUnit(arrow.Nanosecond).String()) - - // equals implementation - assert.True(t, matcher.Equals(matcher)) - assert.True(t, matcher.Equals(exec.TimestampTypeUnit(arrow.Millisecond))) - assert.False(t, matcher.Equals(exec.TimestampTypeUnit(arrow.Microsecond))) - assert.False(t, matcher.Equals(exec.Time32TypeUnit(arrow.Millisecond))) - assert.False(t, matcher3.Equals(matcher2)) - assert.False(t, matcher4.Equals(matcher3)) - assert.True(t, matcher4.Equals(exec.DurationTypeUnit(arrow.Microsecond))) - assert.False(t, matcher.Equals(exec.SameTypeID(arrow.TIMESTAMP))) -} - -func TestIntegerMatcher(t *testing.T) { - match := exec.Integer() - - assert.Equal(t, "integer", match.String()) - assert.True(t, match.Matches(arrow.PrimitiveTypes.Int8)) - assert.True(t, match.Matches(arrow.PrimitiveTypes.Uint64)) - assert.True(t, match.Equals(exec.Integer())) - assert.False(t, match.Equals(exec.BinaryLike())) -} - -func TestBinaryLikeMatcher(t *testing.T) { - match := exec.BinaryLike() - - assert.Equal(t, "binary-like", match.String()) - assert.True(t, match.Matches(arrow.BinaryTypes.String)) - assert.True(t, match.Matches(arrow.BinaryTypes.Binary)) - assert.False(t, match.Matches(arrow.BinaryTypes.LargeString)) - assert.False(t, match.Matches(arrow.BinaryTypes.LargeBinary)) - assert.False(t, match.Equals(exec.LargeBinaryLike())) - assert.True(t, match.Equals(exec.BinaryLike())) -} - -func TestLargeBinaryLikeMatcher(t *testing.T) { - match := exec.LargeBinaryLike() - - assert.Equal(t, "large-binary-like", match.String()) - assert.False(t, match.Matches(arrow.BinaryTypes.String)) - assert.False(t, match.Matches(arrow.BinaryTypes.Binary)) - assert.True(t, 
match.Matches(arrow.BinaryTypes.LargeString)) - assert.True(t, match.Matches(arrow.BinaryTypes.LargeBinary)) - assert.True(t, match.Equals(exec.LargeBinaryLike())) - assert.False(t, match.Equals(exec.BinaryLike())) -} - -func TestFixedSizeBinaryMatcher(t *testing.T) { - match := exec.FixedSizeBinaryLike() - - assert.Equal(t, "fixed-size-binary-like", match.String()) - assert.False(t, match.Matches(arrow.BinaryTypes.String)) - assert.True(t, match.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 5})) - assert.True(t, match.Matches(&arrow.Decimal256Type{Precision: 12, Scale: 10})) - assert.True(t, match.Matches(&arrow.FixedSizeBinaryType{})) - assert.False(t, match.Equals(exec.LargeBinaryLike())) - assert.True(t, match.Equals(exec.FixedSizeBinaryLike())) -} - -func TestPrimitiveMatcher(t *testing.T) { - match := exec.Primitive() - - assert.Equal(t, "primitive", match.String()) - assert.True(t, match.Equals(exec.Primitive())) - - types := []arrow.DataType{ - arrow.FixedWidthTypes.Boolean, - arrow.PrimitiveTypes.Uint8, - arrow.PrimitiveTypes.Int8, - arrow.PrimitiveTypes.Uint16, - arrow.PrimitiveTypes.Int16, - arrow.PrimitiveTypes.Uint32, - arrow.PrimitiveTypes.Int32, - arrow.PrimitiveTypes.Uint64, - arrow.PrimitiveTypes.Int64, - arrow.FixedWidthTypes.Float16, - arrow.PrimitiveTypes.Float32, - arrow.PrimitiveTypes.Float64, - arrow.FixedWidthTypes.Date32, - arrow.FixedWidthTypes.Date64, - arrow.FixedWidthTypes.Time32ms, - arrow.FixedWidthTypes.Time64ns, - arrow.FixedWidthTypes.Timestamp_ms, - arrow.FixedWidthTypes.Duration_ms, - arrow.FixedWidthTypes.MonthInterval, - arrow.FixedWidthTypes.DayTimeInterval, - arrow.FixedWidthTypes.MonthDayNanoInterval, - } - - for _, typ := range types { - assert.True(t, match.Matches(typ)) - } - - assert.False(t, match.Matches(arrow.Null)) -} - -func TestREEMatcher(t *testing.T) { - tests := []struct { - runEnds exec.TypeMatcher - enc exec.TypeMatcher - matchRunEnds arrow.DataType - nomatchRunEnds arrow.DataType - matchEnc 
arrow.DataType - nomatchEnc arrow.DataType - }{ - {exec.Integer(), exec.Integer(), arrow.PrimitiveTypes.Int16, arrow.FixedWidthTypes.Float16, arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String}, - {exec.SameTypeID(arrow.INT32), exec.BinaryLike(), arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int64, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32}, - {exec.SameTypeID(arrow.INT64), exec.SameTypeID(arrow.STRUCT), arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int32, arrow.StructOf(arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int16}), arrow.PrimitiveTypes.Int8}, - } - - for _, tt := range tests { - t.Run(tt.enc.String(), func(t *testing.T) { - matcher := exec.RunEndEncoded(tt.runEnds, tt.enc) - assert.False(t, matcher.Matches(tt.matchEnc)) - assert.True(t, matcher.Matches(arrow.RunEndEncodedOf(tt.matchRunEnds, tt.matchEnc))) - assert.False(t, matcher.Matches(arrow.RunEndEncodedOf(tt.matchRunEnds, tt.nomatchEnc))) - assert.False(t, matcher.Matches(arrow.RunEndEncodedOf(tt.nomatchRunEnds, tt.matchEnc))) - assert.False(t, matcher.Matches(arrow.RunEndEncodedOf(tt.nomatchRunEnds, tt.nomatchEnc))) - - assert.Equal(t, "run_end_encoded(run_ends="+tt.runEnds.String()+", values="+tt.enc.String()+")", matcher.String()) - - assert.True(t, matcher.Equals(exec.RunEndEncoded(tt.runEnds, tt.enc))) - assert.False(t, matcher.Equals(exec.Primitive())) - assert.False(t, matcher.Equals(exec.RunEndEncoded(exec.SameTypeID(tt.nomatchRunEnds.ID()), exec.SameTypeID(tt.nomatchEnc.ID())))) - assert.False(t, matcher.Equals(exec.RunEndEncoded(exec.SameTypeID(tt.matchRunEnds.ID()), exec.SameTypeID(tt.nomatchEnc.ID())))) - assert.False(t, matcher.Equals(exec.RunEndEncoded(exec.SameTypeID(tt.nomatchRunEnds.ID()), exec.SameTypeID(tt.matchEnc.ID())))) - }) - } -} - -func TestInputTypeAnyType(t *testing.T) { - var ty exec.InputType - assert.Equal(t, exec.InputAny, ty.Kind) -} - -func TestInputType(t *testing.T) { - ty1 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) - assert.Equal(t, 
exec.InputExact, ty1.Kind) - assert.True(t, arrow.TypeEqual(arrow.PrimitiveTypes.Int8, ty1.Type)) - assert.Equal(t, "int8", ty1.String()) - - ty2 := exec.NewIDInput(arrow.DECIMAL) - assert.Equal(t, exec.InputUseMatcher, ty2.Kind) - assert.Equal(t, "Type::DECIMAL128", ty2.String()) - assert.True(t, ty2.Matcher.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 2})) - assert.False(t, ty2.Matcher.Matches(arrow.PrimitiveTypes.Int16)) - - ty3 := exec.NewMatchedInput(exec.TimestampTypeUnit(arrow.Microsecond)) - assert.Equal(t, "timestamp(us)", ty3.String()) - - var ty4 exec.InputType - assert.Equal(t, "any", ty4.String()) - // InputAny matches anything - assert.True(t, ty4.Matches((arrow.DataType)(nil))) -} - -func TestInputTypeEquals(t *testing.T) { - t1 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) - t2 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) - t3 := exec.NewExactInput(arrow.PrimitiveTypes.Int32) - - t5 := exec.NewIDInput(arrow.DECIMAL) - t6 := exec.NewIDInput(arrow.DECIMAL) - - assert.True(t, t1.Equals(&t2)) - assert.False(t, t1.Equals(&t3)) - assert.False(t, t1.Equals(&t5)) - assert.True(t, t5.Equals(&t5)) - assert.True(t, t5.Equals(&t6)) - - var ty exec.InputType - assert.True(t, ty.Equals(&exec.InputType{Kind: exec.InputAny})) - - // for now, an ID matcher for arrow.INT32 and a ExactInput for - // arrow.PrimitiveTypes.Int32 are treated as being different. 
- // this could be made equivalent later if desireable - - // check that field metadata is excluded from equality checks - t7 := exec.NewExactInput(arrow.ListOfField( - arrow.Field{Name: "item", Type: arrow.BinaryTypes.String, - Nullable: true, Metadata: arrow.NewMetadata([]string{"foo"}, []string{"bar"})})) - t8 := exec.NewExactInput(arrow.ListOf(arrow.BinaryTypes.String)) - assert.True(t, t7.Equals(&t8)) -} - -func TestInputTypeHash(t *testing.T) { - var ( - t0 exec.InputType - t1 = exec.NewExactInput(arrow.PrimitiveTypes.Int8) - t2 = exec.NewIDInput(arrow.DECIMAL) - ) - - // these checks try to determine first of all whether hash - // always returns the same value, and whether the elements - // of the type are all incorporated into the hash - assert.Equal(t, t0.Hash(), t0.Hash()) - assert.Equal(t, t1.Hash(), t1.Hash()) - assert.Equal(t, t2.Hash(), t2.Hash()) - assert.NotEqual(t, t0.Hash(), t1.Hash()) - assert.NotEqual(t, t0.Hash(), t2.Hash()) - assert.NotEqual(t, t1.Hash(), t2.Hash()) -} - -func TestInputTypeMatches(t *testing.T) { - in1 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) - - assert.True(t, in1.Matches(arrow.PrimitiveTypes.Int8)) - assert.False(t, in1.Matches(arrow.PrimitiveTypes.Int16)) - - in2 := exec.NewIDInput(arrow.DECIMAL) - assert.True(t, in2.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 2})) - - ty2 := &arrow.Decimal128Type{Precision: 12, Scale: 2} - ty3 := arrow.PrimitiveTypes.Float64 - - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - arr2 := array.MakeArrayOfNull(mem, ty2, 1) - arr3 := array.MakeArrayOfNull(mem, ty3, 1) - defer arr2.Release() - defer arr3.Release() - - scalar2, err := scalar.GetScalar(arr2, 0) - assert.NoError(t, err) - - datumArr := compute.NewDatum(arr2) - defer datumArr.Release() - datumScalar := compute.NewDatum(scalar2) - defer datumScalar.Release() - - assert.False(t, in2.Matches(ty3)) - assert.False(t, in2.Matches(arr3.DataType())) -} - -func TestOutputType(t 
*testing.T) { - ty1 := exec.NewOutputType(arrow.PrimitiveTypes.Int8) - assert.Equal(t, exec.ResolveFixed, ty1.Kind) - assert.True(t, arrow.TypeEqual(arrow.PrimitiveTypes.Int8, ty1.Type)) - - dummyResolver := func(_ *exec.KernelCtx, args []arrow.DataType) (arrow.DataType, error) { - return arrow.PrimitiveTypes.Int32, nil - } - - ty2 := exec.NewComputedOutputType(dummyResolver) - assert.Equal(t, exec.ResolveComputed, ty2.Kind) - - outType2, err := ty2.Resolve(nil, nil) - assert.NoError(t, err) - assert.Same(t, arrow.PrimitiveTypes.Int32, outType2) - - ty3 := ty1 - assert.Equal(t, exec.ResolveFixed, ty3.Kind) - assert.True(t, arrow.TypeEqual(ty1.Type, ty3.Type)) - - ty4 := ty2 - assert.Equal(t, exec.ResolveComputed, ty4.Kind) - outType4, err := ty4.Resolve(nil, nil) - assert.NoError(t, err) - assert.Same(t, arrow.PrimitiveTypes.Int32, outType4) - - assert.Equal(t, "int8", ty3.String()) - assert.Equal(t, "computed", ty4.String()) -} - -func TestOutputTypeResolve(t *testing.T) { - ty1 := exec.NewOutputType(arrow.PrimitiveTypes.Int32) - - result, err := ty1.Resolve(nil, nil) - assert.NoError(t, err) - assert.Same(t, arrow.PrimitiveTypes.Int32, result) - - result, err = ty1.Resolve(nil, []arrow.DataType{arrow.PrimitiveTypes.Int8}) - assert.NoError(t, err) - assert.Same(t, arrow.PrimitiveTypes.Int32, result) - - result, err = ty1.Resolve(nil, []arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int8}) - assert.NoError(t, err) - assert.Same(t, arrow.PrimitiveTypes.Int32, result) - - resolver := func(_ *exec.KernelCtx, args []arrow.DataType) (arrow.DataType, error) { - return args[0], nil - } - ty2 := exec.NewComputedOutputType(resolver) - - result, err = ty2.Resolve(nil, []arrow.DataType{arrow.BinaryTypes.String}) - assert.NoError(t, err) - assert.Same(t, arrow.BinaryTypes.String, result) - - // type resolver that returns an error - ty3 := exec.NewComputedOutputType(func(_ *exec.KernelCtx, dt []arrow.DataType) (arrow.DataType, error) { - // checking the value 
types versus the function arity should be validated - // elsewhere. this is just for illustration purposes - if len(dt) == 0 { - return nil, fmt.Errorf("%w: need at least one argument", arrow.ErrInvalid) - } - return dt[0], nil - }) - - _, err = ty3.Resolve(nil, []arrow.DataType{}) - assert.ErrorIs(t, err, arrow.ErrInvalid) - - // resolver returns a fixed value - ty4 := exec.NewComputedOutputType(func(*exec.KernelCtx, []arrow.DataType) (arrow.DataType, error) { - return arrow.PrimitiveTypes.Int32, nil - }) - result, err = ty4.Resolve(nil, []arrow.DataType{arrow.PrimitiveTypes.Int8}) - assert.NoError(t, err) - assert.Same(t, arrow.PrimitiveTypes.Int32, result) - result, err = ty4.Resolve(nil, []arrow.DataType{}) - assert.NoError(t, err) - assert.Same(t, arrow.PrimitiveTypes.Int32, result) -} - -func TestKernelSignatureEquals(t *testing.T) { - sig1 := exec.KernelSignature{ - InputTypes: []exec.InputType{}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String)} - sig1Copy := exec.KernelSignature{ - InputTypes: []exec.InputType{}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String)} - sig2 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - - // output type doesn't matter (for now) - sig3 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.PrimitiveTypes.Int32), - } - - sig4 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewExactInput(arrow.PrimitiveTypes.Int16), - }, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - sig4Copy := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewExactInput(arrow.PrimitiveTypes.Int16), - }, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - sig5 := exec.KernelSignature{ - 
InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewExactInput(arrow.PrimitiveTypes.Int16), - exec.NewExactInput(arrow.PrimitiveTypes.Int32), - }, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - - assert.True(t, sig1.Equals(sig1)) - assert.True(t, sig2.Equals(sig3)) - assert.False(t, sig3.Equals(sig4)) - - // different sig objects but same sig - assert.True(t, sig1.Equals(sig1Copy)) - assert.True(t, sig4.Equals(sig4Copy)) - - // match first 2 args, but not third - assert.False(t, sig4.Equals(sig5)) -} - -func TestKernelSignatureVarArgsEqual(t *testing.T) { - sig1 := exec.KernelSignature{ - InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - IsVarArgs: true, - } - sig2 := exec.KernelSignature{ - InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - IsVarArgs: true, - } - sig3 := exec.KernelSignature{ - InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - - assert.True(t, sig1.Equals(sig2)) - assert.False(t, sig2.Equals(sig3)) -} - -func TestKernelSignatureHash(t *testing.T) { - sig1 := exec.KernelSignature{ - InputTypes: []exec.InputType{}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - sig2 := exec.KernelSignature{ - InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - sig3 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewExactInput(arrow.PrimitiveTypes.Int32)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - - assert.Equal(t, sig1.Hash(), sig1.Hash()) - assert.Equal(t, sig2.Hash(), sig2.Hash()) - assert.NotEqual(t, sig1.Hash(), sig2.Hash()) - assert.NotEqual(t, sig2.Hash(), 
sig3.Hash()) -} - -func TestKernelSignatureMatchesInputs(t *testing.T) { - // () -> boolean - sig1 := exec.KernelSignature{ - OutType: exec.NewOutputType(arrow.FixedWidthTypes.Boolean)} - - assert.True(t, sig1.MatchesInputs([]arrow.DataType{})) - assert.False(t, sig1.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8})) - - // (int8, decimal) -> boolean - sig2 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewIDInput(arrow.DECIMAL)}, - OutType: exec.NewOutputType(arrow.FixedWidthTypes.Boolean), - } - assert.False(t, sig2.MatchesInputs([]arrow.DataType{})) - assert.False(t, sig2.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8})) - assert.True(t, sig2.MatchesInputs([]arrow.DataType{ - arrow.PrimitiveTypes.Int8, - &arrow.Decimal128Type{Precision: 12, Scale: 2}})) - - // (int8, int32) -> boolean - sig3 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewExactInput(arrow.PrimitiveTypes.Int32), - }, - OutType: exec.NewOutputType(arrow.FixedWidthTypes.Boolean), - } - assert.False(t, sig3.MatchesInputs(nil)) - assert.True(t, sig3.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int32})) - assert.False(t, sig3.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int16})) -} - -func TestKernelSignatureVarArgsMatchesInputs(t *testing.T) { - { - sig := exec.KernelSignature{ - InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - IsVarArgs: true, - } - - args := []arrow.DataType{arrow.PrimitiveTypes.Int8} - assert.True(t, sig.MatchesInputs(args)) - args = append(args, arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int8) - assert.True(t, sig.MatchesInputs(args)) - args = append(args, arrow.PrimitiveTypes.Int32) - assert.False(t, sig.MatchesInputs(args)) - } - { - sig := exec.KernelSignature{ - 
InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewExactInput(arrow.BinaryTypes.String), - }, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - IsVarArgs: true, - } - - args := []arrow.DataType{arrow.PrimitiveTypes.Int8} - assert.True(t, sig.MatchesInputs(args)) - args = append(args, arrow.BinaryTypes.String, arrow.BinaryTypes.String) - assert.True(t, sig.MatchesInputs(args)) - args = append(args, arrow.PrimitiveTypes.Int32) - assert.False(t, sig.MatchesInputs(args)) - } -} - -func TestKernelSignatureToString(t *testing.T) { - inTypes := []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewIDInput(arrow.DECIMAL), - exec.NewExactInput(arrow.BinaryTypes.String), - } - - sig := exec.KernelSignature{ - InputTypes: inTypes, OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - assert.Equal(t, "(int8, Type::DECIMAL128, utf8) -> utf8", sig.String()) - - outType := exec.NewComputedOutputType(func(*exec.KernelCtx, []arrow.DataType) (arrow.DataType, error) { - return nil, arrow.ErrInvalid - }) - sig2 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewIDInput(arrow.DECIMAL)}, - OutType: outType, - } - assert.Equal(t, "(int8, Type::DECIMAL128) -> computed", sig2.String()) -} - -func TestKernelSignatureVarArgsToString(t *testing.T) { - sig1 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - IsVarArgs: true, - } - assert.Equal(t, "varargs[int8*] -> utf8", sig1.String()) - - sig2 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.BinaryTypes.String), - exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - IsVarArgs: true, - } - assert.Equal(t, "varargs[utf8, int8*] -> utf8", sig2.String()) -} diff --git a/go/arrow/compute/exec/span.go 
b/go/arrow/compute/exec/span.go deleted file mode 100644 index 6156acfd008aa..0000000000000 --- a/go/arrow/compute/exec/span.go +++ /dev/null @@ -1,634 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package exec - -import ( - "sync/atomic" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" -) - -// BufferSpan is a lightweight Buffer holder for ArraySpans that does not -// take ownership of the underlying memory.Buffer at all or could be -// used to reference raw byte slices instead. -type BufferSpan struct { - // Buf should be the byte slice representing this buffer, if this is - // nil then this bufferspan should be considered empty. - Buf []byte - // Owner should point to an underlying parent memory.Buffer if this - // memory is owned by a different, existing, buffer. Retain is not - // called on this buffer, so it must not be released as long as - // this BufferSpan refers to it. 
- Owner *memory.Buffer - // SelfAlloc tracks whether or not this bufferspan is the only owner - // of the Owning memory.Buffer. This happens when preallocating - // memory or if a kernel allocates it's own buffer for a result. - // In these cases, we have to know so we can properly maintain the - // refcount if this is later turned into an ArrayData object. - SelfAlloc bool -} - -// SetBuffer sets the given buffer into this BufferSpan and marks -// SelfAlloc as false. This should be called when setting a buffer -// that is externally owned/created. -func (b *BufferSpan) SetBuffer(buf *memory.Buffer) { - b.Buf = buf.Bytes() - b.Owner = buf - b.SelfAlloc = false -} - -// WrapBuffer wraps this bufferspan around a buffer and marks -// SelfAlloc as true. This should be called when setting a buffer -// that was allocated as part of an execution rather than just -// re-using an existing buffer from an input array. -func (b *BufferSpan) WrapBuffer(buf *memory.Buffer) { - b.Buf = buf.Bytes() - b.Owner = buf - b.SelfAlloc = true -} - -// ArraySpan is a light-weight, non-owning version of arrow.ArrayData -// for more efficient handling with computation and engines. We use -// explicit go Arrays to define the buffers and some scratch space -// for easily populating and shifting around pointers to memory without -// having to worry about and deal with retain/release during calculations. 
-type ArraySpan struct { - Type arrow.DataType - Len int64 - Nulls int64 - Offset int64 - Buffers [3]BufferSpan - - // Scratch is a holding spot for things such as - // offsets or union type codes when converting from scalars - Scratch [2]uint64 - - Children []ArraySpan -} - -// if an error is encountered, call Release on a preallocated span -// to ensure it releases any self-allocated buffers, it will -// not call release on buffers it doesn't own (SelfAlloc != true) -func (a *ArraySpan) Release() { - for _, c := range a.Children { - c.Release() - } - - for _, b := range a.Buffers { - if b.SelfAlloc { - b.Owner.Release() - } - } -} - -func (a *ArraySpan) MayHaveNulls() bool { - return atomic.LoadInt64(&a.Nulls) != 0 && a.Buffers[0].Buf != nil -} - -// UpdateNullCount will count the bits in the null bitmap and update the -// number of nulls if the current null count is unknown, otherwise it just -// returns the value of a.Nulls -func (a *ArraySpan) UpdateNullCount() int64 { - curNulls := atomic.LoadInt64(&a.Nulls) - if curNulls != array.UnknownNullCount { - return curNulls - } - - newNulls := a.Len - int64(bitutil.CountSetBits(a.Buffers[0].Buf, int(a.Offset), int(a.Len))) - atomic.StoreInt64(&a.Nulls, newNulls) - return newNulls -} - -// Dictionary returns a pointer to the array span for the dictionary which -// we will always place as the first (and only) child if it exists. -func (a *ArraySpan) Dictionary() *ArraySpan { return &a.Children[0] } - -// NumBuffers returns the number of expected buffers for this type -func (a *ArraySpan) NumBuffers() int { return getNumBuffers(a.Type) } - -// MakeData generates an arrow.ArrayData object for this ArraySpan, -// properly updating the buffer ref count if necessary. 
-func (a *ArraySpan) MakeData() arrow.ArrayData { - var bufs [3]*memory.Buffer - for i := range bufs { - b := a.GetBuffer(i) - bufs[i] = b - if b != nil && a.Buffers[i].SelfAlloc { - // if this buffer is just a pointer to another existing buffer - // then we never bumped the refcount for that buffer. - // As a result, we won't call release here so that the call - // to array.NewData properly updates the ref counts of the buffers. - // If instead this buffer was allocated during calculation - // (such as during prealloc or by a kernel itself) - // then we need to release after we create the ArrayData so that it - // maintains the correct refcount of 1, giving the resulting - // ArrayData object ownership of this buffer. - defer b.Release() - } - } - - var ( - nulls = int(atomic.LoadInt64(&a.Nulls)) - length = int(a.Len) - off = int(a.Offset) - dt = a.Type - children []arrow.ArrayData - ) - - if a.Type.ID() == arrow.NULL { - nulls = length - } else if len(a.Buffers[0].Buf) == 0 { - nulls = 0 - } - - // we use a.Type for the NewData call at the end, so we can - // handle extension types by using dt to point to the storage type - // and let the proper extension type get set into the ArrayData - // object we return. 
- if dt.ID() == arrow.EXTENSION { - dt = dt.(arrow.ExtensionType).StorageType() - } - - if dt.ID() == arrow.DICTIONARY { - result := array.NewData(a.Type, length, bufs[:a.NumBuffers()], nil, nulls, off) - dict := a.Dictionary().MakeData() - defer dict.Release() - result.SetDictionary(dict) - return result - } else if dt.ID() == arrow.DENSE_UNION || dt.ID() == arrow.SPARSE_UNION { - bufs[0] = nil - nulls = 0 - } - - if len(a.Children) > 0 { - children = make([]arrow.ArrayData, len(a.Children)) - for i, c := range a.Children { - d := c.MakeData() - defer d.Release() - children[i] = d - } - } - return array.NewData(a.Type, length, bufs[:a.NumBuffers()], children, nulls, off) -} - -// MakeArray is a convenience function for calling array.MakeFromData(a.MakeData()) -func (a *ArraySpan) MakeArray() arrow.Array { - d := a.MakeData() - defer d.Release() - return array.MakeFromData(d) -} - -// SetSlice updates the offset and length of this ArraySpan to refer to -// a specific slice of the underlying buffers. -func (a *ArraySpan) SetSlice(off, length int64) { - if off == a.Offset && length == a.Len { - // don't modify the nulls if the slice is the entire span - return - } - - if a.Type.ID() != arrow.NULL { - if a.Nulls != 0 { - if a.Nulls == a.Len { - a.Nulls = length - } else { - a.Nulls = array.UnknownNullCount - } - } - } else { - a.Nulls = length - } - - a.Offset, a.Len = off, length -} - -// GetBuffer returns the buffer for the requested index. If this buffer -// is owned by another array/arrayspan the Owning buffer is returned, -// otherwise if this slice has no owning buffer, we call NewBufferBytes -// to wrap it as a memory.Buffer. Can also return nil if there is no -// buffer in this index. 
-func (a *ArraySpan) GetBuffer(idx int) *memory.Buffer { - buf := a.Buffers[idx] - switch { - case buf.Owner != nil: - return buf.Owner - case buf.Buf != nil: - return memory.NewBufferBytes(buf.Buf) - } - return nil -} - -// convenience function to resize the children slice if necessary, -// or just shrink the slice without re-allocating if there's enough -// capacity already. -func (a *ArraySpan) resizeChildren(i int) { - if cap(a.Children) >= i { - a.Children = a.Children[:i] - } else { - a.Children = make([]ArraySpan, i) - } -} - -// FillFromScalar populates this ArraySpan as if it were a 1 length array -// with the single value equal to the passed in Scalar. -func (a *ArraySpan) FillFromScalar(val scalar.Scalar) { - var ( - trueBit byte = 0x01 - falseBit byte = 0x00 - ) - - a.Type = val.DataType() - a.Len = 1 - typeID := a.Type.ID() - if val.IsValid() { - a.Nulls = 0 - } else { - a.Nulls = 1 - } - - if !arrow.IsUnion(typeID) && typeID != arrow.NULL { - if val.IsValid() { - a.Buffers[0].Buf = []byte{trueBit} - } else { - a.Buffers[0].Buf = []byte{falseBit} - } - a.Buffers[0].Owner = nil - a.Buffers[0].SelfAlloc = false - } - - switch { - case typeID == arrow.BOOL: - if val.(*scalar.Boolean).Value { - a.Buffers[1].Buf = []byte{trueBit} - } else { - a.Buffers[1].Buf = []byte{falseBit} - } - a.Buffers[1].Owner = nil - a.Buffers[1].SelfAlloc = false - case arrow.IsPrimitive(typeID) || arrow.IsDecimal(typeID): - sc := val.(scalar.PrimitiveScalar) - a.Buffers[1].Buf = sc.Data() - a.Buffers[1].Owner = nil - a.Buffers[1].SelfAlloc = false - case typeID == arrow.DICTIONARY: - sc := val.(scalar.PrimitiveScalar) - a.Buffers[1].Buf = sc.Data() - a.Buffers[1].Owner = nil - a.Buffers[1].SelfAlloc = false - a.resizeChildren(1) - a.Children[0].SetMembers(val.(*scalar.Dictionary).Value.Dict.Data()) - case arrow.IsBaseBinary(typeID): - sc := val.(scalar.BinaryScalar) - a.Buffers[1].Buf = arrow.Uint64Traits.CastToBytes(a.Scratch[:]) - a.Buffers[1].Owner = nil - 
a.Buffers[1].SelfAlloc = false - - var dataBuffer []byte - if sc.IsValid() { - dataBuffer = sc.Data() - a.Buffers[2].Owner = sc.Buffer() - a.Buffers[2].SelfAlloc = false - } - if arrow.IsBinaryLike(typeID) { - setOffsetsForScalar(a, - unsafe.Slice((*int32)(unsafe.Pointer(&a.Scratch[0])), 2), - int64(len(dataBuffer)), 1) - } else { - // large_binary_like - setOffsetsForScalar(a, - unsafe.Slice((*int64)(unsafe.Pointer(&a.Scratch[0])), 2), - int64(len(dataBuffer)), 1) - } - a.Buffers[2].Buf = dataBuffer - case typeID == arrow.FIXED_SIZE_BINARY: - sc := val.(scalar.BinaryScalar) - if !sc.IsValid() { - a.Buffers[1].Buf = make([]byte, sc.DataType().(*arrow.FixedSizeBinaryType).ByteWidth) - a.Buffers[1].Owner = nil - a.Buffers[1].SelfAlloc = false - break - } - a.Buffers[1].Buf = sc.Data() - a.Buffers[1].Owner = sc.Buffer() - a.Buffers[1].SelfAlloc = false - case arrow.IsListLike(typeID): - sc := val.(scalar.ListScalar) - valueLen := 0 - a.resizeChildren(1) - - if sc.GetList() != nil { - a.Children[0].SetMembers(sc.GetList().Data()) - valueLen = sc.GetList().Len() - } else { - // even when the value is null, we must populate - // child data to yield a valid array. 
ugh - FillZeroLength(sc.DataType().(arrow.NestedType).Fields()[0].Type, &a.Children[0]) - } - - switch typeID { - case arrow.LIST, arrow.MAP: - setOffsetsForScalar(a, - unsafe.Slice((*int32)(unsafe.Pointer(&a.Scratch[0])), 2), - int64(valueLen), 1) - case arrow.LARGE_LIST: - setOffsetsForScalar(a, - unsafe.Slice((*int64)(unsafe.Pointer(&a.Scratch[0])), 2), - int64(valueLen), 1) - default: - // fixed size list has no second buffer - a.Buffers[1].Buf, a.Buffers[1].Owner = nil, nil - a.Buffers[1].SelfAlloc = false - } - case typeID == arrow.STRUCT: - sc := val.(*scalar.Struct) - a.Buffers[1].Buf = nil - a.Buffers[1].Owner = nil - a.Buffers[1].SelfAlloc = false - a.resizeChildren(len(sc.Value)) - for i, v := range sc.Value { - a.Children[i].FillFromScalar(v) - } - case arrow.IsUnion(typeID): - // first buffer is kept null since unions have no validity vector - a.Buffers[0].Buf, a.Buffers[0].Owner = nil, nil - a.Buffers[0].SelfAlloc = false - - a.Buffers[1].Buf = arrow.Uint64Traits.CastToBytes(a.Scratch[:])[:1] - a.Buffers[1].Owner = nil - a.Buffers[1].SelfAlloc = false - codes := unsafe.Slice((*arrow.UnionTypeCode)(unsafe.Pointer(&a.Buffers[1].Buf[0])), 1) - - a.resizeChildren(len(a.Type.(arrow.UnionType).Fields())) - switch sc := val.(type) { - case *scalar.DenseUnion: - codes[0] = sc.TypeCode - // has offset, start 4 bytes in so it's aligned to the 32-bit boundaries - off := unsafe.Slice((*int32)(unsafe.Add(unsafe.Pointer(&a.Scratch[0]), arrow.Int32SizeBytes)), 2) - setOffsetsForScalar(a, off, 1, 2) - // we can't "see" the other arrays in the union, but we put the "active" - // union array in the right place and fill zero-length arrays for - // the others. 
- childIDS := a.Type.(arrow.UnionType).ChildIDs() - for i, f := range a.Type.(arrow.UnionType).Fields() { - if i == childIDS[sc.TypeCode] { - a.Children[i].FillFromScalar(sc.Value) - } else { - FillZeroLength(f.Type, &a.Children[i]) - } - } - case *scalar.SparseUnion: - codes[0] = sc.TypeCode - // sparse union scalars have a full complement of child values - // even though only one of them is relevant, so we just fill them - // in here - for i, v := range sc.Value { - a.Children[i].FillFromScalar(v) - } - } - case typeID == arrow.EXTENSION: - // pass through storage - sc := val.(*scalar.Extension) - a.FillFromScalar(sc.Value) - // restore the extension type - a.Type = val.DataType() - case typeID == arrow.NULL: - for i := range a.Buffers { - a.Buffers[i].Buf = nil - a.Buffers[i].Owner = nil - a.Buffers[i].SelfAlloc = false - } - } -} - -func (a *ArraySpan) SetDictionary(span *ArraySpan) { - a.resizeChildren(1) - a.Children[0].Release() - a.Children[0] = *span -} - -// TakeOwnership is like SetMembers only this takes ownership of -// the buffers by calling Retain on them so that the passed in -// ArrayData can be released without negatively affecting this -// ArraySpan -func (a *ArraySpan) TakeOwnership(data arrow.ArrayData) { - a.Type = data.DataType() - a.Len = int64(data.Len()) - if a.Type.ID() == arrow.NULL { - a.Nulls = a.Len - } else { - a.Nulls = int64(data.NullN()) - } - a.Offset = int64(data.Offset()) - - for i, b := range data.Buffers() { - if b != nil { - a.Buffers[i].WrapBuffer(b) - b.Retain() - } else { - a.Buffers[i].Buf = nil - a.Buffers[i].Owner = nil - a.Buffers[i].SelfAlloc = false - } - } - - typeID := a.Type.ID() - if a.Buffers[0].Buf == nil { - switch typeID { - case arrow.NULL, arrow.SPARSE_UNION, arrow.DENSE_UNION: - default: - // should already be zero, but we make sure - a.Nulls = 0 - } - } - - for i := len(data.Buffers()); i < 3; i++ { - a.Buffers[i].Buf = nil - a.Buffers[i].Owner = nil - a.Buffers[i].SelfAlloc = false - } - - if typeID == 
arrow.DICTIONARY { - a.resizeChildren(1) - dict := data.Dictionary() - if dict != (*array.Data)(nil) { - a.Children[0].TakeOwnership(dict) - } - } else { - a.resizeChildren(len(data.Children())) - for i, c := range data.Children() { - a.Children[i].TakeOwnership(c) - } - } -} - -// SetMembers populates this ArraySpan from the given ArrayData object. -// As this is a non-owning reference, the ArrayData object must not -// be fully released while this ArraySpan is in use, otherwise any buffers -// referenced will be released too -func (a *ArraySpan) SetMembers(data arrow.ArrayData) { - a.Type = data.DataType() - a.Len = int64(data.Len()) - if a.Type.ID() == arrow.NULL { - a.Nulls = a.Len - } else { - a.Nulls = int64(data.NullN()) - } - a.Offset = int64(data.Offset()) - - for i, b := range data.Buffers() { - if b != nil { - a.Buffers[i].SetBuffer(b) - } else { - a.Buffers[i].Buf = nil - a.Buffers[i].Owner = nil - a.Buffers[i].SelfAlloc = false - } - } - - typeID := a.Type.ID() - if a.Buffers[0].Buf == nil { - switch typeID { - case arrow.NULL, arrow.SPARSE_UNION, arrow.DENSE_UNION: - default: - // should already be zero, but we make sure - a.Nulls = 0 - } - } - - for i := len(data.Buffers()); i < 3; i++ { - a.Buffers[i].Buf = nil - a.Buffers[i].Owner = nil - a.Buffers[i].SelfAlloc = false - } - - if typeID == arrow.DICTIONARY { - a.resizeChildren(1) - dict := data.Dictionary() - if dict != (*array.Data)(nil) { - a.Children[0].SetMembers(dict) - } - } else { - if cap(a.Children) >= len(data.Children()) { - a.Children = a.Children[:len(data.Children())] - } else { - a.Children = make([]ArraySpan, len(data.Children())) - } - for i, c := range data.Children() { - a.Children[i].SetMembers(c) - } - } -} - -// ExecValue represents a single input to an execution which could -// be either an Array (ArraySpan) or a Scalar value -type ExecValue struct { - Array ArraySpan - Scalar scalar.Scalar -} - -func (e *ExecValue) IsArray() bool { return e.Scalar == nil } -func (e 
*ExecValue) IsScalar() bool { return !e.IsArray() } - -func (e *ExecValue) Type() arrow.DataType { - if e.IsArray() { - return e.Array.Type - } - return e.Scalar.DataType() -} - -// ExecResult is the result of a kernel execution and should be populated -// by the execution functions and/or a kernel. For now we're just going to -// alias an ArraySpan. -type ExecResult = ArraySpan - -// ExecSpan represents a slice of inputs and is used to provide slices -// of input values to iterate over. -// -// Len is the length of the span (all elements in Values should either -// be scalar or an array with a length + offset of at least Len). -type ExecSpan struct { - Len int64 - Values []ExecValue -} - -func getNumBuffers(dt arrow.DataType) int { - switch dt.ID() { - case arrow.RUN_END_ENCODED: - return 0 - case arrow.NULL, arrow.STRUCT, arrow.FIXED_SIZE_LIST: - return 1 - case arrow.BINARY, arrow.LARGE_BINARY, arrow.STRING, arrow.LARGE_STRING, arrow.DENSE_UNION: - return 3 - case arrow.EXTENSION: - return getNumBuffers(dt.(arrow.ExtensionType).StorageType()) - default: - return 2 - } -} - -// FillZeroLength fills an ArraySpan with the appropriate information for -// a Zero Length Array of the provided type. 
-func FillZeroLength(dt arrow.DataType, span *ArraySpan) { - span.Scratch[0], span.Scratch[1] = 0, 0 - span.Type = dt - span.Len = 0 - numBufs := getNumBuffers(dt) - for i := 0; i < numBufs; i++ { - span.Buffers[i].Buf = arrow.Uint64Traits.CastToBytes(span.Scratch[:])[:0] - span.Buffers[i].Owner = nil - } - - for i := numBufs; i < 3; i++ { - span.Buffers[i].Buf, span.Buffers[i].Owner = nil, nil - } - - if dt.ID() == arrow.DICTIONARY { - span.resizeChildren(1) - FillZeroLength(dt.(*arrow.DictionaryType).ValueType, &span.Children[0]) - return - } - - nt, ok := dt.(arrow.NestedType) - if !ok { - if len(span.Children) > 0 { - span.Children = span.Children[:0] - } - return - } - - span.resizeChildren(nt.NumFields()) - for i, f := range nt.Fields() { - FillZeroLength(f.Type, &span.Children[i]) - } -} - -// PromoteExecSpanScalars promotes the values of the passed in ExecSpan -// from scalars to Arrays of length 1 for each value. -func PromoteExecSpanScalars(span ExecSpan) { - for i := range span.Values { - if span.Values[i].Scalar != nil { - span.Values[i].Array.FillFromScalar(span.Values[i].Scalar) - span.Values[i].Scalar = nil - } - } -} diff --git a/go/arrow/compute/exec/span_offsets.go b/go/arrow/compute/exec/span_offsets.go deleted file mode 100644 index d2d0398884c9d..0000000000000 --- a/go/arrow/compute/exec/span_offsets.go +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.20 || tinygo - -package exec - -import ( - "unsafe" -) - -// convenience function for populating the offsets buffer from a scalar -// value's size. -func setOffsetsForScalar[T int32 | int64](span *ArraySpan, buf []T, valueSize int64, bufidx int) { - buf[0] = 0 - buf[1] = T(valueSize) - - span.Buffers[bufidx].Buf = unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(buf))), - 2*int(unsafe.Sizeof(T(0)))) - - span.Buffers[bufidx].Owner = nil - span.Buffers[bufidx].SelfAlloc = false -} diff --git a/go/arrow/compute/exec/span_test.go b/go/arrow/compute/exec/span_test.go deleted file mode 100644 index 018fbb7d623d9..0000000000000 --- a/go/arrow/compute/exec/span_test.go +++ /dev/null @@ -1,835 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package exec_test - -import ( - "reflect" - "strings" - "testing" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/endian" - "github.com/apache/arrow/go/v18/arrow/extensions" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/apache/arrow/go/v18/internal/types" - "github.com/stretchr/testify/assert" -) - -func TestBufferSpan_SetBuffer(t *testing.T) { - type fields struct { - Buf []byte - Owner *memory.Buffer - SelfAlloc bool - } - type args struct { - buf *memory.Buffer - } - foo := []byte{0xde, 0xad, 0xbe, 0xef} - own := memory.NewBufferBytes(foo) - tests := []struct { - name string - fields fields - args args - }{ - {"simple", fields{SelfAlloc: true}, args{own}}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - b := &exec.BufferSpan{ - Buf: tt.fields.Buf, - Owner: tt.fields.Owner, - SelfAlloc: tt.fields.SelfAlloc, - } - b.SetBuffer(tt.args.buf) - assert.Same(t, &foo[0], &b.Buf[0]) - assert.Same(t, own, b.Owner) - assert.False(t, b.SelfAlloc) - }) - } -} - -func TestBufferSpan_WrapBuffer(t *testing.T) { - type fields struct { - Buf []byte - Owner *memory.Buffer - SelfAlloc bool - } - type args struct { - buf *memory.Buffer - } - foo := []byte{0xde, 0xad, 0xbe, 0xef} - own := memory.NewBufferBytes(foo) - tests := []struct { - name string - fields fields - args args - }{ - {"simple", fields{SelfAlloc: false}, args{own}}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - b := &exec.BufferSpan{ - Buf: tt.fields.Buf, - Owner: tt.fields.Owner, - SelfAlloc: tt.fields.SelfAlloc, - } - b.WrapBuffer(tt.args.buf) - assert.Same(t, &foo[0], &b.Buf[0]) - assert.Same(t, own, b.Owner) - assert.True(t, b.SelfAlloc) - }) - } -} - -func 
TestArraySpan_UpdateNullCount(t *testing.T) { - type fields struct { - Type arrow.DataType - Len int64 - Nulls int64 - Offset int64 - Buffers [3]exec.BufferSpan - Scratch [2]uint64 - Children []exec.ArraySpan - } - tests := []struct { - name string - fields fields - want int64 - }{ - {"known", fields{Nulls: 25}, 25}, - {"unknown", fields{ - Nulls: array.UnknownNullCount, - Len: 8, // 0b01101101 - Buffers: [3]exec.BufferSpan{{Buf: []byte{109}}, {}, {}}}, 3}, - {"unknown with offset", fields{ - Nulls: array.UnknownNullCount, - Len: 4, - Offset: 2, // 0b01101101 - Buffers: [3]exec.BufferSpan{{Buf: []byte{109}}, {}, {}}}, 1}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - a := &exec.ArraySpan{ - Type: tt.fields.Type, - Len: tt.fields.Len, - Nulls: tt.fields.Nulls, - Offset: tt.fields.Offset, - Buffers: tt.fields.Buffers, - Scratch: tt.fields.Scratch, - Children: tt.fields.Children, - } - if got := a.UpdateNullCount(); got != tt.want { - t.Errorf("ArraySpan.UpdateNullCount() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestArraySpan_Dictionary(t *testing.T) { - type fields struct { - Type arrow.DataType - Len int64 - Nulls int64 - Offset int64 - Buffers [3]exec.BufferSpan - Scratch [2]uint64 - Children []exec.ArraySpan - } - children := []exec.ArraySpan{{}} - tests := []struct { - name string - fields fields - want *exec.ArraySpan - }{ - {"basic", fields{Children: children}, &children[0]}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - a := &exec.ArraySpan{ - Type: tt.fields.Type, - Len: tt.fields.Len, - Nulls: tt.fields.Nulls, - Offset: tt.fields.Offset, - Buffers: tt.fields.Buffers, - Scratch: tt.fields.Scratch, - Children: tt.fields.Children, - } - if got := a.Dictionary(); !reflect.DeepEqual(got, tt.want) { - t.Errorf("ArraySpan.Dictionary() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestArraySpan_NumBuffers(t *testing.T) { - type fields struct { - Type arrow.DataType - Len int64 - Nulls int64 - 
Offset int64 - Buffers [3]exec.BufferSpan - Scratch [2]uint64 - Children []exec.ArraySpan - } - - tests := []struct { - name string - fields fields - want int - }{ - {"null", fields{Type: arrow.Null}, 1}, - {"struct", fields{Type: arrow.StructOf()}, 1}, - {"fixed size list", fields{Type: arrow.FixedSizeListOf(4, arrow.PrimitiveTypes.Int32)}, 1}, - {"binary", fields{Type: arrow.BinaryTypes.Binary}, 3}, - {"large binary", fields{Type: arrow.BinaryTypes.LargeBinary}, 3}, - {"string", fields{Type: arrow.BinaryTypes.String}, 3}, - {"large string", fields{Type: arrow.BinaryTypes.LargeString}, 3}, - {"extension", fields{Type: extensions.NewUUIDType()}, 2}, - {"int32", fields{Type: arrow.PrimitiveTypes.Int32}, 2}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - a := &exec.ArraySpan{ - Type: tt.fields.Type, - Len: tt.fields.Len, - Nulls: tt.fields.Nulls, - Offset: tt.fields.Offset, - Buffers: tt.fields.Buffers, - Scratch: tt.fields.Scratch, - Children: tt.fields.Children, - } - if got := a.NumBuffers(); got != tt.want { - t.Errorf("ArraySpan.NumBuffers() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestArraySpan_MakeData(t *testing.T) { - type fields struct { - Type arrow.DataType - Len int64 - Nulls int64 - Offset int64 - Buffers [3]exec.BufferSpan - Scratch [2]uint64 - Children []exec.ArraySpan - } - - var ( - buf1 *memory.Buffer - ) - arrow.RegisterExtensionType(types.NewDictExtensionType()) - defer arrow.UnregisterExtensionType("dict-extension") - - tests := []struct { - name string - fields func(mem memory.Allocator) fields - want func(mem memory.Allocator) arrow.ArrayData - }{ - {"null type", func(mem memory.Allocator) fields { - return fields{ - Type: arrow.Null, - Len: 5, - Nulls: array.UnknownNullCount, - } - }, func(mem memory.Allocator) arrow.ArrayData { - return array.NewData(arrow.Null, 5, []*memory.Buffer{nil}, nil, 5, 0) - }}, - {"zero len", func(mem memory.Allocator) fields { - return fields{Type: arrow.PrimitiveTypes.Int32} 
- }, func(mem memory.Allocator) arrow.ArrayData { - return array.NewData(arrow.PrimitiveTypes.Int32, 0, []*memory.Buffer{nil, nil}, nil, 0, 0) - }}, - {"non-owning offset", func(mem memory.Allocator) fields { - ret := fields{ - Type: arrow.PrimitiveTypes.Int8, - Len: 4, - Nulls: 1, - Offset: 1, - } - buf1 = memory.NewResizableBuffer(mem) - buf1.Resize(1) - buf1.Bytes()[0] = 109 - ret.Buffers[0].SetBuffer(buf1) - ret.Buffers[1].SetBuffer(memory.NewBufferBytes([]byte{5, 5, 5, 5, 5})) - return ret - }, func(mem memory.Allocator) arrow.ArrayData { - // created in the above func, we release after constructing - // the NewData so the refcount is as expected - defer buf1.Release() - return array.NewData(arrow.PrimitiveTypes.Int8, 4, - []*memory.Buffer{buf1, memory.NewBufferBytes([]byte{5, 5, 5, 5, 5})}, nil, 1, 1) - }}, - {"self-alloc", func(mem memory.Allocator) fields { - ret := fields{ - Type: arrow.PrimitiveTypes.Int8, - Len: 4, - } - buf := memory.NewResizableBuffer(mem) - buf.Resize(1) - ret.Buffers[0].WrapBuffer(buf) - buf2 := memory.NewResizableBuffer(mem) - buf2.Resize(4) - ret.Buffers[1].WrapBuffer(buf2) - return ret - }, func(mem memory.Allocator) arrow.ArrayData { - buf := memory.NewResizableBuffer(mem) - buf.Resize(1) - defer buf.Release() - buf2 := memory.NewResizableBuffer(mem) - buf2.Resize(4) - defer buf2.Release() - return array.NewData(arrow.PrimitiveTypes.Int8, 4, []*memory.Buffer{buf, buf2}, nil, 0, 0) - }}, - {"with children", func(mem memory.Allocator) fields { - ret := fields{ - Type: arrow.ListOf(arrow.PrimitiveTypes.Int8), - Len: 1, - Children: []exec.ArraySpan{{ - Type: arrow.PrimitiveTypes.Int8, - Len: 4, - }}, - } - var offsets [8]byte - endian.Native.PutUint32(offsets[4:], 4) - ret.Buffers[1].SetBuffer(memory.NewBufferBytes(offsets[:])) - buf := memory.NewResizableBuffer(mem) - buf.Resize(4) - buf.Bytes()[0] = 1 - buf.Bytes()[1] = 2 - buf.Bytes()[2] = 3 - buf.Bytes()[3] = 4 - - ret.Children[0].Buffers[1].WrapBuffer(buf) - return ret - }, 
func(mem memory.Allocator) arrow.ArrayData { - buf := memory.NewResizableBuffer(mem) - buf.Resize(4) - buf.Bytes()[0] = 1 - buf.Bytes()[1] = 2 - buf.Bytes()[2] = 3 - buf.Bytes()[3] = 4 - defer buf.Release() - child := array.NewData(arrow.PrimitiveTypes.Int8, 4, []*memory.Buffer{nil, buf}, nil, 0, 0) - defer child.Release() - - var offsets [8]byte - endian.Native.PutUint32(offsets[4:], 4) - - return array.NewData(arrow.ListOf(arrow.PrimitiveTypes.Int8), 1, - []*memory.Buffer{nil, memory.NewBufferBytes(offsets[:])}, - []arrow.ArrayData{child}, 0, 0) - }}, - {"dict-extension-type", func(mem memory.Allocator) fields { - // dict-extension-type is dict(Index: int8, Value: string) - // so there should be an int8 in the arrayspan and - // a child of a string arrayspan in the first index of - // Children - ret := fields{ - Type: types.NewDictExtensionType(), - Len: 1, - Children: []exec.ArraySpan{{ - Type: arrow.BinaryTypes.String, - Len: 2, - }}, - } - - indices := memory.NewResizableBuffer(mem) - indices.Resize(1) - indices.Bytes()[0] = 1 - ret.Buffers[1].WrapBuffer(indices) - - offsets := memory.NewResizableBuffer(mem) - offsets.Resize(3 * arrow.Int32SizeBytes) - copy(offsets.Bytes(), arrow.Int32Traits.CastToBytes([]int32{0, 5, 10})) - - values := memory.NewResizableBuffer(mem) - values.Resize(len("HelloWorld")) - copy(values.Bytes(), []byte("HelloWorld")) - - nulls := memory.NewResizableBuffer(mem) - nulls.Resize(1) - nulls.Bytes()[0] = 3 - ret.Children[0].Buffers[0].WrapBuffer(nulls) - ret.Children[0].Buffers[1].WrapBuffer(offsets) - ret.Children[0].Buffers[2].WrapBuffer(values) - - return ret - }, func(mem memory.Allocator) arrow.ArrayData { - dict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World"]`)) - defer dict.Release() - index, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[1]`)) - defer index.Release() - - out := array.NewData(types.NewDictExtensionType(), 1, []*memory.Buffer{nil, 
index.Data().Buffers()[1]}, nil, 0, 0) - out.SetDictionary(dict.Data()) - return out - }}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - t.Run("MakeData", func(t *testing.T) { - f := tt.fields(mem) - a := &exec.ArraySpan{ - Type: f.Type, - Len: f.Len, - Nulls: f.Nulls, - Offset: f.Offset, - Buffers: f.Buffers, - Scratch: f.Scratch, - Children: f.Children, - } - got := a.MakeData() - want := tt.want(mem) - if !reflect.DeepEqual(got, want) { - t.Errorf("ArraySpan.MakeData() = %v, want %v", got, want) - } - want.Release() - got.Release() - }) - - t.Run("MakeArray", func(t *testing.T) { - f := tt.fields(mem) - a := &exec.ArraySpan{ - Type: f.Type, - Len: f.Len, - Nulls: f.Nulls, - Offset: f.Offset, - Buffers: f.Buffers, - Scratch: f.Scratch, - Children: f.Children, - } - arr := a.MakeArray() - want := tt.want(mem) - defer want.Release() - exp := array.MakeFromData(want) - - assert.Truef(t, array.Equal(arr, exp), "expected: %s\ngot: %s", exp, arr) - - exp.Release() - arr.Release() - }) - }) - } -} - -func TestArraySpan_SetSlice(t *testing.T) { - type fields struct { - Type arrow.DataType - Len int64 - Nulls int64 - Offset int64 - Buffers [3]exec.BufferSpan - Scratch [2]uint64 - Children []exec.ArraySpan - } - type args struct { - off int64 - length int64 - } - tests := []struct { - name string - fields fields - args args - wantNulls int64 - }{ - {"null type", fields{Type: arrow.Null}, args{5, 10}, 10}, - {"not-null type", fields{Type: arrow.PrimitiveTypes.Int8}, args{5, 10}, 0}, - {"not-null type with nulls", fields{Type: arrow.PrimitiveTypes.Int8, Nulls: -1}, args{5, 10}, array.UnknownNullCount}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - a := &exec.ArraySpan{ - Type: tt.fields.Type, - Len: tt.fields.Len, - Nulls: tt.fields.Nulls, - Offset: tt.fields.Offset, - Buffers: tt.fields.Buffers, - Scratch: tt.fields.Scratch, - 
Children: tt.fields.Children, - } - a.SetSlice(tt.args.off, tt.args.length) - assert.Equal(t, tt.args.off, a.Offset) - assert.Equal(t, tt.args.length, a.Len) - assert.Equal(t, tt.wantNulls, a.Nulls) - }) - } -} - -func TestArraySpan_FillFromScalar(t *testing.T) { - var ( - expDecimalBuf [arrow.Decimal128SizeBytes]byte - expScratch [2]uint64 - ) - - endian.Native.PutUint64(expDecimalBuf[:], 1234) - endian.Native.PutUint32(arrow.Uint64Traits.CastToBytes(expScratch[:])[4:], 10) - - dict, _, _ := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World"]`)) - defer dict.Release() - - tests := []struct { - name string - args scalar.Scalar - exp exec.ArraySpan - }{ - {"null-type", - scalar.MakeNullScalar(arrow.Null), - exec.ArraySpan{Type: arrow.Null, Len: 1, Nulls: 1}}, - {"bool valid", - scalar.MakeScalar(true), - exec.ArraySpan{ - Type: arrow.FixedWidthTypes.Boolean, - Len: 1, - Nulls: 0, - Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, {Buf: []byte{0x01}}, {}}, - }}, - {"bool valid false", - scalar.MakeScalar(false), - exec.ArraySpan{ - Type: arrow.FixedWidthTypes.Boolean, - Len: 1, - Nulls: 0, - Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, {Buf: []byte{0x00}}, {}}, - }}, - {"primitive null", - scalar.MakeNullScalar(arrow.PrimitiveTypes.Int32), - exec.ArraySpan{ - Type: arrow.PrimitiveTypes.Int32, - Len: 1, - Nulls: 1, - Buffers: [3]exec.BufferSpan{{Buf: []byte{0x00}}, {Buf: []byte{0, 0, 0, 0}}, {}}, - }}, - {"decimal valid", - scalar.NewDecimal128Scalar(decimal128.FromU64(1234), &arrow.Decimal128Type{Precision: 12, Scale: 2}), - exec.ArraySpan{ - Type: &arrow.Decimal128Type{Precision: 12, Scale: 2}, - Len: 1, - Nulls: 0, - Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, {Buf: expDecimalBuf[:]}, {}}, - }}, - {"dictionary scalar", - scalar.NewDictScalar(scalar.NewInt8Scalar(1), dict), - exec.ArraySpan{ - Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String}, - Len: 1, 
- Nulls: 0, - Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, - {Buf: []byte{1}}, {}, - }, - Children: []exec.ArraySpan{{ - Type: arrow.BinaryTypes.String, - Len: 2, - Buffers: [3]exec.BufferSpan{ - {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, - {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, - {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, - }, - }}, - }, - }, - {"binary scalar", - scalar.NewBinaryScalar(dict.Data().Buffers()[2], arrow.BinaryTypes.String), - exec.ArraySpan{ - Type: arrow.BinaryTypes.String, - Len: 1, - Nulls: 0, - Scratch: expScratch, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x01}}, - {Buf: arrow.Uint64Traits.CastToBytes(expScratch[:1])}, - {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}}, - }, - }, - {"large binary", - scalar.NewLargeStringScalarFromBuffer(dict.Data().Buffers()[2]), - exec.ArraySpan{ - Type: arrow.BinaryTypes.LargeString, - Len: 1, - Nulls: 0, - Scratch: [2]uint64{0, 10}, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x01}}, - {Buf: arrow.Uint64Traits.CastToBytes([]uint64{0, 10})}, - {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}}, - }}, - {"fixed size binary", - scalar.NewFixedSizeBinaryScalar(dict.Data().Buffers()[2], &arrow.FixedSizeBinaryType{ByteWidth: 10}), - exec.ArraySpan{ - Type: &arrow.FixedSizeBinaryType{ByteWidth: 10}, - Len: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x01}}, - {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, {}, - }, - }}, - {"map scalar null value", - scalar.MakeNullScalar(arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String)), - exec.ArraySpan{ - Type: arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), - Len: 1, - Nulls: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0}}, - {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 0}}, - {}, - }, - Children: []exec.ArraySpan{{ - Type: arrow.StructOf(arrow.Field{Name: "key", Type: 
arrow.PrimitiveTypes.Int8}, - arrow.Field{Name: "value", Type: arrow.BinaryTypes.String, Nullable: true}), - Len: 0, - Nulls: 0, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{}}, {}, {}, - }, - Children: []exec.ArraySpan{ - { - Type: arrow.PrimitiveTypes.Int8, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{}}, {Buf: []byte{}}, {}, - }, - }, - { - Type: arrow.BinaryTypes.String, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{}}, {Buf: []byte{}}, {Buf: []byte{}}, - }, - }, - }, - }}, - }}, - {"list scalar", - scalar.NewListScalarData(dict.Data()), - exec.ArraySpan{ - Type: arrow.ListOf(arrow.BinaryTypes.String), - Len: 1, - Scratch: [2]uint64{ - *(*uint64)(unsafe.Pointer(&[]int32{0, 2}[0])), - 0, - }, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, - {Buf: arrow.Int32Traits.CastToBytes([]int32{0, 2})}, - }, - Children: []exec.ArraySpan{{ - Type: arrow.BinaryTypes.String, - Len: 2, - Buffers: [3]exec.BufferSpan{ - {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, - {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, - {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, - }, - }}, - }, - }, - {"large list scalar", - scalar.NewLargeListScalarData(dict.Data()), - exec.ArraySpan{ - Type: arrow.LargeListOf(arrow.BinaryTypes.String), - Len: 1, - Scratch: [2]uint64{0, 2}, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, - {Buf: arrow.Int64Traits.CastToBytes([]int64{0, 2})}, - }, - Children: []exec.ArraySpan{{ - Type: arrow.BinaryTypes.String, - Len: 2, - Buffers: [3]exec.BufferSpan{ - {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, - {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, - {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, - }, - }}, - }, - }, - {"fixed size list", - scalar.NewFixedSizeListScalar(dict), - exec.ArraySpan{ - Type: arrow.FixedSizeListOf(2, arrow.BinaryTypes.String), - Len: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: 
[]byte{0x1}}, - {}, {}, - }, - Children: []exec.ArraySpan{{ - Type: arrow.BinaryTypes.String, - Len: 2, - Buffers: [3]exec.BufferSpan{ - {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, - {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, - {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, - }, - }}, - }, - }, - {"struct scalar", - func() scalar.Scalar { - s, _ := scalar.NewStructScalarWithNames([]scalar.Scalar{ - scalar.MakeScalar(int32(5)), scalar.MakeScalar(uint8(10)), - }, []string{"int32", "uint8"}) - return s - }(), - exec.ArraySpan{ - Type: arrow.StructOf( - arrow.Field{Name: "int32", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - arrow.Field{Name: "uint8", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}), - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, {}, {}, - }, - Len: 1, - Children: []exec.ArraySpan{ - { - Type: arrow.PrimitiveTypes.Int32, - Len: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, - {Buf: arrow.Int32Traits.CastToBytes([]int32{5})}, - {}, - }, - }, - { - Type: arrow.PrimitiveTypes.Uint8, - Len: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, - {Buf: []byte{10}}, - {}, - }, - }, - }, - }, - }, - {"dense union scalar", - func() scalar.Scalar { - dt := arrow.UnionOf(arrow.DenseMode, []arrow.Field{ - {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - }, []arrow.UnionTypeCode{3, 42, 43}) - return scalar.NewDenseUnionScalar(scalar.MakeScalar(uint64(25)), 42, dt.(*arrow.DenseUnionType)) - }(), - exec.ArraySpan{ - Type: arrow.UnionOf(arrow.DenseMode, []arrow.Field{ - {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - }, 
[]arrow.UnionTypeCode{3, 42, 43}), - Len: 1, - Scratch: [2]uint64{42, 1}, - Buffers: [3]exec.BufferSpan{{}, - {Buf: []byte{42}}, {Buf: arrow.Int32Traits.CastToBytes([]int32{0, 1})}, - }, - Children: []exec.ArraySpan{ - { - Type: arrow.BinaryTypes.String, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{}}, {Buf: []byte{}}, {Buf: []byte{}}, - }, - }, - { - Type: arrow.PrimitiveTypes.Uint64, - Len: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, - {Buf: arrow.Uint64Traits.CastToBytes([]uint64{25})}, - {}, - }, - }, - { - Type: arrow.PrimitiveTypes.Uint64, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{}}, {Buf: []byte{}}, {}, - }, - }, - }, - }, - }, - {"sparse union", - func() scalar.Scalar { - dt := arrow.UnionOf(arrow.SparseMode, []arrow.Field{ - {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - }, []arrow.UnionTypeCode{3, 42, 43}) - return scalar.NewSparseUnionScalarFromValue(scalar.MakeScalar(uint64(25)), 1, dt.(*arrow.SparseUnionType)) - }(), - exec.ArraySpan{ - Type: arrow.UnionOf(arrow.SparseMode, []arrow.Field{ - {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - }, []arrow.UnionTypeCode{3, 42, 43}), - Len: 1, - Scratch: [2]uint64{42, 0}, - Buffers: [3]exec.BufferSpan{{}, - {Buf: []byte{42}}, {}, - }, - Children: []exec.ArraySpan{ - { - Type: arrow.BinaryTypes.String, - Len: 1, - Nulls: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x0}}, - {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 0}}, - {}, - }, - }, - { - Type: arrow.PrimitiveTypes.Uint64, - Len: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, - {Buf: arrow.Uint64Traits.CastToBytes([]uint64{25})}, - {}, - }, - }, - { - Type: arrow.PrimitiveTypes.Uint64, - Len: 1, - Nulls: 
1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x0}}, {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 0}}, {}, - }, - }, - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - a := &exec.ArraySpan{ - Nulls: array.UnknownNullCount, - Buffers: [3]exec.BufferSpan{{SelfAlloc: true, Owner: &memory.Buffer{}}, {SelfAlloc: true, Owner: &memory.Buffer{}}, {}}, - } - a.FillFromScalar(tt.args) - assert.Equal(t, tt.exp, *a) - }) - } -} diff --git a/go/arrow/compute/exec/utils.go b/go/arrow/compute/exec/utils.go deleted file mode 100644 index 832f93f13165d..0000000000000 --- a/go/arrow/compute/exec/utils.go +++ /dev/null @@ -1,276 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package exec - -import ( - "fmt" - "math" - "sync/atomic" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "golang.org/x/exp/constraints" - "golang.org/x/exp/slices" -) - -// GetSpanValues returns a properly typed slice by reinterpreting -// the buffer at index i using unsafe.Slice. This will take into account -// the offset of the given ArraySpan. 
-func GetSpanValues[T arrow.FixedWidthType](span *ArraySpan, i int) []T { - if len(span.Buffers[i].Buf) == 0 { - return nil - } - ret := unsafe.Slice((*T)(unsafe.Pointer(&span.Buffers[i].Buf[0])), span.Offset+span.Len) - return ret[span.Offset:] -} - -// GetSpanOffsets is like GetSpanValues, except it is only for int32 -// or int64 and adds the additional 1 expected value for an offset -// buffer (ie. len(output) == span.Len+1) -func GetSpanOffsets[T int32 | int64](span *ArraySpan, i int) []T { - ret := unsafe.Slice((*T)(unsafe.Pointer(&span.Buffers[i].Buf[0])), span.Offset+span.Len+1) - return ret[span.Offset:] -} - -func Min[T constraints.Ordered](a, b T) T { - if a < b { - return a - } - return b -} - -func Max[T constraints.Ordered](a, b T) T { - if a > b { - return a - } - return b -} - -// OptionsInit should be used in the case where a KernelState is simply -// represented with a specific type by value (instead of pointer). -// This will initialize the KernelState as a value-copied instance of -// the passed in function options argument to ensure separation -// and allow the kernel to manipulate the options if necessary without -// any negative consequences since it will have its own copy of the options. 
-func OptionsInit[T any](_ *KernelCtx, args KernelInitArgs) (KernelState, error) { - if opts, ok := args.Options.(*T); ok { - return *opts, nil - } - - return nil, fmt.Errorf("%w: attempted to initialize kernel state from invalid function options", - arrow.ErrInvalid) -} - -type arrayBuilder[T arrow.NumericType | bool] interface { - array.Builder - Append(T) - AppendValues([]T, []bool) -} - -func ArrayFromSlice[T arrow.NumericType | bool](mem memory.Allocator, data []T) arrow.Array { - bldr := array.NewBuilder(mem, arrow.GetDataType[T]()).(arrayBuilder[T]) - defer bldr.Release() - - bldr.AppendValues(data, nil) - return bldr.NewArray() -} - -func ArrayFromSliceWithValid[T arrow.NumericType | bool](mem memory.Allocator, data []T, valid []bool) arrow.Array { - bldr := array.NewBuilder(mem, arrow.GetDataType[T]()).(arrayBuilder[T]) - defer bldr.Release() - - bldr.AppendValues(data, valid) - return bldr.NewArray() -} - -func RechunkArraysConsistently(groups [][]arrow.Array) [][]arrow.Array { - if len(groups) <= 1 { - return groups - } - - var totalLen int - for _, a := range groups[0] { - totalLen += a.Len() - } - - if totalLen == 0 { - return groups - } - - rechunked := make([][]arrow.Array, len(groups)) - offsets := make([]int64, len(groups)) - // scan all array vectors at once, rechunking along the way - var start int64 - for start < int64(totalLen) { - // first compute max possible length for next chunk - var chunkLength int64 = math.MaxInt64 - for i, g := range groups { - offset := offsets[i] - // skip any done arrays including 0-length - for offset == int64(g[0].Len()) { - g = g[1:] - offset = 0 - } - arr := g[0] - chunkLength = Min(chunkLength, int64(arr.Len())-offset) - - offsets[i] = offset - groups[i] = g - } - - // now slice all the arrays along this chunk size - for i, g := range groups { - offset := offsets[i] - arr := g[0] - if offset == 0 && int64(arr.Len()) == chunkLength { - // slice spans entire array - arr.Retain() - rechunked[i] = 
append(rechunked[i], arr) - } else { - rechunked[i] = append(rechunked[i], array.NewSlice(arr, int64(offset), int64(offset+chunkLength))) - } - offsets[i] += chunkLength - } - - start += int64(chunkLength) - } - return rechunked -} - -type ChunkResolver struct { - offsets []int64 - cached int64 -} - -func NewChunkResolver(chunks []arrow.Array) *ChunkResolver { - offsets := make([]int64, len(chunks)+1) - var offset int64 - for i, c := range chunks { - curOffset := offset - offset += int64(c.Len()) - offsets[i] = curOffset - } - offsets[len(chunks)] = offset - return &ChunkResolver{offsets: offsets} -} - -func (c *ChunkResolver) Resolve(idx int64) (chunk, index int64) { - // some algorithms consecutively access indexes that are a - // relatively small distance from each other, falling into - // the same chunk. - // This is trivial when merging (assuming each side of the - // merge uses its own resolver), but also in the inner - // recursive invocations of partitioning. - if len(c.offsets) <= 1 { - return 0, idx - } - - cached := atomic.LoadInt64(&c.cached) - cacheHit := idx >= c.offsets[cached] && idx < c.offsets[cached+1] - if cacheHit { - return cached, idx - c.offsets[cached] - } - - chkIdx, found := slices.BinarySearch(c.offsets, idx) - if !found { - chkIdx-- - } - - chunk, index = int64(chkIdx), idx-c.offsets[chkIdx] - atomic.StoreInt64(&c.cached, chunk) - return -} - -type arrayTypes interface { - arrow.FixedWidthType | arrow.TemporalType | bool | string | []byte -} - -type ArrayIter[T arrayTypes] interface { - Next() T -} - -type BoolIter struct { - Rdr *bitutil.BitmapReader -} - -func NewBoolIter(arr *ArraySpan) ArrayIter[bool] { - return &BoolIter{ - Rdr: bitutil.NewBitmapReader(arr.Buffers[1].Buf, int(arr.Offset), int(arr.Len))} -} - -func (b *BoolIter) Next() (out bool) { - out = b.Rdr.Set() - b.Rdr.Next() - return -} - -type PrimitiveIter[T arrow.FixedWidthType] struct { - Values []T -} - -func NewPrimitiveIter[T arrow.FixedWidthType](arr *ArraySpan) 
ArrayIter[T] { - return &PrimitiveIter[T]{Values: GetSpanValues[T](arr, 1)} -} - -func (p *PrimitiveIter[T]) Next() (v T) { - v = p.Values[0] - p.Values = p.Values[1:] - return -} - -type VarBinaryIter[OffsetT int32 | int64] struct { - Offsets []OffsetT - Data []byte - Pos int64 -} - -func NewVarBinaryIter[OffsetT int32 | int64](arr *ArraySpan) ArrayIter[[]byte] { - return &VarBinaryIter[OffsetT]{ - Offsets: GetSpanOffsets[OffsetT](arr, 1), - Data: arr.Buffers[2].Buf, - } -} - -func (v *VarBinaryIter[OffsetT]) Next() []byte { - cur := v.Pos - v.Pos++ - return v.Data[v.Offsets[cur]:v.Offsets[v.Pos]] -} - -type FSBIter struct { - Data []byte - Width int - Pos int64 -} - -func NewFSBIter(arr *ArraySpan) ArrayIter[[]byte] { - return &FSBIter{ - Data: arr.Buffers[1].Buf, - Width: arr.Type.(arrow.FixedWidthDataType).Bytes(), - } -} - -func (f *FSBIter) Next() []byte { - start := f.Width * int(f.Pos) - f.Pos++ - return f.Data[start : start+f.Width] -} diff --git a/go/arrow/compute/exec/utils_test.go b/go/arrow/compute/exec/utils_test.go deleted file mode 100644 index b8b7212b538c5..0000000000000 --- a/go/arrow/compute/exec/utils_test.go +++ /dev/null @@ -1,111 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package exec_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestRechunkConsistentArraysTrivial(t *testing.T) { - var groups [][]arrow.Array - rechunked := exec.RechunkArraysConsistently(groups) - assert.Zero(t, rechunked) - - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - a1 := exec.ArrayFromSlice(mem, []int16{}) - defer a1.Release() - a2 := exec.ArrayFromSlice(mem, []int16{}) - defer a2.Release() - b1 := exec.ArrayFromSlice(mem, []int32{}) - defer b1.Release() - groups = [][]arrow.Array{{a1, a2}, {}, {b1}} - rechunked = exec.RechunkArraysConsistently(groups) - assert.Len(t, rechunked, 3) - - for _, arrvec := range rechunked { - for _, arr := range arrvec { - assert.Zero(t, arr.Len()) - } - } -} - -func assertEqual[T arrow.NumericType](t *testing.T, mem memory.Allocator, arr arrow.Array, data []T) { - exp := exec.ArrayFromSlice(mem, data) - defer exp.Release() - assert.Truef(t, array.Equal(exp, arr), "expected: %s\ngot: %s", exp, arr) -} - -func TestRechunkArraysConsistentlyPlain(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - a1 := exec.ArrayFromSlice(mem, []int16{1, 2, 3}) - defer a1.Release() - a2 := exec.ArrayFromSlice(mem, []int16{4, 5}) - defer a2.Release() - a3 := exec.ArrayFromSlice(mem, []int16{6, 7, 8, 9}) - defer a3.Release() - - b1 := exec.ArrayFromSlice(mem, []int32{41, 42}) - defer b1.Release() - b2 := exec.ArrayFromSlice(mem, []int32{43, 44, 45}) - defer b2.Release() - b3 := exec.ArrayFromSlice(mem, []int32{46, 47}) - defer b3.Release() - b4 := exec.ArrayFromSlice(mem, []int32{48, 49}) - defer b4.Release() - - groups := [][]arrow.Array{{a1, a2, a3}, {b1, b2, b3, b4}} - rechunked := 
exec.RechunkArraysConsistently(groups) - assert.Len(t, rechunked, 2) - ra := rechunked[0] - rb := rechunked[1] - - assert.Len(t, ra, 5) - assertEqual(t, mem, ra[0], []int16{1, 2}) - ra[0].Release() - assertEqual(t, mem, ra[1], []int16{3}) - ra[1].Release() - assertEqual(t, mem, ra[2], []int16{4, 5}) - ra[2].Release() - assertEqual(t, mem, ra[3], []int16{6, 7}) - ra[3].Release() - assertEqual(t, mem, ra[4], []int16{8, 9}) - ra[4].Release() - - assert.Len(t, rb, 5) - assertEqual(t, mem, rb[0], []int32{41, 42}) - rb[0].Release() - assertEqual(t, mem, rb[1], []int32{43}) - rb[1].Release() - assertEqual(t, mem, rb[2], []int32{44, 45}) - rb[2].Release() - assertEqual(t, mem, rb[3], []int32{46, 47}) - rb[3].Release() - assertEqual(t, mem, rb[4], []int32{48, 49}) - rb[4].Release() -} diff --git a/go/arrow/compute/exec_internals_test.go b/go/arrow/compute/exec_internals_test.go deleted file mode 100644 index f0c585f557ebc..0000000000000 --- a/go/arrow/compute/exec_internals_test.go +++ /dev/null @@ -1,585 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package compute - -import ( - "bytes" - "context" - "fmt" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/stretchr/testify/suite" -) - -type ComputeInternalsTestSuite struct { - suite.Suite - - mem *memory.CheckedAllocator - - execCtx ExecCtx - ctx *exec.KernelCtx - rng gen.RandomArrayGenerator -} - -func (c *ComputeInternalsTestSuite) SetupTest() { - c.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - c.rng = gen.NewRandomArrayGenerator(0, c.mem) - - c.resetCtx() -} - -func (c *ComputeInternalsTestSuite) TearDownTest() { - c.mem.AssertSize(c.T(), 0) -} - -func (c *ComputeInternalsTestSuite) assertArrayEqual(expected, got arrow.Array) { - c.Truef(array.Equal(expected, got), "expected: %s\ngot: %s", expected, got) -} - -func (c *ComputeInternalsTestSuite) assertDatumEqual(expected arrow.Array, got Datum) { - arr := got.(*ArrayDatum).MakeArray() - defer arr.Release() - c.Truef(array.Equal(expected, arr), "expected: %s\ngot: %s", expected, arr) -} - -func (c *ComputeInternalsTestSuite) resetCtx() { - c.execCtx = ExecCtx{Registry: GetFunctionRegistry(), - ChunkSize: DefaultMaxChunkSize, PreallocContiguous: true} - c.ctx = &exec.KernelCtx{Ctx: SetExecCtx(context.Background(), c.execCtx)} -} - -func (c *ComputeInternalsTestSuite) getBoolArr(sz int64, trueprob, nullprob float64) arrow.Array { - return c.rng.Boolean(sz, trueprob, nullprob) -} - -func (c *ComputeInternalsTestSuite) getUint8Arr(sz int64, nullprob float64) arrow.Array { - return c.rng.Uint8(sz, 0, 100, nullprob) -} - -func (c *ComputeInternalsTestSuite) getInt32Arr(sz int64, nullprob float64) arrow.Array { - return c.rng.Int32(sz, 0, 1000, nullprob) -} 
- -func (c *ComputeInternalsTestSuite) getFloat64Arr(sz int64, nullprob float64) arrow.Array { - return c.rng.Float64(sz, 0, 1000, nullprob) -} - -func (c *ComputeInternalsTestSuite) getInt32Chunked(szs []int64) *arrow.Chunked { - chunks := make([]arrow.Array, 0) - for i, s := range szs { - chunks = append(chunks, c.getInt32Arr(s, 0.1)) - defer chunks[i].Release() - } - return arrow.NewChunked(arrow.PrimitiveTypes.Int32, chunks) -} - -func (c *ComputeInternalsTestSuite) assertValidityZeroExtraBits(data []byte, length, offset int) { - bitExtent := ((offset + length + 7) / 8) * 8 - for i := offset + length; i < bitExtent; i++ { - c.False(bitutil.BitIsSet(data, i)) - } -} - -type PropagateNullsSuite struct { - ComputeInternalsTestSuite -} - -func (p *PropagateNullsSuite) TestUnknownNullCountWithNullsZeroCopies() { - const length int = 16 - bitmap := [8]byte{254, 0, 0, 0, 0, 0, 0, 0} - nulls := memory.NewBufferBytes(bitmap[:]) - - output := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nil, nil}, nil, 0, 0) - input := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nulls, nil}, nil, array.UnknownNullCount, 0) - - var outSpan exec.ArraySpan - outSpan.SetMembers(output) - batch := ExecBatch{Values: []Datum{NewDatum(input)}, Len: int64(length)} - p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(&batch), &outSpan)) - p.Same(nulls, outSpan.Buffers[0].Owner) - p.EqualValues(array.UnknownNullCount, outSpan.Nulls) - p.Equal(9, int(outSpan.Len)-bitutil.CountSetBits(outSpan.Buffers[0].Buf, int(outSpan.Offset), int(outSpan.Len))) -} - -func (p *PropagateNullsSuite) TestUnknownNullCountWithoutNulls() { - const length int = 16 - bitmap := [8]byte{255, 255, 0, 0, 0, 0, 0, 0} - nulls := memory.NewBufferBytes(bitmap[:]) - - output := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nil, nil}, nil, 0, 0) - input := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nulls, nil}, nil, 
array.UnknownNullCount, 0) - - var outSpan exec.ArraySpan - outSpan.SetMembers(output) - batch := ExecBatch{Values: []Datum{NewDatum(input)}, Len: int64(length)} - p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(&batch), &outSpan)) - p.EqualValues(-1, outSpan.Nulls) - p.Same(nulls, outSpan.Buffers[0].Owner) -} - -func (p *PropagateNullsSuite) TestSetAllNulls() { - const length int = 16 - checkSetAll := func(vals []Datum, prealloc bool) { - // fresh bitmap with all 1s - bitmapData := [2]byte{255, 255} - preallocatedMem := memory.NewBufferBytes(bitmapData[:]) - - output := &exec.ArraySpan{ - Type: arrow.FixedWidthTypes.Boolean, - Len: int64(length), - Nulls: array.UnknownNullCount, - } - - if prealloc { - output.Buffers[0].SetBuffer(preallocatedMem) - } - - batch := &ExecBatch{Values: vals, Len: int64(length)} - p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), output)) - - if prealloc { - // ensure that the buffer object is the same when we pass preallocated - // memory to it - p.Same(preallocatedMem, output.Buffers[0].Owner) - } else { - defer output.Buffers[0].Owner.Release() - } - - p.NotNil(output.Buffers[0].Buf) - expected := [2]byte{0, 0} - p.True(bytes.Equal(expected[:], output.Buffers[0].Buf)) - } - - var vals []Datum - const trueProb float64 = 0.5 - p.Run("Null Scalar", func() { - i32Val := scalar.MakeScalar(int32(3)) - vals = []Datum{NewDatum(i32Val), NewDatum(scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean))} - checkSetAll(vals, true) - checkSetAll(vals, false) - - arr := p.getBoolArr(int64(length), trueProb, 0) - defer arr.Release() - vals[0] = NewDatum(arr) - defer vals[0].Release() - checkSetAll(vals, true) - checkSetAll(vals, false) - }) - - p.Run("one all null", func() { - arrAllNulls := p.getBoolArr(int64(length), trueProb, 1) - defer arrAllNulls.Release() - arrHalf := p.getBoolArr(int64(length), trueProb, 0.5) - defer arrHalf.Release() - vals = []Datum{NewDatum(arrHalf), NewDatum(arrAllNulls)} - defer vals[0].Release() - defer 
vals[1].Release() - - checkSetAll(vals, true) - checkSetAll(vals, false) - }) - - p.Run("one value is NullType", func() { - nullarr := array.NewNull(length) - arr := p.getBoolArr(int64(length), trueProb, 0) - defer arr.Release() - vals = []Datum{NewDatum(arr), NewDatum(nullarr)} - defer vals[0].Release() - checkSetAll(vals, true) - checkSetAll(vals, false) - }) - - p.Run("Other scenarios", func() { - // an all-null bitmap is zero-copied over, even though - // there is a null-scalar earlier in the batch - outSpan := &exec.ArraySpan{ - Type: arrow.FixedWidthTypes.Boolean, - Len: int64(length), - } - arrAllNulls := p.getBoolArr(int64(length), trueProb, 1) - defer arrAllNulls.Release() - - batch := &ExecBatch{ - Values: []Datum{ - NewDatum(scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean)), - NewDatum(arrAllNulls), - }, - Len: int64(length), - } - defer batch.Values[1].Release() - - p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), outSpan)) - p.Same(arrAllNulls.Data().Buffers()[0], outSpan.Buffers[0].Owner) - outSpan.Buffers[0].Owner.Release() - }) -} - -func (p *PropagateNullsSuite) TestSingleValueWithNulls() { - const length int64 = 100 - arr := p.getBoolArr(length, 0.5, 0.5) - defer arr.Release() - - checkSliced := func(offset int64, prealloc bool, outOffset int64) { - // unaligned bitmap, zero copy not possible - sliced := array.NewSlice(arr, offset, int64(arr.Len())) - defer sliced.Release() - vals := []Datum{NewDatum(sliced)} - defer vals[0].Release() - - output := &exec.ArraySpan{ - Type: arrow.FixedWidthTypes.Boolean, - Len: vals[0].Len(), - Offset: outOffset, - } - - batch := &ExecBatch{Values: vals, Len: vals[0].Len()} - - var preallocatedBitmap *memory.Buffer - if prealloc { - preallocatedBitmap = memory.NewResizableBuffer(p.mem) - preallocatedBitmap.Resize(int(bitutil.BytesForBits(int64(sliced.Len()) + outOffset))) - defer preallocatedBitmap.Release() - output.Buffers[0].SetBuffer(preallocatedBitmap) - output.Buffers[0].SelfAlloc = true - } else 
{ - p.EqualValues(0, output.Offset) - } - - p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), output)) - if !prealloc { - parentBuf := arr.Data().Buffers()[0] - if offset == 0 { - // validity bitmap same, no slice - p.Same(parentBuf, output.Buffers[0].Owner) - } else if offset%8 == 0 { - // validity bitmap sliced - p.NotSame(parentBuf, output.Buffers[0].Owner) - p.Same(parentBuf, output.Buffers[0].Owner.Parent()) - defer output.Buffers[0].Owner.Release() - } else { - // new memory for offset not 0 mod 8 - p.NotSame(parentBuf, output.Buffers[0].Owner) - p.Nil(output.Buffers[0].Owner.Parent()) - defer output.Buffers[0].Owner.Release() - } - } else { - // preallocated, so check that the validity bitmap is unbothered - p.Same(preallocatedBitmap, output.Buffers[0].Owner) - } - - p.EqualValues(sliced.NullN(), output.UpdateNullCount()) - p.True(bitutil.BitmapEquals( - sliced.NullBitmapBytes(), output.Buffers[0].Buf, - int64(sliced.Data().Offset()), output.Offset, output.Len)) - p.assertValidityZeroExtraBits(output.Buffers[0].Buf, int(output.Len), int(output.Offset)) - } - - tests := []struct { - offset, outoffset int64 - prealloc bool - }{ - {8, 0, false}, - {7, 0, false}, - {8, 0, true}, - {7, 0, true}, - {8, 4, true}, - {7, 4, true}, - } - - for _, tt := range tests { - name := fmt.Sprintf("off=%d,prealloc=%t,outoff=%d", tt.offset, tt.prealloc, tt.outoffset) - p.Run(name, func() { - checkSliced(tt.offset, tt.prealloc, tt.outoffset) - }) - } -} - -func (p *PropagateNullsSuite) TestIntersectsNulls() { - const length = 16 - var ( - // 0b01111111 0b11001111 - bitmap1 = [8]byte{127, 207, 0, 0, 0, 0, 0, 0} - // 0b11111110 0b01111111 - bitmap2 = [8]byte{254, 127, 0, 0, 0, 0, 0, 0} - // 0b11101111 0b11111110 - bitmap3 = [8]byte{239, 254, 0, 0, 0, 0, 0, 0} - ) - - arr1 := array.NewData(arrow.FixedWidthTypes.Boolean, length, - []*memory.Buffer{memory.NewBufferBytes(bitmap1[:]), nil}, nil, array.UnknownNullCount, 0) - arr2 := array.NewData(arrow.FixedWidthTypes.Boolean, 
length, - []*memory.Buffer{memory.NewBufferBytes(bitmap2[:]), nil}, nil, array.UnknownNullCount, 0) - arr3 := array.NewData(arrow.FixedWidthTypes.Boolean, length, - []*memory.Buffer{memory.NewBufferBytes(bitmap3[:]), nil}, nil, array.UnknownNullCount, 0) - - checkCase := func(vals []Datum, exNullCount int, exBitmap []byte, prealloc bool, outoffset int) { - batch := &ExecBatch{Values: vals, Len: length} - - output := &exec.ArraySpan{Type: arrow.FixedWidthTypes.Boolean, Len: length} - - var nulls *memory.Buffer - if prealloc { - // make the buffer one byte bigger so we can have non-zero offsets - nulls = memory.NewResizableBuffer(p.mem) - nulls.Resize(3) - defer nulls.Release() - output.Buffers[0].SetBuffer(nulls) - output.Buffers[0].SelfAlloc = true - } else { - // non-zero output offset not permitted unless output memory is preallocated - p.Equal(0, outoffset) - } - - output.Offset = int64(outoffset) - - p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), output)) - - // preallocated memory used - if prealloc { - p.Same(nulls, output.Buffers[0].Owner) - } else { - defer output.Buffers[0].Owner.Release() - } - - p.EqualValues(array.UnknownNullCount, output.Nulls) - p.EqualValues(exNullCount, output.UpdateNullCount()) - - p.True(bitutil.BitmapEquals(exBitmap, output.Buffers[0].Buf, 0, output.Offset, length)) - p.assertValidityZeroExtraBits(output.Buffers[0].Buf, int(output.Len), int(output.Offset)) - } - - p.Run("0b01101110 0b01001110", func() { - // 0b01101110 0b01001110 - expected := [2]byte{110, 78} - checkCase([]Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(arr3)}, 7, expected[:], false, 0) - checkCase([]Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(arr3)}, 7, expected[:], true, 0) - checkCase([]Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(arr3)}, 7, expected[:], true, 4) - }) - - p.Run("0b01111110 0b01001111", func() { - expected := [2]byte{126, 79} - checkCase([]Datum{NewDatum(arr1), NewDatum(arr2)}, 5, expected[:], false, 0) - 
checkCase([]Datum{NewDatum(arr1), NewDatum(arr2)}, 5, expected[:], true, 4) - }) -} - -func TestComputeInternals(t *testing.T) { - suite.Run(t, new(PropagateNullsSuite)) -} - -type ExecSpanItrSuite struct { - ComputeInternalsTestSuite - - iter spanIterator -} - -func (e *ExecSpanItrSuite) setupIterator(batch *ExecBatch, maxChunk int64) { - var err error - _, e.iter, err = iterateExecSpans(batch, maxChunk, true) - e.NoError(err) -} - -func (e *ExecSpanItrSuite) checkIteration(input *ExecBatch, chunksize int, exBatchSizes []int) { - e.setupIterator(input, int64(chunksize)) - var ( - batch exec.ExecSpan - curPos int64 - pos int64 - next bool - ) - - for _, sz := range exBatchSizes { - batch, pos, next = e.iter() - e.True(next) - e.EqualValues(sz, batch.Len) - - for j, val := range input.Values { - switch val := val.(type) { - case *ScalarDatum: - e.Truef(scalar.Equals(batch.Values[j].Scalar, val.Value), "expected: %s\ngot: %s", val.Value, batch.Values[j].Scalar) - case *ArrayDatum: - arr := val.MakeArray() - sl := array.NewSlice(arr, curPos, curPos+batch.Len) - got := batch.Values[j].Array.MakeArray() - - e.Truef(array.Equal(sl, got), "expected: %s\ngot: %s", sl, got) - - got.Release() - arr.Release() - sl.Release() - case *ChunkedDatum: - carr := val.Value - if batch.Len == 0 { - e.Zero(carr.Len()) - } else { - chkd := array.NewChunkedSlice(carr, curPos, curPos+batch.Len) - defer chkd.Release() - e.Len(chkd.Chunks(), 1) - got := batch.Values[j].Array.MakeArray() - defer got.Release() - e.Truef(array.Equal(got, chkd.Chunk(0)), "expected: %s\ngot: %s", chkd.Chunk(0), got) - } - } - } - - curPos += int64(sz) - e.EqualValues(curPos, pos) - } - - batch, pos, next = e.iter() - e.Zero(batch) - e.False(next) - e.EqualValues(input.Len, pos) -} - -func (e *ExecSpanItrSuite) TestBasics() { - const length = 100 - - arr1 := e.getInt32Arr(length, 0.1) - defer arr1.Release() - arr2 := e.getFloat64Arr(length, 0.1) - defer arr2.Release() - - input := &ExecBatch{ - Len: length, - 
Values: []Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(int32(3))}, - } - defer func() { - for _, v := range input.Values { - v.Release() - } - }() - - e.Run("simple", func() { - e.setupIterator(input, DefaultMaxChunkSize) - - batch, pos, next := e.iter() - e.True(next) - e.Len(batch.Values, 3) - e.EqualValues(length, batch.Len) - e.EqualValues(length, pos) - - in1 := input.Values[0].(*ArrayDatum).MakeArray() - defer in1.Release() - in2 := input.Values[1].(*ArrayDatum).MakeArray() - defer in2.Release() - out1 := batch.Values[0].Array.MakeArray() - defer out1.Release() - out2 := batch.Values[1].Array.MakeArray() - defer out2.Release() - - e.Truef(array.Equal(in1, out1), "expected: %s\ngot: %s", in1, out1) - e.Truef(array.Equal(in2, out2), "expected: %s\ngot: %s", in2, out2) - e.True(scalar.Equals(input.Values[2].(*ScalarDatum).Value, batch.Values[2].Scalar), input.Values[2].(*ScalarDatum).Value, batch.Values[2].Scalar) - - _, pos, next = e.iter() - e.EqualValues(length, pos) - e.False(next) - }) - - e.Run("iterations", func() { - e.checkIteration(input, 16, []int{16, 16, 16, 16, 16, 16, 4}) - }) -} - -func (e *ExecSpanItrSuite) TestInputValidation() { - arr1 := e.getInt32Arr(10, 0.1) - defer arr1.Release() - arr2 := e.getInt32Arr(9, 0.1) - defer arr2.Release() - - // length mismatch - batch := &ExecBatch{ - Values: []Datum{&ArrayDatum{arr1.Data()}, &ArrayDatum{arr2.Data()}}, - Len: 10, - } - - _, _, err := iterateExecSpans(batch, DefaultMaxChunkSize, true) - e.ErrorIs(err, arrow.ErrInvalid) - - // swap order of input - batch.Values = []Datum{&ArrayDatum{arr2.Data()}, &ArrayDatum{arr1.Data()}} - - _, _, err = iterateExecSpans(batch, DefaultMaxChunkSize, true) - e.ErrorIs(err, arrow.ErrInvalid) - - batch.Values = []Datum{&ArrayDatum{arr1.Data()}} - _, _, err = iterateExecSpans(batch, DefaultMaxChunkSize, true) - e.NoError(err) -} - -func (e *ExecSpanItrSuite) TestChunkedArrays() { - arr1 := e.getInt32Chunked([]int64{0, 20, 10}) - defer arr1.Release() - arr2 := 
e.getInt32Chunked([]int64{15, 15}) - defer arr2.Release() - arr3 := e.getInt32Arr(30, 0.1) - defer arr3.Release() - - batch := &ExecBatch{ - Values: []Datum{ - &ChunkedDatum{arr1}, &ChunkedDatum{arr2}, &ArrayDatum{arr3.Data()}, - NewDatum(int32(5)), NewDatum(scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean))}, - Len: 30, - } - - e.checkIteration(batch, 10, []int{10, 5, 5, 10}) - e.checkIteration(batch, 20, []int{15, 5, 10}) - e.checkIteration(batch, 30, []int{15, 5, 10}) -} - -func (e *ExecSpanItrSuite) TestZeroLengthInput() { - carr := arrow.NewChunked(arrow.PrimitiveTypes.Int32, []arrow.Array{}) - checkArgs := func(batch *ExecBatch) { - _, itr, err := iterateExecSpans(batch, DefaultMaxChunkSize, true) - e.NoError(err) - itrSpan, _, next := itr() - - e.False(next) - e.Zero(itrSpan) - } - - input := &ExecBatch{Len: 0} - - // zero-length chunkedarray with zero chunks - input.Values = []Datum{&ChunkedDatum{carr}} - checkArgs(input) - - // zero-length array - arr := e.getInt32Arr(0, 0.1) - defer arr.Release() - input.Values = []Datum{&ArrayDatum{arr.Data()}} - checkArgs(input) - - // chunkedarray with single empty chunk - carr = e.getInt32Chunked([]int64{0}) - input.Values = []Datum{&ChunkedDatum{carr}} - checkArgs(input) -} - -func TestExecSpanIterator(t *testing.T) { - suite.Run(t, new(ExecSpanItrSuite)) -} diff --git a/go/arrow/compute/exec_test.go b/go/arrow/compute/exec_test.go deleted file mode 100644 index 27f6676f3187c..0000000000000 --- a/go/arrow/compute/exec_test.go +++ /dev/null @@ -1,379 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute - -import ( - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/stretchr/testify/suite" -) - -func ExecCopyArray(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - debug.Assert(len(batch.Values) == 1, "wrong number of values") - valueSize := int64(batch.Values[0].Type().(arrow.FixedWidthDataType).BitWidth() / 8) - - arg0 := batch.Values[0].Array - dst := out.Buffers[1].Buf[out.Offset*valueSize:] - src := arg0.Buffers[1].Buf[arg0.Offset*valueSize:] - copy(dst, src[:batch.Len*valueSize]) - return nil -} - -func ExecComputedBitmap(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - // propagate nulls not used. 
check that out bitmap isn't the same already - // as the input bitmap - arg0 := batch.Values[0].Array - if bitutil.CountSetBits(arg0.Buffers[1].Buf, int(arg0.Offset), int(batch.Len)) > 0 { - // check that the bitmap hasn't already been copied - debug.Assert(!bitutil.BitmapEquals(arg0.Buffers[0].Buf, out.Buffers[0].Buf, - arg0.Offset, out.Offset, batch.Len), "bitmap should not have already been copied") - } - - bitutil.CopyBitmap(arg0.Buffers[0].Buf, int(arg0.Offset), int(batch.Len), out.Buffers[0].Buf, int(out.Offset)) - return ExecCopyArray(ctx, batch, out) -} - -func ExecNoPreallocatedData(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - // validity preallocated, not data - debug.Assert(out.Offset == 0, "invalid offset for non-prealloc") - valueSize := int64(batch.Values[0].Type().(arrow.FixedWidthDataType).BitWidth() / 8) - out.Buffers[1].SetBuffer(ctx.Allocate(int(out.Len * valueSize))) - out.Buffers[1].SelfAlloc = true - return ExecCopyArray(ctx, batch, out) -} - -func ExecNoPreallocatedAnything(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - // neither validity nor data preallocated - debug.Assert(out.Offset == 0, "invalid offset for non-prealloc") - out.Buffers[0].SetBuffer(ctx.AllocateBitmap(out.Len)) - out.Buffers[0].SelfAlloc = true - arg0 := batch.Values[0].Array - bitutil.CopyBitmap(arg0.Buffers[0].Buf, int(arg0.Offset), int(batch.Len), out.Buffers[0].Buf, 0) - - // reuse kernel that allocates data - return ExecNoPreallocatedData(ctx, batch, out) -} - -type ExampleOptions struct { - Value scalar.Scalar -} - -func (e *ExampleOptions) TypeName() string { return "example" } - -type ExampleState struct { - Value scalar.Scalar -} - -func InitStateful(_ *exec.KernelCtx, args exec.KernelInitArgs) (exec.KernelState, error) { - value := args.Options.(*ExampleOptions).Value - return &ExampleState{Value: value}, nil -} - -func ExecStateful(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { 
- state := ctx.State.(*ExampleState) - multiplier := state.Value.(*scalar.Int32).Value - - arg0 := batch.Values[0].Array - arg0Data := exec.GetSpanValues[int32](&arg0, 1) - dst := exec.GetSpanValues[int32](out, 1) - for i, v := range arg0Data { - dst[i] = v * multiplier - } - return nil -} - -func ExecAddInt32(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - left := exec.GetSpanValues[int32](&batch.Values[0].Array, 1) - right := exec.GetSpanValues[int32](&batch.Values[1].Array, 1) - outValues := exec.GetSpanValues[int32](out, 1) - for i := 0; i < int(batch.Len); i++ { - outValues[i] = left[i] + right[i] - } - return nil -} - -type CallScalarFuncSuite struct { - ComputeInternalsTestSuite -} - -func (c *CallScalarFuncSuite) addCopyFuncs() { - registry = GetFunctionRegistry() - - fn := NewScalarFunction("test_copy", Unary(), EmptyFuncDoc) - types := []arrow.DataType{arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float64} - for _, t := range types { - c.NoError(fn.AddNewKernel([]exec.InputType{exec.NewExactInput(t)}, - exec.NewOutputType(t), ExecCopyArray, nil)) - } - c.True(registry.AddFunction(fn, false)) - - // a version which doesn't want the executor to call propagatenulls - fn2 := NewScalarFunction("test_copy_computed_bitmap", Unary(), EmptyFuncDoc) - kernel := exec.NewScalarKernel([]exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Uint8)}, - exec.NewOutputType(arrow.PrimitiveTypes.Uint8), ExecComputedBitmap, nil) - kernel.NullHandling = exec.NullComputedPrealloc - c.NoError(fn2.AddKernel(kernel)) - c.True(registry.AddFunction(fn2, false)) -} - -func (c *CallScalarFuncSuite) addNoPreallocFuncs() { - registry = GetFunctionRegistry() - - // a function that allocates its own output memory. 
we have cases - // for both non-preallocated data and non-preallocated bitmap - f1 := NewScalarFunction("test_nopre_data", Unary(), EmptyFuncDoc) - f2 := NewScalarFunction("test_nopre_validity_or_data", Unary(), EmptyFuncDoc) - - kernel := exec.NewScalarKernel( - []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Uint8)}, - exec.NewOutputType(arrow.PrimitiveTypes.Uint8), - ExecNoPreallocatedData, nil) - kernel.MemAlloc = exec.MemNoPrealloc - c.NoError(f1.AddKernel(kernel)) - - kernel.ExecFn = ExecNoPreallocatedAnything - kernel.NullHandling = exec.NullComputedNoPrealloc - c.NoError(f2.AddKernel(kernel)) - - c.True(registry.AddFunction(f1, false)) - c.True(registry.AddFunction(f2, false)) -} - -func (c *CallScalarFuncSuite) addStatefulFunc() { - registry := GetFunctionRegistry() - - // this functions behavior depends on a static parameter that - // is made available to the execution through its options object - fn := NewScalarFunction("test_stateful", Unary(), EmptyFuncDoc) - - c.NoError(fn.AddNewKernel([]exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int32)}, - exec.NewOutputType(arrow.PrimitiveTypes.Int32), ExecStateful, InitStateful)) - - c.True(registry.AddFunction(fn, false)) -} - -func (c *CallScalarFuncSuite) addScalarFunc() { - registry := GetFunctionRegistry() - - fn := NewScalarFunction("test_scalar_add_int32", Binary(), EmptyFuncDoc) - c.NoError(fn.AddNewKernel([]exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int32), - exec.NewExactInput(arrow.PrimitiveTypes.Int32)}, - exec.NewOutputType(arrow.PrimitiveTypes.Int32), ExecAddInt32, nil)) - c.True(registry.AddFunction(fn, false)) -} - -func (c *CallScalarFuncSuite) SetupSuite() { - c.addCopyFuncs() - c.addNoPreallocFuncs() - c.addStatefulFunc() - c.addScalarFunc() -} - -func (c *CallScalarFuncSuite) TestArgumentValidation() { - // copy accepts only a single array arg - arr := c.getInt32Arr(10, 0.1) - defer arr.Release() - d1 := &ArrayDatum{Value: arr.Data()} - - c.Run("too many 
args", func() { - args := []Datum{d1, d1} - _, err := CallFunction(c.ctx.Ctx, "test_copy", nil, args...) - c.ErrorIs(err, arrow.ErrInvalid) - }) - - c.Run("too few args", func() { - _, err := CallFunction(c.ctx.Ctx, "test_copy", nil) - c.ErrorIs(err, arrow.ErrInvalid) - }) - - d1Scalar := NewDatum(int32(5)) - result, err := CallFunction(c.ctx.Ctx, "test_copy", nil, d1) - c.NoError(err) - result.Release() - result, err = CallFunction(c.ctx.Ctx, "test_copy", nil, d1Scalar) - c.NoError(err) - result.Release() -} - -func (c *CallScalarFuncSuite) TestPreallocationCases() { - nullProb := float64(0.2) - arr := c.getUint8Arr(100, nullProb) - defer arr.Release() - - funcNames := []string{"test_copy", "test_copy_computed_bitmap"} - for _, funcName := range funcNames { - c.Run(funcName, func() { - c.resetCtx() - - c.Run("single output default", func() { - result, err := CallFunction(c.ctx.Ctx, funcName, nil, &ArrayDatum{arr.Data()}) - c.NoError(err) - defer result.Release() - c.Equal(KindArray, result.Kind()) - c.assertDatumEqual(arr, result) - }) - - c.Run("exec chunks", func() { - // set the exec_chunksize to be smaller so now we have - // several invocations of the kernel, - // but still only one output array - c.execCtx.ChunkSize = 80 - result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ArrayDatum{arr.Data()}) - c.NoError(err) - defer result.Release() - c.Equal(KindArray, result.Kind()) - c.assertDatumEqual(arr, result) - }) - - c.Run("not multiple 8 chunk", func() { - // chunksize is not a multiple of 8 - c.execCtx.ChunkSize = 11 - result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ArrayDatum{arr.Data()}) - c.NoError(err) - defer result.Release() - c.Equal(KindArray, result.Kind()) - c.assertDatumEqual(arr, result) - }) - - c.Run("chunked", func() { - // input is chunked, output is one big chunk - chk1, chk2 := array.NewSlice(arr, 0, 10), array.NewSlice(arr, 10, int64(arr.Len())) - defer chk1.Release() - defer 
chk2.Release() - carr := arrow.NewChunked(arr.DataType(), []arrow.Array{chk1, chk2}) - defer carr.Release() - - result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ChunkedDatum{carr}) - c.NoError(err) - defer result.Release() - c.Equal(KindChunked, result.Kind()) - actual := result.(*ChunkedDatum).Value - c.Len(actual.Chunks(), 1) - c.Truef(array.ChunkedEqual(actual, carr), "expected: %s\ngot: %s", carr, actual) - }) - - c.Run("independent", func() { - // preallocate independently for each batch - c.execCtx.PreallocContiguous = false - c.execCtx.ChunkSize = 40 - result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ArrayDatum{arr.Data()}) - c.NoError(err) - defer result.Release() - c.Equal(KindChunked, result.Kind()) - - carr := result.(*ChunkedDatum).Value - c.Len(carr.Chunks(), 3) - sl := array.NewSlice(arr, 0, 40) - defer sl.Release() - c.assertArrayEqual(sl, carr.Chunk(0)) - sl = array.NewSlice(arr, 40, 80) - defer sl.Release() - c.assertArrayEqual(sl, carr.Chunk(1)) - sl = array.NewSlice(arr, 80, int64(arr.Len())) - defer sl.Release() - c.assertArrayEqual(sl, carr.Chunk(2)) - }) - }) - } -} - -func (c *CallScalarFuncSuite) TestBasicNonStandardCases() { - // test some more cases - // - // * validity bitmap computed by kernel rather than propagate nulls - // * data not pre-allocated - // * validity bitmap not pre-allocated - - nullProb := float64(0.2) - arr := c.getUint8Arr(1000, nullProb) - defer arr.Release() - args := []Datum{&ArrayDatum{arr.Data()}} - - for _, funcName := range []string{"test_nopre_data", "test_nopre_validity_or_data"} { - c.Run("funcName", func() { - c.resetCtx() - c.Run("single output default", func() { - result, err := CallFunction(c.ctx.Ctx, funcName, nil, args...) 
- c.NoError(err) - defer result.Release() - c.Equal(KindArray, result.Kind()) - c.assertDatumEqual(arr, result) - }) - - c.Run("split into 3 chunks", func() { - c.execCtx.ChunkSize = 400 - result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, args...) - c.NoError(err) - defer result.Release() - - c.Equal(KindChunked, result.Kind()) - - carr := result.(*ChunkedDatum).Value - c.Len(carr.Chunks(), 3) - sl := array.NewSlice(arr, 0, 400) - defer sl.Release() - c.assertArrayEqual(sl, carr.Chunk(0)) - sl = array.NewSlice(arr, 400, 800) - defer sl.Release() - c.assertArrayEqual(sl, carr.Chunk(1)) - sl = array.NewSlice(arr, 800, int64(arr.Len())) - defer sl.Release() - c.assertArrayEqual(sl, carr.Chunk(2)) - }) - }) - } -} - -func (c *CallScalarFuncSuite) TestStatefulKernel() { - input, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 3, null, 5]`)) - defer input.Release() - - multiplier := scalar.MakeScalar(int32(2)) - expected, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[2, 4, 6, null, 10]`)) - defer expected.Release() - - options := &ExampleOptions{multiplier} - result, err := CallFunction(c.ctx.Ctx, "test_stateful", options, &ArrayDatum{input.Data()}) - c.NoError(err) - defer result.Release() - c.assertDatumEqual(expected, result) -} - -func (c *CallScalarFuncSuite) TestScalarFunction() { - args := []Datum{NewDatum(int32(5)), NewDatum(int32(7))} - result, err := CallFunction(c.ctx.Ctx, "test_scalar_add_int32", nil, args...) 
- c.NoError(err) - defer result.Release() - - c.Equal(KindScalar, result.Kind()) - expected := scalar.MakeScalar(int32(12)) - c.True(scalar.Equals(expected, result.(*ScalarDatum).Value)) -} - -func TestCallScalarFunctions(t *testing.T) { - suite.Run(t, new(CallScalarFuncSuite)) -} diff --git a/go/arrow/compute/executor.go b/go/arrow/compute/executor.go deleted file mode 100644 index 1d197e4220ab2..0000000000000 --- a/go/arrow/compute/executor.go +++ /dev/null @@ -1,1122 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package compute - -import ( - "context" - "fmt" - "math" - "runtime" - "sync" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/internal" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" -) - -// ExecCtx holds simple contextual information for execution -// such as the default ChunkSize for batch iteration, whether or not -// to ensure contiguous preallocations for kernels that want preallocation, -// and a reference to the desired function registry to use. -// -// An ExecCtx should be placed into a context.Context by using -// SetExecCtx and GetExecCtx to pass it along for execution. -type ExecCtx struct { - // ChunkSize is the size used when iterating batches for execution - // ChunkSize elements will be operated on as a time unless an argument - // is a chunkedarray with a chunk that is smaller - ChunkSize int64 - // PreallocContiguous determines whether preallocating memory for - // execution of compute attempts to preallocate a full contiguous - // buffer for all of the chunks beforehand. - PreallocContiguous bool - // Registry allows specifying the Function Registry to utilize - // when searching for kernel implementations. - Registry FunctionRegistry - // ExecChannelSize is the size of the channel used for passing - // exec results to the WrapResults function. - ExecChannelSize int - // NumParallel determines the number of parallel goroutines - // allowed for parallel executions. - NumParallel int -} - -type ctxExecKey struct{} - -const DefaultMaxChunkSize = math.MaxInt64 - -var ( - // global default ExecCtx object, initialized with the - // default max chunk size, contiguous preallocations, and - // the default function registry. 
- defaultExecCtx ExecCtx - - // WithAllocator returns a new context with the provided allocator - // embedded into the context. - WithAllocator = exec.WithAllocator - // GetAllocator retrieves the allocator from the context, or returns - // memory.DefaultAllocator if there was no allocator in the provided - // context. - GetAllocator = exec.GetAllocator -) - -// DefaultExecCtx returns the default exec context which will be used -// if there is no ExecCtx set into the context for execution. -// -// This can be called to get a copy of the default values which can -// then be modified to set into a context. -// -// The default exec context uses the following values: -// - ChunkSize = DefaultMaxChunkSize (MaxInt64) -// - PreallocContiguous = true -// - Registry = GetFunctionRegistry() -// - ExecChannelSize = 10 -// - NumParallel = runtime.NumCPU() -func DefaultExecCtx() ExecCtx { return defaultExecCtx } - -func init() { - defaultExecCtx.ChunkSize = DefaultMaxChunkSize - defaultExecCtx.PreallocContiguous = true - defaultExecCtx.Registry = GetFunctionRegistry() - defaultExecCtx.ExecChannelSize = 10 - // default level of parallelism - // set to 1 to disable parallelization - defaultExecCtx.NumParallel = runtime.NumCPU() -} - -// SetExecCtx returns a new child context containing the passed in ExecCtx -func SetExecCtx(ctx context.Context, e ExecCtx) context.Context { - return context.WithValue(ctx, ctxExecKey{}, e) -} - -// GetExecCtx returns an embedded ExecCtx from the provided context. -// If it does not contain an ExecCtx, then the default one is returned. -func GetExecCtx(ctx context.Context) ExecCtx { - e, ok := ctx.Value(ctxExecKey{}).(ExecCtx) - if ok { - return e - } - return defaultExecCtx -} - -// ExecBatch is a unit of work for kernel execution. It contains a collection -// of Array and Scalar values. -// -// ExecBatch is semantically similar to a RecordBatch but for a SQL-style -// execution context. 
It represents a collection or records, but constant -// "columns" are represented by Scalar values rather than having to be -// converted into arrays with repeated values. -type ExecBatch struct { - Values []Datum - // Guarantee is a predicate Expression guaranteed to evaluate to true for - // all rows in this batch. - // Guarantee Expression - // Len is the semantic length of this ExecBatch. When the values are - // all scalars, the length should be set to 1 for non-aggregate kernels. - // Otherwise the length is taken from the array values. Aggregate kernels - // can have an ExecBatch formed by projecting just the partition columns - // from a batch in which case it would have scalar rows with length > 1 - // - // If the array values are of length 0, then the length is 0 regardless of - // whether any values are Scalar. - Len int64 -} - -func (e ExecBatch) NumValues() int { return len(e.Values) } - -// simple struct for defining how to preallocate a particular buffer. -type bufferPrealloc struct { - bitWidth int - addLen int -} - -func allocateDataBuffer(ctx *exec.KernelCtx, length, bitWidth int) *memory.Buffer { - switch bitWidth { - case 1: - return ctx.AllocateBitmap(int64(length)) - default: - bufsiz := int(bitutil.BytesForBits(int64(length * bitWidth))) - return ctx.Allocate(bufsiz) - } -} - -func addComputeDataPrealloc(dt arrow.DataType, widths []bufferPrealloc) []bufferPrealloc { - if typ, ok := dt.(arrow.FixedWidthDataType); ok { - return append(widths, bufferPrealloc{bitWidth: typ.BitWidth()}) - } - - switch dt.ID() { - case arrow.BINARY, arrow.STRING, arrow.LIST, arrow.MAP: - return append(widths, bufferPrealloc{bitWidth: 32, addLen: 1}) - case arrow.LARGE_BINARY, arrow.LARGE_STRING, arrow.LARGE_LIST: - return append(widths, bufferPrealloc{bitWidth: 64, addLen: 1}) - case arrow.STRING_VIEW, arrow.BINARY_VIEW: - return append(widths, bufferPrealloc{bitWidth: arrow.ViewHeaderSizeBytes * 8}) - } - return widths -} - -// enum to define a generalized 
assumption of the nulls in the inputs -type nullGeneralization int8 - -const ( - nullGenPerhapsNull nullGeneralization = iota - nullGenAllValid - nullGenAllNull -) - -func getNullGen(val *exec.ExecValue) nullGeneralization { - dtID := val.Type().ID() - switch { - case dtID == arrow.NULL: - return nullGenAllNull - case !internal.DefaultHasValidityBitmap(dtID): - return nullGenAllValid - case val.IsScalar(): - if val.Scalar.IsValid() { - return nullGenAllValid - } - return nullGenAllNull - default: - arr := val.Array - // do not count if they haven't been counted already - if arr.Nulls == 0 || arr.Buffers[0].Buf == nil { - return nullGenAllValid - } - - if arr.Nulls == arr.Len { - return nullGenAllNull - } - } - return nullGenPerhapsNull -} - -func getNullGenDatum(datum Datum) nullGeneralization { - var val exec.ExecValue - switch datum.Kind() { - case KindArray: - val.Array.SetMembers(datum.(*ArrayDatum).Value) - case KindScalar: - val.Scalar = datum.(*ScalarDatum).Value - case KindChunked: - return nullGenPerhapsNull - default: - debug.Assert(false, "should be array, scalar, or chunked!") - return nullGenPerhapsNull - } - return getNullGen(&val) -} - -// populate the validity bitmaps with the intersection of the nullity -// of the arguments. If a preallocated bitmap is not provided, then one -// will be allocated if needed (in some cases a bitmap can be zero-copied -// from the arguments). If any Scalar value is null, then the entire -// validity bitmap will be set to null. -func propagateNulls(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ArraySpan) (err error) { - if out.Type.ID() == arrow.NULL { - // null output type is a no-op (rare but it happens) - return - } - - // this function is ONLY able to write into output with non-zero offset - // when the bitmap is preallocated. 
- if out.Offset != 0 && out.Buffers[0].Buf == nil { - return fmt.Errorf("%w: can only propagate nulls into pre-allocated memory when output offset is non-zero", arrow.ErrInvalid) - } - - var ( - arrsWithNulls = make([]*exec.ArraySpan, 0, len(batch.Values)) - isAllNull bool - prealloc bool = out.Buffers[0].Buf != nil - ) - - for i := range batch.Values { - v := &batch.Values[i] - nullGen := getNullGen(v) - if nullGen == nullGenAllNull { - isAllNull = true - } - if nullGen != nullGenAllValid && v.IsArray() { - arrsWithNulls = append(arrsWithNulls, &v.Array) - } - } - - outBitmap := out.Buffers[0].Buf - if isAllNull { - // an all-null value gives us a short circuit opportunity - // output should all be null - out.Nulls = out.Len - if prealloc { - bitutil.SetBitsTo(outBitmap, out.Offset, out.Len, false) - return - } - - // walk all the values with nulls instead of breaking on the first - // in case we find a bitmap that can be reused in the non-preallocated case - for _, arr := range arrsWithNulls { - if arr.Nulls == arr.Len && arr.Buffers[0].Owner != nil { - buf := arr.GetBuffer(0) - buf.Retain() - out.Buffers[0].Buf = buf.Bytes() - out.Buffers[0].Owner = buf - return - } - } - - buf := ctx.AllocateBitmap(int64(out.Len)) - out.Buffers[0].Owner = buf - out.Buffers[0].Buf = buf.Bytes() - out.Buffers[0].SelfAlloc = true - bitutil.SetBitsTo(out.Buffers[0].Buf, out.Offset, out.Len, false) - return - } - - out.Nulls = array.UnknownNullCount - switch len(arrsWithNulls) { - case 0: - out.Nulls = 0 - if prealloc { - bitutil.SetBitsTo(outBitmap, out.Offset, out.Len, true) - } - case 1: - arr := arrsWithNulls[0] - out.Nulls = arr.Nulls - if prealloc { - bitutil.CopyBitmap(arr.Buffers[0].Buf, int(arr.Offset), int(arr.Len), outBitmap, int(out.Offset)) - return - } - - switch { - case arr.Offset == 0: - out.Buffers[0] = arr.Buffers[0] - out.Buffers[0].Owner.Retain() - case arr.Offset%8 == 0: - buf := memory.SliceBuffer(arr.GetBuffer(0), int(arr.Offset)/8, 
int(bitutil.BytesForBits(arr.Len))) - out.Buffers[0].Buf = buf.Bytes() - out.Buffers[0].Owner = buf - default: - buf := ctx.AllocateBitmap(int64(out.Len)) - out.Buffers[0].Owner = buf - out.Buffers[0].Buf = buf.Bytes() - out.Buffers[0].SelfAlloc = true - bitutil.CopyBitmap(arr.Buffers[0].Buf, int(arr.Offset), int(arr.Len), out.Buffers[0].Buf, 0) - } - return - - default: - if !prealloc { - buf := ctx.AllocateBitmap(int64(out.Len)) - out.Buffers[0].Owner = buf - out.Buffers[0].Buf = buf.Bytes() - out.Buffers[0].SelfAlloc = true - outBitmap = out.Buffers[0].Buf - } - - acc := func(left, right *exec.ArraySpan) { - debug.Assert(left.Buffers[0].Buf != nil, "invalid intersection for null propagation") - debug.Assert(right.Buffers[0].Buf != nil, "invalid intersection for null propagation") - bitutil.BitmapAnd(left.Buffers[0].Buf, right.Buffers[0].Buf, left.Offset, right.Offset, outBitmap, out.Offset, out.Len) - } - - acc(arrsWithNulls[0], arrsWithNulls[1]) - for _, arr := range arrsWithNulls[2:] { - acc(out, arr) - } - } - return -} - -func inferBatchLength(values []Datum) (length int64, allSame bool) { - length, allSame = -1, true - areAllScalar := true - for _, arg := range values { - switch arg := arg.(type) { - case *ArrayDatum: - argLength := arg.Len() - if length < 0 { - length = argLength - } else { - if length != argLength { - allSame = false - return - } - } - areAllScalar = false - case *ChunkedDatum: - argLength := arg.Len() - if length < 0 { - length = argLength - } else { - if length != argLength { - allSame = false - return - } - } - areAllScalar = false - } - } - - if areAllScalar && len(values) > 0 { - length = 1 - } else if length < 0 { - length = 0 - } - allSame = true - return -} - -// KernelExecutor is the interface for all executors to initialize and -// call kernel execution functions on batches. 
-type KernelExecutor interface { - // Init must be called *after* the kernel's init method and any - // KernelState must be set into the KernelCtx *before* calling - // this Init method. This is to facilitate the case where - // Init may be expensive and does not need to be called - // again for each execution of the kernel. For example, - // the same lookup table can be re-used for all scanned batches - // in a dataset filter. - Init(*exec.KernelCtx, exec.KernelInitArgs) error - // Execute the kernel for the provided batch and pass the resulting - // Datum values to the provided channel. - Execute(context.Context, *ExecBatch, chan<- Datum) error - // WrapResults exists for the case where an executor wants to post process - // the batches of result datums. Such as creating a ChunkedArray from - // multiple output batches or so on. Results from individual batch - // executions should be read from the out channel, and WrapResults should - // return the final Datum result. - WrapResults(ctx context.Context, out <-chan Datum, chunkedArgs bool) Datum - // CheckResultType checks the actual result type against the resolved - // output type. If the types don't match an error is returned - CheckResultType(out Datum) error - // Clear resets the state in the executor so that it can be reused. - Clear() -} - -// the base implementation for executing non-aggregate kernels. 
-type nonAggExecImpl struct { - ctx *exec.KernelCtx - ectx ExecCtx - kernel exec.NonAggKernel - outType arrow.DataType - numOutBuf int - dataPrealloc []bufferPrealloc - preallocValidity bool -} - -func (e *nonAggExecImpl) Clear() { - e.ctx, e.kernel, e.outType = nil, nil, nil - if e.dataPrealloc != nil { - e.dataPrealloc = e.dataPrealloc[:0] - } -} - -func (e *nonAggExecImpl) Init(ctx *exec.KernelCtx, args exec.KernelInitArgs) (err error) { - e.ctx, e.kernel = ctx, args.Kernel.(exec.NonAggKernel) - e.outType, err = e.kernel.GetSig().OutType.Resolve(ctx, args.Inputs) - e.ectx = GetExecCtx(ctx.Ctx) - return -} - -func (e *nonAggExecImpl) prepareOutput(length int) *exec.ExecResult { - var nullCount int = array.UnknownNullCount - - if e.kernel.GetNullHandling() == exec.NullNoOutput { - nullCount = 0 - } - - output := &exec.ArraySpan{ - Type: e.outType, - Len: int64(length), - Nulls: int64(nullCount), - } - - if e.preallocValidity { - buf := e.ctx.AllocateBitmap(int64(length)) - output.Buffers[0].Owner = buf - output.Buffers[0].Buf = buf.Bytes() - output.Buffers[0].SelfAlloc = true - } - - for i, pre := range e.dataPrealloc { - if pre.bitWidth >= 0 { - buf := allocateDataBuffer(e.ctx, length+pre.addLen, pre.bitWidth) - output.Buffers[i+1].Owner = buf - output.Buffers[i+1].Buf = buf.Bytes() - output.Buffers[i+1].SelfAlloc = true - } - } - - return output -} - -func (e *nonAggExecImpl) CheckResultType(out Datum) error { - typ := out.(ArrayLikeDatum).Type() - if typ != nil && !arrow.TypeEqual(e.outType, typ) { - return fmt.Errorf("%w: kernel type result mismatch: declared as %s, actual is %s", - arrow.ErrType, e.outType, typ) - } - return nil -} - -type spanIterator func() (exec.ExecSpan, int64, bool) - -func NewScalarExecutor() KernelExecutor { return &scalarExecutor{} } - -type scalarExecutor struct { - nonAggExecImpl - - elideValidityBitmap bool - preallocAllBufs bool - preallocContiguous bool - allScalars bool - iter spanIterator - iterLen int64 -} - -func (s 
*scalarExecutor) Execute(ctx context.Context, batch *ExecBatch, data chan<- Datum) (err error) { - s.allScalars, s.iter, err = iterateExecSpans(batch, s.ectx.ChunkSize, true) - if err != nil { - return - } - - s.iterLen = batch.Len - - if batch.Len == 0 { - result := array.MakeArrayOfNull(exec.GetAllocator(s.ctx.Ctx), s.outType, 0) - defer result.Release() - out := &exec.ArraySpan{} - out.SetMembers(result.Data()) - return s.emitResult(out, data) - } - - if err = s.setupPrealloc(batch.Len, batch.Values); err != nil { - return - } - - return s.executeSpans(data) -} - -func (s *scalarExecutor) WrapResults(ctx context.Context, out <-chan Datum, hasChunked bool) Datum { - var ( - output Datum - acc []arrow.Array - ) - - toChunked := func() { - acc = output.(ArrayLikeDatum).Chunks() - output.Release() - output = nil - } - - // get first output - select { - case <-ctx.Done(): - return nil - case output = <-out: - // if the inputs contained at least one chunked array - // then we want to return chunked output - if hasChunked { - toChunked() - } - } - - for { - select { - case <-ctx.Done(): - // context is done, either cancelled or a timeout. - // either way, we end early and return what we've got so far. - return output - case o, ok := <-out: - if !ok { // channel closed, wrap it up - if output != nil { - return output - } - - for _, c := range acc { - defer c.Release() - } - - chkd := arrow.NewChunked(s.outType, acc) - defer chkd.Release() - return NewDatum(chkd) - } - - // if we get multiple batches of output, then we need - // to return it as a chunked array. 
- if acc == nil { - toChunked() - } - - defer o.Release() - if o.Len() == 0 { // skip any empty batches - continue - } - - acc = append(acc, o.(*ArrayDatum).MakeArray()) - } - } -} - -func (s *scalarExecutor) executeSpans(data chan<- Datum) (err error) { - var ( - input exec.ExecSpan - output exec.ExecResult - next bool - ) - - if s.preallocContiguous { - // make one big output alloc - prealloc := s.prepareOutput(int(s.iterLen)) - output = *prealloc - - output.Offset = 0 - var resultOffset int64 - var nextOffset int64 - for err == nil { - if input, nextOffset, next = s.iter(); !next { - break - } - output.SetSlice(resultOffset, input.Len) - err = s.executeSingleSpan(&input, &output) - resultOffset = nextOffset - } - if err != nil { - prealloc.Release() - return - } - - return s.emitResult(prealloc, data) - } - - // fully preallocating, but not contiguously - // we (maybe) preallocate only for the output of processing - // the current chunk - for err == nil { - if input, _, next = s.iter(); !next { - break - } - - output = *s.prepareOutput(int(input.Len)) - if err = s.executeSingleSpan(&input, &output); err != nil { - output.Release() - return - } - err = s.emitResult(&output, data) - } - - return -} - -func (s *scalarExecutor) executeSingleSpan(input *exec.ExecSpan, out *exec.ExecResult) error { - switch { - case out.Type.ID() == arrow.NULL: - out.Nulls = out.Len - case s.kernel.GetNullHandling() == exec.NullIntersection: - if !s.elideValidityBitmap { - propagateNulls(s.ctx, input, out) - } - case s.kernel.GetNullHandling() == exec.NullNoOutput: - out.Nulls = 0 - } - return s.kernel.Exec(s.ctx, input, out) -} - -func (s *scalarExecutor) setupPrealloc(totalLen int64, args []Datum) error { - s.numOutBuf = len(s.outType.Layout().Buffers) - outTypeID := s.outType.ID() - // default to no validity pre-allocation for the following cases: - // - Output Array is NullArray - // - kernel.NullHandling is ComputeNoPrealloc or OutputNotNull - s.preallocValidity = false - - if 
outTypeID != arrow.NULL { - switch s.kernel.GetNullHandling() { - case exec.NullComputedPrealloc: - s.preallocValidity = true - case exec.NullIntersection: - s.elideValidityBitmap = true - for _, a := range args { - nullGen := getNullGenDatum(a) == nullGenAllValid - s.elideValidityBitmap = s.elideValidityBitmap && nullGen - } - s.preallocValidity = !s.elideValidityBitmap - case exec.NullNoOutput: - s.elideValidityBitmap = true - } - } - - if s.kernel.GetMemAlloc() == exec.MemPrealloc { - s.dataPrealloc = addComputeDataPrealloc(s.outType, s.dataPrealloc) - } - - // validity bitmap either preallocated or elided, and all data buffers allocated - // this is basically only true for primitive types that are not dict-encoded - s.preallocAllBufs = - ((s.preallocValidity || s.elideValidityBitmap) && len(s.dataPrealloc) == (s.numOutBuf-1) && - !arrow.IsNested(outTypeID) && outTypeID != arrow.DICTIONARY) - - // contiguous prealloc only possible on non-nested types if all - // buffers are preallocated. 
otherwise we have to go chunk by chunk - // - // some kernels are also unable to write into sliced outputs, so - // we respect the kernel's attributes - s.preallocContiguous = - (s.ectx.PreallocContiguous && s.kernel.CanFillSlices() && - s.preallocAllBufs) - - return nil -} - -func (s *scalarExecutor) emitResult(resultData *exec.ArraySpan, data chan<- Datum) error { - var output Datum - if len(resultData.Buffers[0].Buf) != 0 { - resultData.UpdateNullCount() - } - if s.allScalars { - // we boxed scalar inputs as ArraySpan so now we have to unbox the output - arr := resultData.MakeArray() - defer arr.Release() - sc, err := scalar.GetScalar(arr, 0) - if err != nil { - return err - } - if r, ok := sc.(scalar.Releasable); ok { - defer r.Release() - } - output = NewDatum(sc) - } else { - d := resultData.MakeData() - defer d.Release() - output = NewDatum(d) - } - data <- output - return nil -} - -func checkAllIsValue(vals []Datum) error { - for _, v := range vals { - if !DatumIsValue(v) { - return fmt.Errorf("%w: tried executing function with non-value type: %s", - arrow.ErrInvalid, v) - } - } - return nil -} - -func checkIfAllScalar(batch *ExecBatch) bool { - for _, v := range batch.Values { - if v.Kind() != KindScalar { - return false - } - } - return batch.NumValues() > 0 -} - -// iterateExecSpans sets up and returns a function which can iterate a batch -// according to the chunk sizes. If the inputs contain chunked arrays, then -// we will find the min(chunk sizes, maxChunkSize) to ensure we return -// contiguous spans to execute on. -// -// the iteration function returns the next span to execute on, the current -// position in the full batch, and a boolean indicating whether or not -// a span was actually returned (there is data to process). 
-func iterateExecSpans(batch *ExecBatch, maxChunkSize int64, promoteIfAllScalar bool) (haveAllScalars bool, itr spanIterator, err error) { - if batch.NumValues() > 0 { - inferred, allArgsSame := inferBatchLength(batch.Values) - if inferred != batch.Len { - return false, nil, fmt.Errorf("%w: value lengths differed from execbatch length", arrow.ErrInvalid) - } - if !allArgsSame { - return false, nil, fmt.Errorf("%w: array args must all be the same length", arrow.ErrInvalid) - } - } - - var ( - args []Datum = batch.Values - haveChunked bool - chunkIdxes = make([]int, len(args)) - valuePositions = make([]int64, len(args)) - valueOffsets = make([]int64, len(args)) - pos, length int64 = 0, batch.Len - ) - haveAllScalars = checkIfAllScalar(batch) - maxChunkSize = exec.Min(length, maxChunkSize) - - span := exec.ExecSpan{Values: make([]exec.ExecValue, len(args)), Len: 0} - for i, a := range args { - switch arg := a.(type) { - case *ScalarDatum: - span.Values[i].Scalar = arg.Value - case *ArrayDatum: - span.Values[i].Array.SetMembers(arg.Value) - valueOffsets[i] = int64(arg.Value.Offset()) - case *ChunkedDatum: - // populate from first chunk - carr := arg.Value - if len(carr.Chunks()) > 0 { - arr := carr.Chunk(0).Data() - span.Values[i].Array.SetMembers(arr) - valueOffsets[i] = int64(arr.Offset()) - } else { - // fill as zero len - exec.FillZeroLength(carr.DataType(), &span.Values[i].Array) - } - haveChunked = true - } - } - - if haveAllScalars && promoteIfAllScalar { - exec.PromoteExecSpanScalars(span) - } - - nextChunkSpan := func(iterSz int64, span exec.ExecSpan) int64 { - for i := 0; i < len(args) && iterSz > 0; i++ { - // if the argument is not chunked, it's either a scalar or an array - // in which case it doesn't influence the size of the span - chunkedArg, ok := args[i].(*ChunkedDatum) - if !ok { - continue - } - - arg := chunkedArg.Value - if len(arg.Chunks()) == 0 { - iterSz = 0 - continue - } - - var curChunk arrow.Array - for { - curChunk = 
arg.Chunk(chunkIdxes[i]) - if valuePositions[i] == int64(curChunk.Len()) { - // chunk is zero-length, or was exhausted in the previous - // iteration, move to next chunk - chunkIdxes[i]++ - curChunk = arg.Chunk(chunkIdxes[i]) - span.Values[i].Array.SetMembers(curChunk.Data()) - valuePositions[i] = 0 - valueOffsets[i] = int64(curChunk.Data().Offset()) - continue - } - break - } - iterSz = exec.Min(int64(curChunk.Len())-valuePositions[i], iterSz) - } - return iterSz - } - - return haveAllScalars, func() (exec.ExecSpan, int64, bool) { - if pos == length { - return exec.ExecSpan{}, pos, false - } - - iterationSize := exec.Min(length-pos, maxChunkSize) - if haveChunked { - iterationSize = nextChunkSpan(iterationSize, span) - } - - span.Len = iterationSize - for i, a := range args { - if a.Kind() != KindScalar { - span.Values[i].Array.SetSlice(valuePositions[i]+valueOffsets[i], iterationSize) - valuePositions[i] += iterationSize - } - } - - pos += iterationSize - debug.Assert(pos <= length, "bad state for iteration exec span") - return span, pos, true - }, nil -} - -var ( - // have a pool of scalar executors to avoid excessive object creation - scalarExecPool = sync.Pool{ - New: func() any { return &scalarExecutor{} }, - } - vectorExecPool = sync.Pool{ - New: func() any { return &vectorExecutor{} }, - } -) - -func checkCanExecuteChunked(k *exec.VectorKernel) error { - if k.ExecChunked == nil { - return fmt.Errorf("%w: vector kernel cannot execute chunkwise and no chunked exec function defined", arrow.ErrInvalid) - } - - if k.NullHandling == exec.NullIntersection { - return fmt.Errorf("%w: null pre-propagation is unsupported for chunkedarray execution in vector kernels", arrow.ErrInvalid) - } - return nil -} - -type vectorExecutor struct { - nonAggExecImpl - - iter spanIterator - results []*exec.ArraySpan - iterLen int64 - - allScalars bool -} - -func (v *vectorExecutor) Execute(ctx context.Context, batch *ExecBatch, data chan<- Datum) (err error) { - final := 
v.kernel.(*exec.VectorKernel).Finalize - if final != nil { - if v.results == nil { - v.results = make([]*exec.ArraySpan, 0, 1) - } else { - v.results = v.results[:0] - } - } - // some vector kernels have a separate code path for handling chunked - // arrays (VectorKernel.ExecChunked) so we check for any chunked - // arrays. If we do and an ExecChunked function is defined - // then we call that. - hasChunked := haveChunkedArray(batch.Values) - v.numOutBuf = len(v.outType.Layout().Buffers) - v.preallocValidity = v.kernel.GetNullHandling() != exec.NullComputedNoPrealloc && - v.kernel.GetNullHandling() != exec.NullNoOutput - if v.kernel.GetMemAlloc() == exec.MemPrealloc { - v.dataPrealloc = addComputeDataPrealloc(v.outType, v.dataPrealloc) - } - - if v.kernel.(*exec.VectorKernel).CanExecuteChunkWise { - v.allScalars, v.iter, err = iterateExecSpans(batch, v.ectx.ChunkSize, true) - v.iterLen = batch.Len - - var ( - input exec.ExecSpan - next bool - ) - if v.iterLen == 0 { - input.Values = make([]exec.ExecValue, batch.NumValues()) - for i, v := range batch.Values { - exec.FillZeroLength(v.(ArrayLikeDatum).Type(), &input.Values[i].Array) - } - err = v.exec(&input, data) - } - for err == nil { - if input, _, next = v.iter(); !next { - break - } - err = v.exec(&input, data) - } - if err != nil { - return - } - } else { - // kernel cannot execute chunkwise. if we have any chunked arrays, - // then execchunked must be defined or we raise an error - if hasChunked { - if err = v.execChunked(batch, data); err != nil { - return - } - } else { - // no chunked arrays. 
// WrapResults collects the datums produced on out (by Execute) into a single
// result Datum. If the kernel does not produce chunked output, the single
// datum is returned directly; otherwise results are accumulated into a
// chunked array. ctx cancellation ends collection early, releasing what must
// be released and returning whatever has been gathered (or nil).
func (v *vectorExecutor) WrapResults(ctx context.Context, out <-chan Datum, hasChunked bool) Datum {
	// if kernel doesn't output chunked, just grab the one output and return it
	if !v.kernel.(*exec.VectorKernel).OutputChunked {
		var output Datum
		select {
		case <-ctx.Done():
			return nil
		case output = <-out:
		}

		// we got an output datum, but let's wait for the channel to
		// close so we don't have any race conditions
		select {
		case <-ctx.Done():
			output.Release()
			return nil
		case <-out:
			return output
		}
	}

	// if execution yielded multiple chunks then the result is a chunked array
	var (
		output Datum
		acc    []arrow.Array
	)

	// toChunked converts the single datum received so far into the chunk
	// accumulator (dropping empty chunks). The original datum is released
	// unless it was already chunked (its chunks are shared, not copied).
	toChunked := func() {
		out := output.(ArrayLikeDatum).Chunks()
		acc = make([]arrow.Array, 0, len(out))
		for _, o := range out {
			if o.Len() > 0 {
				acc = append(acc, o)
			}
		}
		if output.Kind() != KindChunked {
			output.Release()
		}
		output = nil
	}

	// get first output
	select {
	case <-ctx.Done():
		return nil
	case output = <-out:
		if output == nil || ctx.Err() != nil {
			return nil
		}

		// if the inputs contained at least one chunked array
		// then we want to return chunked output
		if hasChunked {
			toChunked()
		}
	}

	for {
		select {
		case <-ctx.Done():
			// context is done, either cancelled or a timeout.
			// either way, we end early and return what we've got so far.
			return output
		case o, ok := <-out:
			if !ok { // channel closed, wrap it up
				if output != nil {
					return output
				}

				// NOTE(review): defers accumulate inside this loop; they
				// only run when WrapResults returns, which happens promptly
				// after the channel closes, so the delay is bounded.
				for _, c := range acc {
					defer c.Release()
				}

				chkd := arrow.NewChunked(v.outType, acc)
				defer chkd.Release()
				return NewDatum(chkd)
			}

			// if we get multiple batches of output, then we need
			// to return it as a chunked array.
			if acc == nil {
				toChunked()
			}

			defer o.Release()
			if o.Len() == 0 { // skip any empty batches
				continue
			}

			acc = append(acc, o.(*ArrayDatum).MakeArray())
		}
	}
}

// exec runs the kernel over a single span, propagating nulls first when the
// kernel declares NullIntersection handling, then emits the result.
func (v *vectorExecutor) exec(span *exec.ExecSpan, data chan<- Datum) (err error) {
	out := v.prepareOutput(int(span.Len))
	if v.kernel.GetNullHandling() == exec.NullIntersection {
		if err = propagateNulls(v.ctx, span, out); err != nil {
			return
		}
	}
	if err = v.kernel.Exec(v.ctx, span, out); err != nil {
		return
	}
	return v.emitResult(out, data)
}

// emitResult either sends the result downstream immediately, or — when the
// kernel has a Finalize stage — buffers it in v.results for post-processing.
func (v *vectorExecutor) emitResult(result *exec.ArraySpan, data chan<- Datum) (err error) {
	if v.kernel.(*exec.VectorKernel).Finalize == nil {
		d := result.MakeData()
		defer d.Release()
		data <- NewDatum(d)
	} else {
		v.results = append(v.results, result)
	}
	return nil
}

// execChunked invokes the kernel's whole-chunked-array execution path,
// converting each input datum to a *arrow.Chunked first. Only Array and
// Chunked datums are supported.
func (v *vectorExecutor) execChunked(batch *ExecBatch, out chan<- Datum) error {
	if err := checkCanExecuteChunked(v.kernel.(*exec.VectorKernel)); err != nil {
		return err
	}

	output := v.prepareOutput(int(batch.Len))
	input := make([]*arrow.Chunked, len(batch.Values))
	for i, v := range batch.Values {
		switch val := v.(type) {
		case *ArrayDatum:
			chks := val.Chunks()
			input[i] = arrow.NewChunked(val.Type(), chks)
			// NOTE(review): only the first chunk is released here after
			// NewChunked retains them — presumably an ArrayDatum has exactly
			// one chunk so this balances the Chunks() retain; confirm against
			// ArrayDatum.Chunks ownership semantics.
			chks[0].Release()
			defer input[i].Release()
		case *ChunkedDatum:
			input[i] = val.Value
		default:
			return fmt.Errorf("%w: handling with exec chunked", arrow.ErrNotImplemented)
		}
	}
	result, err := v.kernel.(*exec.VectorKernel).ExecChunked(v.ctx, input, output)
	if err != nil {
		return err
	}

	if len(result) == 0 {
		// no chunks produced: emit an empty chunked datum of the output type
		empty := output.MakeArray()
		defer empty.Release()
		out <- &ChunkedDatum{Value: arrow.NewChunked(output.Type, []arrow.Array{empty})}
		return nil
	}

	for _, r := range result {
		if err := v.emitResult(r, out); err != nil {
			return err
		}
	}
	return nil
}
// Expression is an interface for mapping one datum to another. An expression
// is one of:
//
//	A literal Datum
//	A reference to a single (potentially nested) field of an input Datum
//	A call to a compute function, with arguments specified by other Expressions
//
// Deprecated: use substrait-go expressions instead.
type Expression interface {
	fmt.Stringer
	// IsBound returns true if this expression has been bound to a particular
	// Datum and/or Schema.
	IsBound() bool
	// IsScalarExpr returns true if this expression is composed only of scalar
	// literals, field references and calls to scalar functions.
	IsScalarExpr() bool
	// IsNullLiteral returns true if this expression is a literal and entirely
	// null.
	IsNullLiteral() bool
	// IsSatisfiable returns true if this expression could evaluate to true
	IsSatisfiable() bool
	// FieldRef returns a pointer to the underlying field reference, or nil if
	// this expression is not a field reference.
	FieldRef() *FieldRef
	// Type returns the datatype this expression will evaluate to.
	Type() arrow.DataType

	// Hash returns a hash value suitable for using expressions as map keys;
	// equal expressions hash equally.
	Hash() uint64
	// Equals reports structural equality with another expression.
	Equals(Expression) bool

	// Release releases the underlying bound C++ memory that is allocated when
	// a Bind is performed. Any bound expression should get released to ensure
	// no memory leaks.
	Release()
}

// printDatum renders a datum for expression display: invalid scalars print
// as "null", string scalars are quoted, and binary scalars are rendered as
// quoted upper-case hex; everything else uses the datum's own String().
func printDatum(datum Datum) string {
	switch datum := datum.(type) {
	case *ScalarDatum:
		if !datum.Value.IsValid() {
			return "null"
		}

		switch datum.Type().ID() {
		case arrow.STRING, arrow.LARGE_STRING:
			return strconv.Quote(datum.Value.(scalar.BinaryScalar).String())
		case arrow.BINARY, arrow.FIXED_SIZE_BINARY, arrow.LARGE_BINARY:
			return `"` + strings.ToUpper(hex.EncodeToString(datum.Value.(scalar.BinaryScalar).Data())) + `"`
		}

		return datum.Value.String()
	default:
		return datum.String()
	}
}

// Literal is an expression denoting a literal Datum which could be any value
// as a scalar, an array, or so on.
//
// Deprecated: use substrait-go expressions Literal instead.
-type Literal struct { - Literal Datum -} - -func (Literal) FieldRef() *FieldRef { return nil } -func (l *Literal) String() string { return printDatum(l.Literal) } -func (l *Literal) Type() arrow.DataType { return l.Literal.(ArrayLikeDatum).Type() } -func (l *Literal) IsBound() bool { return l.Type() != nil } -func (l *Literal) IsScalarExpr() bool { return l.Literal.Kind() == KindScalar } - -func (l *Literal) Equals(other Expression) bool { - if rhs, ok := other.(*Literal); ok { - return l.Literal.Equals(rhs.Literal) - } - return false -} - -func (l *Literal) IsNullLiteral() bool { - if ad, ok := l.Literal.(ArrayLikeDatum); ok { - return ad.NullN() == ad.Len() - } - return true -} - -func (l *Literal) IsSatisfiable() bool { - if l.IsNullLiteral() { - return false - } - - if sc, ok := l.Literal.(*ScalarDatum); ok && sc.Type().ID() == arrow.BOOL { - return sc.Value.(*scalar.Boolean).Value - } - - return true -} - -func (l *Literal) Hash() uint64 { - if l.IsScalarExpr() { - return scalar.Hash(hashSeed, l.Literal.(*ScalarDatum).Value) - } - return 0 -} - -func (l *Literal) Release() { - l.Literal.Release() -} - -// Parameter represents a field reference and needs to be bound in order to determine -// its type and shape. -// -// Deprecated: use substrait-go field references instead. 
// Parameter represents a field reference and needs to be bound in order to determine
// its type and shape.
//
// Deprecated: use substrait-go field references instead.
type Parameter struct {
	ref *FieldRef

	// post bind props
	dt    arrow.DataType // resolved datatype; nil until bound
	index int            // resolved column index; -1 until bound
}

func (Parameter) IsNullLiteral() bool     { return false }
func (p *Parameter) Type() arrow.DataType { return p.dt }
func (p *Parameter) IsBound() bool        { return p.Type() != nil }
func (p *Parameter) IsScalarExpr() bool   { return p.ref != nil }
func (p *Parameter) IsSatisfiable() bool  { return p.Type() == nil || p.Type().ID() != arrow.NULL }
func (p *Parameter) FieldRef() *FieldRef  { return p.ref }

// Hash hashes the underlying field reference.
// NOTE(review): assumes p.ref is non-nil — confirm all constructors set it.
func (p *Parameter) Hash() uint64 { return p.ref.Hash(hashSeed) }

// String prints the simplest representation of the reference: a bare name,
// a field path, or the reference's own String() form.
func (p *Parameter) String() string {
	switch {
	case p.ref.IsName():
		return p.ref.Name()
	case p.ref.IsFieldPath():
		return p.ref.FieldPath().String()
	default:
		return p.ref.String()
	}
}

// Equals reports whether other is a parameter referring to the same field.
func (p *Parameter) Equals(other Expression) bool {
	if rhs, ok := other.(*Parameter); ok {
		return p.ref.Equals(*rhs.ref)
	}

	return false
}

// Release is a no-op: parameters own no releasable resources.
func (p *Parameter) Release() {}

// comparisonType is a bitmask over LT/EQ/GT so that compound comparisons
// (NE, LE, GE) are unions of the primitive bits.
type comparisonType int8

const (
	compNA comparisonType = 0
	compEQ comparisonType = 1
	compLT comparisonType = 2
	compGT comparisonType = 4
	compNE comparisonType = compLT | compGT
	compLE comparisonType = compLT | compEQ
	compGE comparisonType = compGT | compEQ
)

// name maps the comparison to its compute-function name ("na" if unknown).
//
//lint:ignore U1000 ignore that this is unused for now
func (c comparisonType) name() string {
	switch c {
	case compEQ:
		return "equal"
	case compLT:
		return "less"
	case compGT:
		return "greater"
	case compNE:
		return "not_equal"
	case compLE:
		return "less_equal"
	case compGE:
		return "greater_equal"
	}
	return "na"
}

// getOp maps the comparison to its infix operator for display. Asserts (in
// debug builds) on values with no operator form.
func (c comparisonType) getOp() string {
	switch c {
	case compEQ:
		return "=="
	case compLT:
		return "<"
	case compGT:
		return ">"
	case compNE:
		return "!="
	case compLE:
		return "<="
	case compGE:
		return ">="
	}
	debug.Assert(false, "invalid getop")
	return ""
}

// compmap maps comparison compute-function names back to their bitmask form,
// used by Call.String to print comparisons infix.
var compmap = map[string]comparisonType{
	"equal":         compEQ,
	"less":          compLT,
	"greater":       compGT,
	"not_equal":     compNE,
	"less_equal":    compLE,
	"greater_equal": compGE,
}

// optionsToString renders FunctionOptions as "{tag=value, ...}" using the
// `compute` struct tags, unless the options implement fmt.Stringer.
// NOTE(review): the final slice assumes at least one printed field — an
// options struct with all fields tagged "-" would panic; confirm no such
// options type exists.
func optionsToString(fn FunctionOptions) string {
	if s, ok := fn.(fmt.Stringer); ok {
		return s.String()
	}

	var b strings.Builder
	v := reflect.Indirect(reflect.ValueOf(fn))
	b.WriteByte('{')
	for i := 0; i < v.Type().NumField(); i++ {
		fld := v.Type().Field(i)
		tag := fld.Tag.Get("compute")
		if tag == "-" {
			continue
		}

		fldVal := v.Field(i)
		fmt.Fprintf(&b, "%s=%v, ", tag, fldVal.Interface())
	}
	ret := b.String()
	// drop the trailing ", " and close the brace
	return ret[:len(ret)-2] + "}"
}

// Call is a function call with specific arguments which are themselves other
// expressions. A call can also have options that are specific to the function
// in question. It must be bound to determine the shape and type.
//
// Deprecated: use substrait-go expression functions instead.
type Call struct {
	funcName string
	args     []Expression
	dt       arrow.DataType
	options  FunctionOptions

	cachedHash uint64 // memoized Hash(); 0 means "not yet computed"
}

func (c *Call) IsNullLiteral() bool  { return false }
func (c *Call) FieldRef() *FieldRef  { return nil }
func (c *Call) Type() arrow.DataType { return c.dt }
func (c *Call) IsSatisfiable() bool  { return c.Type() == nil || c.Type().ID() != arrow.NULL }

// String renders comparisons and kleene logic infix, make_struct as a
// "{name=expr, ...}" projection, and everything else as "func(args, opts)".
func (c *Call) String() string {
	binary := func(op string) string {
		return "(" + c.args[0].String() + " " + op + " " + c.args[1].String() + ")"
	}

	if cmp, ok := compmap[c.funcName]; ok {
		return binary(cmp.getOp())
	}

	const kleene = "_kleene"
	if strings.HasSuffix(c.funcName, kleene) {
		return binary(strings.TrimSuffix(c.funcName, kleene))
	}

	if c.funcName == "make_struct" && c.options != nil {
		opts := c.options.(*MakeStructOptions)
		out := "{"
		for i, a := range c.args {
			out += opts.FieldNames[i] + "=" + a.String() + ", "
		}
		return out[:len(out)-2] + "}"
	}

	var b strings.Builder
	b.WriteString(c.funcName + "(")
	for _, a := range c.args {
		b.WriteString(a.String() + ", ")
	}

	if c.options != nil {
		b.WriteString(optionsToString(c.options))
		b.WriteString(" ")
	}

	// trim the trailing ", " (or "} ") before closing the paren
	ret := b.String()
	return ret[:len(ret)-2] + ")"
}

// Hash combines the function name's hash with each argument's hash; the
// result is memoized in cachedHash (options are NOT part of the hash).
func (c *Call) Hash() uint64 {
	if c.cachedHash != 0 {
		return c.cachedHash
	}

	var h maphash.Hash
	h.SetSeed(hashSeed)

	h.WriteString(c.funcName)
	c.cachedHash = h.Sum64()
	for _, arg := range c.args {
		c.cachedHash = exec.HashCombine(c.cachedHash, arg.Hash())
	}
	return c.cachedHash
}

// IsScalarExpr reports whether the call is a pure scalar expression.
// NOTE(review): this currently ALWAYS returns false — even when all args are
// scalar — because the isFuncScalar check is commented out below; appears
// deliberate, confirm before "fixing".
func (c *Call) IsScalarExpr() bool {
	for _, arg := range c.args {
		if !arg.IsScalarExpr() {
			return false
		}
	}

	return false
	// return isFuncScalar(c.funcName)
}

func (c *Call) IsBound() bool {
	return c.Type() != nil
}

// Equals compares function name, each argument, and the options (via
// FunctionOptionsEqual when implemented, reflect.DeepEqual otherwise).
func (c *Call) Equals(other Expression) bool {
	rhs, ok := other.(*Call)
	if !ok {
		return false
	}

	if c.funcName != rhs.funcName || len(c.args) != len(rhs.args) {
		return false
	}

	for i := range c.args {
		if !c.args[i].Equals(rhs.args[i]) {
			return false
		}
	}

	if opt, ok := c.options.(FunctionOptionsEqual); ok {
		return opt.Equals(rhs.options)
	}
	return reflect.DeepEqual(c.options, rhs.options)
}

// Release releases every argument and, when the options are releasable,
// the options too.
func (c *Call) Release() {
	for _, a := range c.args {
		a.Release()
	}
	if r, ok := c.options.(releasable); ok {
		r.Release()
	}
}

// FunctionOptions can be any type which has a TypeName function. The fields
// of the type will be used (via reflection) to determine the information to
// propagate when serializing to pass to the C++ for execution.
type FunctionOptions interface {
	// TypeName identifies the options type for serialization/lookup.
	TypeName() string
}

// FunctionOptionsEqual is implemented by options types that need custom
// equality (e.g. because they hold Datums).
type FunctionOptionsEqual interface {
	Equals(FunctionOptions) bool
}

// FunctionOptionsCloneable is implemented by options types that can copy
// themselves.
type FunctionOptionsCloneable interface {
	Clone() FunctionOptions
}

// MakeStructOptions configures the "make_struct" function: output field
// names plus optional per-field nullability and metadata.
type MakeStructOptions struct {
	FieldNames       []string          `compute:"field_names"`
	FieldNullability []bool            `compute:"field_nullability"`
	FieldMetadata    []*arrow.Metadata `compute:"field_metadata"`
}

func (MakeStructOptions) TypeName() string { return "MakeStructOptions" }

// NullOptions configures null-checking functions (whether NaN counts as null).
type NullOptions struct {
	NanIsNull bool `compute:"nan_is_null"`
}

func (NullOptions) TypeName() string { return "NullOptions" }

// StrptimeOptions configures string-to-timestamp parsing.
type StrptimeOptions struct {
	Format string         `compute:"format"`
	Unit   arrow.TimeUnit `compute:"unit"`
}

func (StrptimeOptions) TypeName() string { return "StrptimeOptions" }

type NullSelectionBehavior = kernels.NullSelectionBehavior

const (
	SelectionEmitNulls = kernels.EmitNulls
	SelectionDropNulls = kernels.DropNulls
)

// ArithmeticOptions configures overflow checking for arithmetic kernels.
// NOTE(review): the Go field is NoCheckOverflow but the serialized tag is
// "check_overflow" — the polarity looks inverted; confirm against the C++
// ArithmeticOptions semantics before relying on the serialized form.
type ArithmeticOptions struct {
	NoCheckOverflow bool `compute:"check_overflow"`
}

func (ArithmeticOptions) TypeName() string { return "ArithmeticOptions" }

type (
	CastOptions   = kernels.CastOptions
	FilterOptions = kernels.FilterOptions
	TakeOptions   = kernels.TakeOptions
)

// DefaultFilterOptions returns the zero-valued filter options.
func DefaultFilterOptions() *FilterOptions { return &FilterOptions{} }

// DefaultTakeOptions returns take options with bounds checking enabled.
func DefaultTakeOptions() *TakeOptions { return &TakeOptions{BoundsCheck: true} }

// DefaultCastOptions returns strict options when safe is true, otherwise
// options permitting every lossy conversion.
func DefaultCastOptions(safe bool) *CastOptions {
	if safe {
		return &CastOptions{}
	}
	return &CastOptions{
		AllowIntOverflow:     true,
		AllowTimeTruncate:    true,
		AllowTimeOverflow:    true,
		AllowDecimalTruncate: true,
		AllowFloatTruncate:   true,
		AllowInvalidUtf8:     true,
	}
}

// UnsafeCastOptions is shorthand for NewCastOptions(dt, false).
func UnsafeCastOptions(dt arrow.DataType) *CastOptions {
	return NewCastOptions(dt, false)
}

// SafeCastOptions is shorthand for NewCastOptions(dt, true).
func SafeCastOptions(dt arrow.DataType) *CastOptions {
	return NewCastOptions(dt, true)
}

// NewCastOptions builds cast options targeting dt (arrow.Null when dt is nil).
func NewCastOptions(dt arrow.DataType, safe bool) *CastOptions {
	opts := DefaultCastOptions(safe)
	if dt != nil {
		opts.ToType = dt
	} else {
		opts.ToType = arrow.Null
	}
	return opts
}

// Cast wraps ex in a call to "cast" targeting dt (arrow.Null when dt is nil).
// Note: uses zero-valued (strict) options, not DefaultCastOptions.
func Cast(ex Expression, dt arrow.DataType) Expression {
	opts := &CastOptions{}
	if dt == nil {
		opts.ToType = arrow.Null
	} else {
		opts.ToType = dt
	}

	return NewCall("cast", []Expression{ex}, opts)
}

// SetLookupOptions configures set-membership functions such as "is_in":
// the set of values to look up in, and whether nulls are skipped.
type SetLookupOptions struct {
	ValueSet  Datum `compute:"value_set"`
	SkipNulls bool  `compute:"skip_nulls"`
}

func (SetLookupOptions) TypeName() string { return "SetLookupOptions" }

// Release releases the held value set.
func (s *SetLookupOptions) Release() { s.ValueSet.Release() }

// Equals compares both the flag and the value-set datum.
func (s *SetLookupOptions) Equals(other FunctionOptions) bool {
	rhs, ok := other.(*SetLookupOptions)
	if !ok {
		return false
	}

	return s.SkipNulls == rhs.SkipNulls && s.ValueSet.Equals(rhs.ValueSet)
}

// FromStructScalar populates the options from their struct-scalar
// serialization; the value set must deserialize to a list scalar.
func (s *SetLookupOptions) FromStructScalar(sc *scalar.Struct) error {
	if v, err := sc.Field("skip_nulls"); err == nil {
		s.SkipNulls = v.(*scalar.Boolean).Value
	}

	value, err := sc.Field("value_set")
	if err != nil {
		return err
	}

	if v, ok := value.(scalar.ListScalar); ok {
		s.ValueSet = NewDatum(v.GetList())
		return nil
	}

	return errors.New("set lookup options valueset should be a list")
}

// funcOptionsMap maps TypeName() strings to their reflect.Type so options
// can be reconstructed during expression deserialization.
var (
	funcOptionsMap map[string]reflect.Type
	funcOptsTypes  = []FunctionOptions{
		SetLookupOptions{}, ArithmeticOptions{}, CastOptions{},
		FilterOptions{}, NullOptions{}, StrptimeOptions{}, MakeStructOptions{},
	}
)

func init() {
	funcOptionsMap = make(map[string]reflect.Type)
	for _, ft := range funcOptsTypes {
		funcOptionsMap[ft.TypeName()] = reflect.TypeOf(ft)
	}
}

// NewLiteral constructs a new literal expression from any value. It is passed
// to NewDatum which will construct the appropriate Datum and/or scalar
// value for the type provided.
func NewLiteral(arg interface{}) Expression {
	// NewDatum handles conversion of arbitrary Go values to the proper Datum
	return &Literal{Literal: NewDatum(arg)}
}

// NullLiteral constructs a literal holding a typed null scalar of dt.
func NullLiteral(dt arrow.DataType) Expression {
	return &Literal{Literal: NewDatum(scalar.MakeNullScalar(dt))}
}

// NewRef constructs a parameter expression which refers to a specific field
func NewRef(ref FieldRef) Expression {
	// index -1 marks the parameter as not yet bound
	return &Parameter{ref: &ref, index: -1}
}

// NewFieldRef is shorthand for NewRef(FieldRefName(field))
func NewFieldRef(field string) Expression {
	return NewRef(FieldRefName(field))
}

// NewCall constructs an expression that represents a specific function call with
// the given arguments and options.
func NewCall(name string, args []Expression, opts FunctionOptions) Expression {
	return &Call{funcName: name, args: args, options: opts}
}

// Project is shorthand for `make_struct` to produce a record batch output
// from a group of expressions. Every output field is marked nullable and
// carries no metadata.
func Project(values []Expression, names []string) Expression {
	nulls := make([]bool, len(names))
	for i := range nulls {
		nulls[i] = true
	}
	meta := make([]*arrow.Metadata, len(names))
	return NewCall("make_struct", values,
		&MakeStructOptions{FieldNames: names, FieldNullability: nulls, FieldMetadata: meta})
}

// Equal is a convenience function for the equal function
func Equal(lhs, rhs Expression) Expression {
	return NewCall("equal", []Expression{lhs, rhs}, nil)
}

// NotEqual creates a call to not_equal
func NotEqual(lhs, rhs Expression) Expression {
	return NewCall("not_equal", []Expression{lhs, rhs}, nil)
}

// Less is shorthand for NewCall("less",....)
func Less(lhs, rhs Expression) Expression {
	return NewCall("less", []Expression{lhs, rhs}, nil)
}

// LessEqual is shorthand for NewCall("less_equal",....)
func LessEqual(lhs, rhs Expression) Expression {
	return NewCall("less_equal", []Expression{lhs, rhs}, nil)
}

// Greater is shorthand for NewCall("greater",....)
-func Greater(lhs, rhs Expression) Expression { - return NewCall("greater", []Expression{lhs, rhs}, nil) -} - -// GreaterEqual is shorthand for NewCall("greater_equal",....) -func GreaterEqual(lhs, rhs Expression) Expression { - return NewCall("greater_equal", []Expression{lhs, rhs}, nil) -} - -// IsNull creates an expression that returns true if the passed in expression is -// null. Optionally treating NaN as null if desired. -func IsNull(lhs Expression, nanIsNull bool) Expression { - return NewCall("less", []Expression{lhs}, &NullOptions{nanIsNull}) -} - -// IsValid is the inverse of IsNull -func IsValid(lhs Expression) Expression { - return NewCall("is_valid", []Expression{lhs}, nil) -} - -type binop func(lhs, rhs Expression) Expression - -func foldLeft(op binop, args ...Expression) Expression { - switch len(args) { - case 0: - return nil - case 1: - return args[0] - } - - folded := args[0] - for _, a := range args[1:] { - folded = op(folded, a) - } - return folded -} - -func and(lhs, rhs Expression) Expression { - return NewCall("and_kleene", []Expression{lhs, rhs}, nil) -} - -// And constructs a tree of calls to and_kleene for boolean And logic taking -// an arbitrary number of values. -func And(lhs, rhs Expression, ops ...Expression) Expression { - folded := foldLeft(and, append([]Expression{lhs, rhs}, ops...)...) - if folded != nil { - return folded - } - return NewLiteral(true) -} - -func or(lhs, rhs Expression) Expression { - return NewCall("or_kleene", []Expression{lhs, rhs}, nil) -} - -// Or constructs a tree of calls to or_kleene for boolean Or logic taking -// an arbitrary number of values. -func Or(lhs, rhs Expression, ops ...Expression) Expression { - folded := foldLeft(or, append([]Expression{lhs, rhs}, ops...)...) - if folded != nil { - return folded - } - return NewLiteral(false) -} - -// Not creates a call to "invert" for the value specified. 
-func Not(expr Expression) Expression { - return NewCall("invert", []Expression{expr}, nil) -} - -func SerializeOptions(opts FunctionOptions, mem memory.Allocator) (*memory.Buffer, error) { - sc, err := scalar.ToScalar(opts, mem) - if err != nil { - return nil, err - } - if sc, ok := sc.(releasable); ok { - defer sc.Release() - } - - arr, err := scalar.MakeArrayFromScalar(sc, 1, mem) - if err != nil { - return nil, err - } - defer arr.Release() - - batch := array.NewRecord(arrow.NewSchema([]arrow.Field{{Type: arr.DataType(), Nullable: true}}, nil), []arrow.Array{arr}, 1) - defer batch.Release() - - buf := &bufferWriteSeeker{mem: mem} - wr, err := ipc.NewFileWriter(buf, ipc.WithSchema(batch.Schema()), ipc.WithAllocator(mem)) - if err != nil { - return nil, err - } - - wr.Write(batch) - wr.Close() - return buf.buf, nil -} - -// SerializeExpr serializes expressions by converting them to Metadata and -// storing this in the schema of a Record. Embedded arrays and scalars are -// stored in its columns. 
Finally the record is written as an IPC file -func SerializeExpr(expr Expression, mem memory.Allocator) (*memory.Buffer, error) { - var ( - cols []arrow.Array - metaKey []string - metaValue []string - visit func(Expression) error - ) - - addScalar := func(s scalar.Scalar) (string, error) { - ret := len(cols) - arr, err := scalar.MakeArrayFromScalar(s, 1, mem) - if err != nil { - return "", err - } - cols = append(cols, arr) - return strconv.Itoa(ret), nil - } - - visit = func(e Expression) error { - switch e := e.(type) { - case *Literal: - if !e.IsScalarExpr() { - return errors.New("not implemented: serialization of non-scalar literals") - } - metaKey = append(metaKey, "literal") - s, err := addScalar(e.Literal.(*ScalarDatum).Value) - if err != nil { - return err - } - metaValue = append(metaValue, s) - case *Parameter: - if e.ref.Name() == "" { - return errors.New("not implemented: serialization of non-name field_ref") - } - - metaKey = append(metaKey, "field_ref") - metaValue = append(metaValue, e.ref.Name()) - case *Call: - metaKey = append(metaKey, "call") - metaValue = append(metaValue, e.funcName) - - for _, arg := range e.args { - visit(arg) - } - - if e.options != nil { - st, err := scalar.ToScalar(e.options, mem) - if err != nil { - return err - } - metaKey = append(metaKey, "options") - s, err := addScalar(st) - if err != nil { - return err - } - metaValue = append(metaValue, s) - - for _, f := range st.(*scalar.Struct).Value { - switch s := f.(type) { - case releasable: - defer s.Release() - } - } - } - - metaKey = append(metaKey, "end") - metaValue = append(metaValue, e.funcName) - } - return nil - } - - if err := visit(expr); err != nil { - return nil, err - } - - fields := make([]arrow.Field, len(cols)) - for i, c := range cols { - fields[i].Type = c.DataType() - defer c.Release() - } - - metadata := arrow.NewMetadata(metaKey, metaValue) - rec := array.NewRecord(arrow.NewSchema(fields, &metadata), cols, 1) - defer rec.Release() - - buf := 
&bufferWriteSeeker{mem: mem} - wr, err := ipc.NewFileWriter(buf, ipc.WithSchema(rec.Schema()), ipc.WithAllocator(mem)) - if err != nil { - return nil, err - } - - wr.Write(rec) - wr.Close() - return buf.buf, nil -} - -func DeserializeExpr(mem memory.Allocator, buf *memory.Buffer) (Expression, error) { - rdr, err := ipc.NewFileReader(bytes.NewReader(buf.Bytes()), ipc.WithAllocator(mem)) - if err != nil { - return nil, err - } - defer rdr.Close() - - batch, err := rdr.Read() - if err != nil { - return nil, err - } - - if !batch.Schema().HasMetadata() { - return nil, errors.New("serialized Expression's batch repr had no metadata") - } - - if batch.NumRows() != 1 { - return nil, fmt.Errorf("serialized Expression's batch repr was not a single row - had %d", batch.NumRows()) - } - - var ( - getone func() (Expression, error) - index int = 0 - metadata = batch.Schema().Metadata() - ) - - getscalar := func(i string) (scalar.Scalar, error) { - colIndex, err := strconv.ParseInt(i, 10, 32) - if err != nil { - return nil, err - } - if colIndex >= batch.NumCols() { - return nil, errors.New("column index out of bounds") - } - return scalar.GetScalar(batch.Column(int(colIndex)), 0) - } - - getone = func() (Expression, error) { - if index >= metadata.Len() { - return nil, errors.New("unterminated serialized Expression") - } - - key, val := metadata.Keys()[index], metadata.Values()[index] - index++ - - switch key { - case "literal": - scalar, err := getscalar(val) - if err != nil { - return nil, err - } - if r, ok := scalar.(releasable); ok { - defer r.Release() - } - return NewLiteral(scalar), err - case "field_ref": - return NewFieldRef(val), nil - case "call": - args := make([]Expression, 0) - for metadata.Keys()[index] != "end" { - if metadata.Keys()[index] == "options" { - optsScalar, err := getscalar(metadata.Values()[index]) - if err != nil { - return nil, err - } - if r, ok := optsScalar.(releasable); ok { - defer r.Release() - } - var opts FunctionOptions - if optsScalar != 
nil { - typname, err := optsScalar.(*scalar.Struct).Field("_type_name") - if err != nil { - return nil, err - } - if typname.DataType().ID() != arrow.BINARY { - return nil, errors.New("options scalar typename must be binary") - } - - optionsVal := reflect.New(funcOptionsMap[string(typname.(*scalar.Binary).Data())]).Interface() - if err := scalar.FromScalar(optsScalar.(*scalar.Struct), optionsVal); err != nil { - return nil, err - } - opts = optionsVal.(FunctionOptions) - } - index += 2 - return NewCall(val, args, opts), nil - } - - arg, err := getone() - if err != nil { - return nil, err - } - args = append(args, arg) - } - index++ - return NewCall(val, args, nil), nil - default: - return nil, fmt.Errorf("unrecognized serialized Expression key %s", key) - } - } - - return getone() -} diff --git a/go/arrow/compute/expression_test.go b/go/arrow/compute/expression_test.go deleted file mode 100644 index 1898bb3dc92b2..0000000000000 --- a/go/arrow/compute/expression_test.go +++ /dev/null @@ -1,259 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
//go:build go1.18

package compute_test

import (
	"testing"

	"github.com/apache/arrow/go/v18/arrow"
	"github.com/apache/arrow/go/v18/arrow/array"
	"github.com/apache/arrow/go/v18/arrow/compute"
	"github.com/apache/arrow/go/v18/arrow/memory"
	"github.com/apache/arrow/go/v18/arrow/scalar"
	"github.com/stretchr/testify/assert"
)

// TestExpressionToString verifies the textual rendering of literals, field
// refs, calls (prefix, infix comparisons, kleene logic) and projections.
func TestExpressionToString(t *testing.T) {
	ts, _ := scalar.MakeScalar("1990-10-23 10:23:33.123456").CastTo(arrow.FixedWidthTypes.Timestamp_ns)

	add := compute.NewCall("add", []compute.Expression{compute.NewFieldRef("beta"), compute.NewLiteral(3)}, &compute.ArithmeticOptions{})

	tests := []struct {
		expr     compute.Expression
		expected string
	}{
		{compute.NewFieldRef("alpha"), "alpha"},
		{compute.NewLiteral(3), "3"},
		{compute.NewLiteral("a"), `"a"`},
		{compute.NewLiteral("a\nb"), `"a\nb"`},
		{compute.NewLiteral(&scalar.Boolean{}), "null"},
		{compute.NewLiteral(&scalar.Int64{}), "null"},
		// binary scalars render as quoted upper-case hex
		{compute.NewLiteral(scalar.NewBinaryScalar(memory.NewBufferBytes([]byte("az")),
			arrow.BinaryTypes.Binary)), `"617A"`},
		{compute.NewLiteral(ts), "1990-10-23 10:23:33.123456"},
		{compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewFieldRef("beta")}, nil), "add(3, beta)"},
		{compute.And(compute.NewFieldRef("a"), compute.NewFieldRef("b")), "(a and b)"},
		{compute.Or(compute.NewFieldRef("a"), compute.NewFieldRef("b")), "(a or b)"},
		{compute.Not(compute.NewFieldRef("a")), "invert(a)"},
		{compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32),
			"cast(a, {to_type=int32, allow_int_overflow=false, allow_time_truncate=false, " +
				"allow_time_overflow=false, allow_decimal_truncate=false, " +
				"allow_float_truncate=false, allow_invalid_utf8=false})"},
		{compute.Cast(compute.NewFieldRef("a"), nil),
			"cast(a, {to_type=null, allow_int_overflow=false, allow_time_truncate=false, " +
				"allow_time_overflow=false, allow_decimal_truncate=false, " +
				"allow_float_truncate=false, allow_invalid_utf8=false})"},
		{compute.Equal(compute.NewFieldRef("a"), compute.NewLiteral(1)), "(a == 1)"},
		{compute.Less(compute.NewFieldRef("a"), compute.NewLiteral(2)), "(a < 2)"},
		{compute.Greater(compute.NewFieldRef("a"), compute.NewLiteral(3)), "(a > 3)"},
		{compute.NotEqual(compute.NewFieldRef("a"), compute.NewLiteral("a")), `(a != "a")`},
		{compute.LessEqual(compute.NewFieldRef("a"), compute.NewLiteral("b")), `(a <= "b")`},
		{compute.GreaterEqual(compute.NewFieldRef("a"), compute.NewLiteral("c")), `(a >= "c")`},
		{compute.Project(
			[]compute.Expression{
				compute.NewFieldRef("a"), compute.NewFieldRef("a"), compute.NewLiteral(3), add,
			}, []string{"a", "renamed_a", "three", "b"}),
			"{a=a, renamed_a=a, three=3, b=" + add.String() + "}"},
	}

	for _, tt := range tests {
		t.Run(tt.expected, func(t *testing.T) {
			assert.Equal(t, tt.expected, tt.expr.String())
		})
	}
}

// TestExpressionEquality verifies structural equality across expression
// kinds, including options comparison for calls.
func TestExpressionEquality(t *testing.T) {
	tests := []struct {
		exp1  compute.Expression
		exp2  compute.Expression
		equal bool
	}{
		{compute.NewLiteral(1), compute.NewLiteral(1), true},
		{compute.NewLiteral(1), compute.NewLiteral(2), false},
		{compute.NewFieldRef("a"), compute.NewFieldRef("a"), true},
		{compute.NewFieldRef("a"), compute.NewFieldRef("b"), false},
		{compute.NewFieldRef("a"), compute.NewLiteral(2), false},
		{compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, nil),
			compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, nil), true},
		{compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, nil),
			compute.NewCall("add", []compute.Expression{compute.NewLiteral(2), compute.NewLiteral("a")}, nil), false},
		{compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, nil),
			compute.NewCall("add", []compute.Expression{compute.NewFieldRef("a"), compute.NewLiteral(3)}, nil), false},
		{compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, &compute.ArithmeticOptions{true}),
			compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, &compute.ArithmeticOptions{true}), true},
		{compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, &compute.ArithmeticOptions{true}),
			compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, &compute.ArithmeticOptions{false}), false},
		{compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32), compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32), true},
		{compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32), compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int64), false},
		// Cast uses zero-valued options, NewCastOptions uses defaults — not equal
		{compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32), compute.NewCall("cast", []compute.Expression{compute.NewFieldRef("a")}, compute.NewCastOptions(arrow.PrimitiveTypes.Int32, false)), false},
	}

	for _, tt := range tests {
		t.Run(tt.exp1.String(), func(t *testing.T) {
			assert.Equal(t, tt.equal, tt.exp1.Equals(tt.exp2))
		})
	}
}

// TestExpressionHashing verifies that equal expressions collide in a map
// keyed by Hash() and distinct ones (by kind, name, or type) do not.
func TestExpressionHashing(t *testing.T) {
	set := make(map[uint64]compute.Expression)

	e := compute.NewFieldRef("alpha")
	set[e.Hash()] = e

	e = compute.NewFieldRef("beta")
	_, ok := set[e.Hash()]
	assert.False(t, ok)
	set[e.Hash()] = e

	e = compute.NewFieldRef("beta")
	ex, ok := set[e.Hash()]
	assert.True(t, ok)
	assert.True(t, e.Equals(ex))

	e = compute.NewLiteral(1)
	set[e.Hash()] = e
	_, ok = set[compute.NewLiteral(1).Hash()]
	assert.True(t, ok)
	_, ok = set[compute.NewLiteral(3).Hash()]
	assert.False(t, ok)
	set[compute.NewLiteral(3).Hash()] = compute.NewLiteral(3)

	e = compute.NullLiteral(arrow.PrimitiveTypes.Int32)
	set[e.Hash()] = e
	_, ok = set[compute.NullLiteral(arrow.PrimitiveTypes.Int32).Hash()]
	assert.True(t, ok)
	e = compute.NullLiteral(arrow.PrimitiveTypes.Float32)
	_, ok = set[e.Hash()]
	assert.False(t, ok)
	set[e.Hash()] = e

	// calls hash by function name + args; empty vs nil args hash equally
	e = compute.NewCall("add", []compute.Expression{}, nil)
	set[e.Hash()] = e
	_, ok = set[compute.NewCall("add", nil, nil).Hash()]
	assert.True(t, ok)
	e = compute.NewCall("widgetify", nil, nil)
	_, ok = set[e.Hash()]
	assert.False(t, ok)
	set[e.Hash()] = e

	assert.Len(t, set, 8)
}

// TestIsScalarExpression: scalar literals and field refs are scalar
// expressions; array literals are not.
func TestIsScalarExpression(t *testing.T) {
	assert.True(t, compute.NewLiteral(true).IsScalarExpr())
	arr := array.MakeFromData(array.NewData(arrow.PrimitiveTypes.Int8, 0, []*memory.Buffer{nil, nil}, nil, 0, 0))
	defer arr.Release()

	assert.False(t, compute.NewLiteral(arr).IsScalarExpr())
	assert.True(t, compute.NewFieldRef("a").IsScalarExpr())
}

// TestExpressionIsSatisfiable exercises the satisfiability shortcuts for
// boolean/null literals, field refs, and unbound calls.
func TestExpressionIsSatisfiable(t *testing.T) {
	assert.True(t, compute.NewLiteral(true).IsSatisfiable())
	assert.False(t, compute.NewLiteral(false).IsSatisfiable())

	null := scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean)
	assert.False(t, compute.NewLiteral(null).IsSatisfiable())
	assert.True(t, compute.NewFieldRef("a").IsSatisfiable())
	assert.True(t, compute.Equal(compute.NewFieldRef("a"), compute.NewLiteral(1)).IsSatisfiable())
	// no constant folding here
	assert.True(t, compute.Equal(compute.NewLiteral(0), compute.NewLiteral(1)).IsSatisfiable())

	// when a top level conjunction contains an Expression which is certain to
	// evaluate to null, it can only evaluate to null or false
	neverTrue := compute.And(compute.NewLiteral(null), compute.NewFieldRef("a"))
	// this may appear in satisfiable filters if coalesced (for example, wrapped in fill_na)
	assert.True(t, compute.NewCall("is_null", []compute.Expression{neverTrue}, nil).IsSatisfiable())
}
lookupArr.Release() - - intvalueset := compute.NewDatum(lookupArr) - defer intvalueset.Release() - - bldr2 := array.NewFloat64Builder(memory.DefaultAllocator) - defer bldr2.Release() - - bldr2.AppendValues([]float64{0.5, 1.0, 2.0}, nil) - lookupArr = bldr2.NewArray() - defer lookupArr.Release() - - fltvalueset := compute.NewDatum(lookupArr) - defer fltvalueset.Release() - - tests := []struct { - name string - expr compute.Expression - }{ - {"null literal", compute.NewLiteral(scalar.MakeNullScalar(arrow.Null))}, - {"null int32 literal", compute.NewLiteral(scalar.MakeNullScalar(arrow.PrimitiveTypes.Int32))}, - {"null struct literal", compute.NewLiteral(scalar.MakeNullScalar(arrow.StructOf( - arrow.Field{Name: "i", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - arrow.Field{Name: "s", Type: arrow.BinaryTypes.String, Nullable: true}, - )))}, - {"literal true", compute.NewLiteral(true)}, - {"literal false", compute.NewLiteral(false)}, - {"literal int", compute.NewLiteral(1)}, - {"literal float", compute.NewLiteral(1.125)}, - {"stringy strings", compute.NewLiteral("stringy strings")}, - {"field ref", compute.NewFieldRef("field")}, - {"greater", compute.Greater(compute.NewFieldRef("a"), compute.NewLiteral(0.25))}, - {"or", compute.Or( - compute.Equal(compute.NewFieldRef("a"), compute.NewLiteral(1)), - compute.NotEqual(compute.NewFieldRef("b"), compute.NewLiteral("hello")), - compute.Equal(compute.NewFieldRef("b"), compute.NewLiteral("foo bar")))}, - {"not", compute.Not(compute.NewFieldRef("alpha"))}, - {"is_in", compute.NewCall("is_in", []compute.Expression{compute.NewLiteral(1)}, &compute.SetLookupOptions{ValueSet: intvalueset})}, - {"is_in cast", compute.NewCall("is_in", []compute.Expression{ - compute.NewCall("cast", []compute.Expression{compute.NewFieldRef("version")}, compute.NewCastOptions(arrow.PrimitiveTypes.Float64, true))}, - &compute.SetLookupOptions{ValueSet: fltvalueset})}, - {"is valid", compute.IsValid(compute.NewFieldRef("validity"))}, - {"lots and", 
compute.And( - compute.And( - compute.GreaterEqual(compute.NewFieldRef("x"), compute.NewLiteral(-1.5)), - compute.Less(compute.NewFieldRef("x"), compute.NewLiteral(0.0))), - compute.And(compute.GreaterEqual(compute.NewFieldRef("y"), compute.NewLiteral(0.0)), - compute.Less(compute.NewFieldRef("y"), compute.NewLiteral(1.5))), - compute.And(compute.Greater(compute.NewFieldRef("z"), compute.NewLiteral(1.5)), - compute.LessEqual(compute.NewFieldRef("z"), compute.NewLiteral(3.0))))}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - serialized, err := compute.SerializeExpr(tt.expr, mem) - assert.NoError(t, err) - defer serialized.Release() - roundTripped, err := compute.DeserializeExpr(mem, serialized) - assert.NoError(t, err) - defer roundTripped.Release() - assert.Truef(t, tt.expr.Equals(roundTripped), "started with: %s, got: %s", tt.expr, roundTripped) - }) - } -} diff --git a/go/arrow/compute/exprs/builders.go b/go/arrow/compute/exprs/builders.go deleted file mode 100644 index a3af8dd6f287d..0000000000000 --- a/go/arrow/compute/exprs/builders.go +++ /dev/null @@ -1,445 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -//go:build go1.18 - -package exprs - -import ( - "fmt" - "strconv" - "strings" - "unicode" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/substrait-io/substrait-go/expr" - "github.com/substrait-io/substrait-go/extensions" - "github.com/substrait-io/substrait-go/types" -) - -// NewDefaultExtensionSet constructs an empty extension set using the default -// Arrow Extension registry and the default collection of substrait extensions -// from the Substrait-go repo. -func NewDefaultExtensionSet() ExtensionIDSet { - return NewExtensionSetDefault(expr.NewEmptyExtensionRegistry(&extensions.DefaultCollection)) -} - -// NewScalarCall constructs a substrait ScalarFunction expression with the provided -// options and arguments. -// -// The function name (fn) is looked up in the internal Arrow DefaultExtensionIDRegistry -// to ensure it exists and to convert from the Arrow function name to the substrait -// function name. It is then looked up using the DefaultCollection from the -// substrait extensions module to find the declaration. If it cannot be found, -// we try constructing the compound signature name by getting the types of the -// arguments which were passed and appending them to the function name appropriately. -// -// An error is returned if the function cannot be resolved. -func NewScalarCall(reg ExtensionIDSet, fn string, opts []*types.FunctionOption, args ...types.FuncArg) (*expr.ScalarFunction, error) { - conv, ok := reg.GetArrowRegistry().GetArrowToSubstrait(fn) - if !ok { - return nil, arrow.ErrNotFound - } - - id, convOpts, err := conv(fn) - if err != nil { - return nil, err - } - - opts = append(opts, convOpts...) - return expr.NewScalarFunc(reg.GetSubstraitRegistry(), id, opts, args...) 
-} - -// NewFieldRefFromDotPath constructs a substrait reference segment from -// a dot path and the base schema. -// -// dot_path = '.' name -// -// | '[' digit+ ']' -// | dot_path+ -// -// # Examples -// -// Assume root schema of {alpha: i32, beta: struct>, delta: map} -// -// ".alpha" => StructFieldRef(0) -// "[2]" => StructFieldRef(2) -// ".beta[0]" => StructFieldRef(1, StructFieldRef(0)) -// "[1].gamma[3]" => StructFieldRef(1, StructFieldRef(0, ListElementRef(3))) -// ".delta.foobar" => StructFieldRef(2, MapKeyRef("foobar")) -// -// Note: when parsing a name, a '\' preceding any other character -// will be dropped from the resulting name. Therefore if a name must -// contain the characters '.', '\', '[', or ']' then they must be escaped -// with a preceding '\'. -func NewFieldRefFromDotPath(dotpath string, rootSchema *arrow.Schema) (expr.ReferenceSegment, error) { - if len(dotpath) == 0 { - return nil, fmt.Errorf("%w dotpath was empty", arrow.ErrInvalid) - } - - parseName := func() string { - var name string - for { - idx := strings.IndexAny(dotpath, `\[.`) - if idx == -1 { - name += dotpath - dotpath = "" - break - } - - if dotpath[idx] != '\\' { - // subscript for a new field ref - name += dotpath[:idx] - dotpath = dotpath[idx:] - break - } - - if len(dotpath) == idx+1 { - // dotpath ends with a backslash; consume it all - name += dotpath - dotpath = "" - break - } - - // append all characters before backslash, then the character which follows it - name += dotpath[:idx] + string(dotpath[idx+1]) - dotpath = dotpath[idx+2:] - } - return name - } - - var curType arrow.DataType = arrow.StructOf(rootSchema.Fields()...) 
- children := make([]expr.ReferenceSegment, 0) - - for len(dotpath) > 0 { - subscript := dotpath[0] - dotpath = dotpath[1:] - switch subscript { - case '.': - // next element is a name - n := parseName() - switch ct := curType.(type) { - case *arrow.StructType: - idx, found := ct.FieldIdx(n) - if !found { - return nil, fmt.Errorf("%w: dot path '%s' referenced invalid field", arrow.ErrInvalid, dotpath) - } - children = append(children, &expr.StructFieldRef{Field: int32(idx)}) - curType = ct.Field(idx).Type - case *arrow.MapType: - curType = ct.KeyType() - switch ct.KeyType().ID() { - case arrow.BINARY, arrow.LARGE_BINARY: - children = append(children, &expr.MapKeyRef{MapKey: expr.NewByteSliceLiteral([]byte(n), false)}) - case arrow.STRING, arrow.LARGE_STRING: - children = append(children, &expr.MapKeyRef{MapKey: expr.NewPrimitiveLiteral(n, false)}) - default: - return nil, fmt.Errorf("%w: MapKeyRef to non-binary/string map not supported", arrow.ErrNotImplemented) - } - default: - return nil, fmt.Errorf("%w: dot path names must refer to struct fields or map keys", arrow.ErrInvalid) - } - case '[': - subend := strings.IndexFunc(dotpath, func(r rune) bool { return !unicode.IsDigit(r) }) - if subend == -1 || dotpath[subend] != ']' { - return nil, fmt.Errorf("%w: dot path '%s' contained an unterminated index", arrow.ErrInvalid, dotpath) - } - idx, _ := strconv.Atoi(dotpath[:subend]) - switch ct := curType.(type) { - case *arrow.StructType: - if idx > ct.NumFields() { - return nil, fmt.Errorf("%w: field out of bounds in dotpath", arrow.ErrIndex) - } - curType = ct.Field(idx).Type - children = append(children, &expr.StructFieldRef{Field: int32(idx)}) - case *arrow.MapType: - curType = ct.KeyType() - var keyLiteral expr.Literal - // TODO: implement user defined types and variations - switch ct.KeyType().ID() { - case arrow.INT8: - keyLiteral = expr.NewPrimitiveLiteral(int8(idx), false) - case arrow.INT16: - keyLiteral = expr.NewPrimitiveLiteral(int16(idx), false) - case 
arrow.INT32: - keyLiteral = expr.NewPrimitiveLiteral(int32(idx), false) - case arrow.INT64: - keyLiteral = expr.NewPrimitiveLiteral(int64(idx), false) - case arrow.FLOAT32: - keyLiteral = expr.NewPrimitiveLiteral(float32(idx), false) - case arrow.FLOAT64: - keyLiteral = expr.NewPrimitiveLiteral(float64(idx), false) - default: - return nil, fmt.Errorf("%w: dotpath ref to map key type %s", arrow.ErrNotImplemented, ct.KeyType()) - } - children = append(children, &expr.MapKeyRef{MapKey: keyLiteral}) - case *arrow.ListType: - curType = ct.Elem() - children = append(children, &expr.ListElementRef{Offset: int32(idx)}) - case *arrow.LargeListType: - curType = ct.Elem() - children = append(children, &expr.ListElementRef{Offset: int32(idx)}) - case *arrow.FixedSizeListType: - curType = ct.Elem() - children = append(children, &expr.ListElementRef{Offset: int32(idx)}) - default: - return nil, fmt.Errorf("%w: %s type not supported for dotpath ref", arrow.ErrInvalid, ct) - } - dotpath = dotpath[subend+1:] - default: - return nil, fmt.Errorf("%w: dot path must begin with '[' or '.' got '%s'", - arrow.ErrInvalid, dotpath) - } - } - - out := children[0] - if len(children) > 1 { - cur := out - for _, c := range children[1:] { - switch r := cur.(type) { - case *expr.StructFieldRef: - r.Child = c - case *expr.MapKeyRef: - r.Child = c - case *expr.ListElementRef: - r.Child = c - } - cur = c - } - } - - return out, nil -} - -// RefFromFieldPath constructs a substrait field reference segment -// from a compute.FieldPath which should be a slice of integers -// indicating nested field paths to travel. This will return a -// series of StructFieldRef's whose child is the next element in -// the field path. 
-func RefFromFieldPath(field compute.FieldPath) expr.ReferenceSegment { - if len(field) == 0 { - return nil - } - - seg := expr.NewStructFieldRef(int32(field[0])) - parent := seg - for _, ref := range field[1:] { - next := expr.NewStructFieldRef(int32(ref)) - parent.Child = next - parent = next - } - - return seg -} - -// NewFieldRef constructs a properly typed substrait field reference segment, -// from a given arrow field reference, schema and extension set (for resolving -// substrait types). -func NewFieldRef(ref compute.FieldRef, schema *arrow.Schema, ext ExtensionIDSet) (*expr.FieldReference, error) { - path, err := ref.FindOne(schema) - if err != nil { - return nil, err - } - - st, err := ToSubstraitType(arrow.StructOf(schema.Fields()...), false, ext) - if err != nil { - return nil, err - } - - return expr.NewRootFieldRef(RefFromFieldPath(path), st.(*types.StructType)) -} - -// Builder wraps the substrait-go expression Builder and FuncArgBuilder -// interfaces for a simple interface that can be passed around to build -// substrait expressions from Arrow data. -type Builder interface { - expr.Builder - expr.FuncArgBuilder -} - -// ExprBuilder is the parent for building substrait expressions -// via Arrow types and functions. -// -// The expectation is that it should be utilized like so: -// -// bldr := NewExprBuilder(extSet) -// bldr.SetInputSchema(arrowschema) -// call, err := bldr.CallScalar("equal", nil, -// bldr.FieldRef("i32"), -// bldr.Literal(expr.NewPrimitiveLiteral( -// int32(0), false))) -// ex, err := call.BuildExpr() -// ... -// result, err := exprs.ExecuteScalarExpression(ctx, arrowschema, -// ex, input) -type ExprBuilder struct { - b expr.ExprBuilder - extSet ExtensionIDSet - inputSchema *arrow.Schema -} - -// NewExprBuilder constructs a new Expression Builder that will use the -// provided extension set and registry. 
-func NewExprBuilder(extSet ExtensionIDSet) ExprBuilder { - return ExprBuilder{ - b: expr.ExprBuilder{Reg: extSet.GetSubstraitRegistry()}, - extSet: extSet, - } -} - -// SetInputSchema sets the current Arrow schema that will be utilized -// for performing field reference and field type resolutions. -func (e *ExprBuilder) SetInputSchema(s *arrow.Schema) error { - st, err := ToSubstraitType(arrow.StructOf(s.Fields()...), false, e.extSet) - if err != nil { - return err - } - - e.inputSchema = s - e.b.BaseSchema = st.(*types.StructType) - return nil -} - -// MustCallScalar is like CallScalar, but will panic on error rather than -// return it. -func (e *ExprBuilder) MustCallScalar(fn string, opts []*types.FunctionOption, args ...expr.FuncArgBuilder) Builder { - b, err := e.CallScalar(fn, opts, args...) - if err != nil { - panic(err) - } - return b -} - -// CallScalar constructs a builder for a scalar function call. The function -// name is expected to be valid in the Arrow function registry which will -// map it properly to a substrait expression by resolving the types of -// the arguments. Examples are: "greater", "multiply", "equal", etc. -// -// Can return arrow.ErrNotFound if there is no function mapping found. -// Or will forward any error encountered when converting from an Arrow -// function to a substrait one. -func (e *ExprBuilder) CallScalar(fn string, opts []*types.FunctionOption, args ...expr.FuncArgBuilder) (Builder, error) { - conv, ok := e.extSet.GetArrowRegistry().GetArrowToSubstrait(fn) - if !ok { - return nil, arrow.ErrNotFound - } - - id, convOpts, err := conv(fn) - if err != nil { - return nil, err - } - - opts = append(opts, convOpts...) - return e.b.ScalarFunc(id, opts...).Args(args...), nil -} - -// FieldPath uses a field path to construct a Field Reference -// expression. 
-func (e *ExprBuilder) FieldPath(path compute.FieldPath) Builder { - segments := make([]expr.ReferenceSegment, len(path)) - for i, p := range path { - segments[i] = expr.NewStructFieldRef(int32(p)) - } - - return e.b.RootRef(expr.FlattenRefSegments(segments...)) -} - -// FieldIndex is shorthand for creating a single field reference -// to the struct field index provided. -func (e *ExprBuilder) FieldIndex(i int) Builder { - return e.b.RootRef(expr.NewStructFieldRef(int32(i))) -} - -// FieldRef constructs a field reference expression to the field with -// the given name from the input. It will be resolved to a field -// index when calling BuildExpr. -func (e *ExprBuilder) FieldRef(field string) Builder { - return &refBuilder{eb: e, fieldRef: compute.FieldRefName(field)} -} - -// FieldRefList accepts a list of either integers or strings to -// construct a field reference expression from. This will panic -// if any of elems are not a string or int. -// -// Field names will be resolved to their indexes when BuildExpr is called -// by using the provided Arrow schema. -func (e *ExprBuilder) FieldRefList(elems ...any) Builder { - return &refBuilder{eb: e, fieldRef: compute.FieldRefList(elems...)} -} - -// Literal wraps a substrait literal to be used as an argument to -// building other expressions. -func (e *ExprBuilder) Literal(l expr.Literal) Builder { - return e.b.Literal(l) -} - -// WrapLiteral is a convenience for accepting functions like NewLiteral -// which can potentially return an error. If an error is encountered, -// it will be surfaced when BuildExpr is called. -func (e *ExprBuilder) WrapLiteral(l expr.Literal, err error) Builder { - return e.b.Wrap(l, err) -} - -// Must is a convenience wrapper for any method that returns a Builder -// and error, panic'ing if it received an error or otherwise returning -// the Builder. 
-func (*ExprBuilder) Must(b Builder, err error) Builder { - if err != nil { - panic(err) - } - return b -} - -// Cast returns a Cast expression with the FailBehavior of ThrowException, -// erroring for invalid casts. -func (e *ExprBuilder) Cast(from Builder, to arrow.DataType) (Builder, error) { - t, err := ToSubstraitType(to, true, e.extSet) - if err != nil { - return nil, err - } - - return e.b.Cast(from, t).FailBehavior(types.BehaviorThrowException), nil -} - -type refBuilder struct { - eb *ExprBuilder - - fieldRef compute.FieldRef -} - -func (r *refBuilder) BuildFuncArg() (types.FuncArg, error) { - return r.BuildExpr() -} - -func (r *refBuilder) BuildExpr() (expr.Expression, error) { - if r.eb.inputSchema == nil { - return nil, fmt.Errorf("%w: no input schema specified for ref", arrow.ErrInvalid) - } - - path, err := r.fieldRef.FindOne(r.eb.inputSchema) - if err != nil { - return nil, err - } - - segments := make([]expr.ReferenceSegment, len(path)) - for i, p := range path { - segments[i] = expr.NewStructFieldRef(int32(p)) - } - - return r.eb.b.RootRef(expr.FlattenRefSegments(segments...)).Build() -} diff --git a/go/arrow/compute/exprs/builders_test.go b/go/arrow/compute/exprs/builders_test.go deleted file mode 100644 index 21ad3bd642030..0000000000000 --- a/go/arrow/compute/exprs/builders_test.go +++ /dev/null @@ -1,92 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package exprs_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/compute/exprs" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/substrait-io/substrait-go/expr" -) - -func TestNewScalarFunc(t *testing.T) { - reg := exprs.NewDefaultExtensionSet() - - fn, err := exprs.NewScalarCall(reg, "add", nil, - expr.NewPrimitiveLiteral(int32(1), false), - expr.NewPrimitiveLiteral(int32(10), false)) - require.NoError(t, err) - - assert.Equal(t, "add(i32(1), i32(10), {overflow: [ERROR]}) => i32", fn.String()) - assert.Equal(t, "add:i32_i32", fn.CompoundName()) -} - -func TestFieldRefDotPath(t *testing.T) { - f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f1_0 := arrow.Field{Name: "be.ta", Type: arrow.PrimitiveTypes.Int32} - f1 := arrow.Field{Name: "beta", Type: arrow.StructOf(f1_0)} - f2_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f2_1_0 := arrow.Field{Name: "[alpha]", Type: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32)} - f2_1_1 := arrow.Field{Name: "beta", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32)} - f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)} - f2 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_0, f2_1)} - s := arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil) - - tests := []struct { - dotpath string - shouldErr bool - expected expr.ReferenceSegment - }{ - {".alpha", false, &expr.StructFieldRef{Field: 0}}, - {"[2]", false, 
&expr.StructFieldRef{Field: 2}}, - {".beta[0]", false, &expr.StructFieldRef{Field: 1, Child: &expr.StructFieldRef{Field: 0}}}, - {"[2].gamma[1][5]", false, &expr.StructFieldRef{Field: 2, - Child: &expr.StructFieldRef{Field: 1, - Child: &expr.StructFieldRef{Field: 1, - Child: &expr.ListElementRef{Offset: 5}}}}}, - {"[2].gamma[0].foobar", false, &expr.StructFieldRef{Field: 2, - Child: &expr.StructFieldRef{Field: 1, - Child: &expr.StructFieldRef{Field: 0, - Child: &expr.MapKeyRef{MapKey: expr.NewPrimitiveLiteral("foobar", false)}}}}}, - {`[1].be\.ta`, false, &expr.StructFieldRef{Field: 1, Child: &expr.StructFieldRef{Field: 0}}}, - {`[2].gamma.\[alpha\]`, false, &expr.StructFieldRef{Field: 2, - Child: &expr.StructFieldRef{Field: 1, - Child: &expr.StructFieldRef{Field: 0}}}}, - {`[5]`, true, nil}, // bad struct index - {``, true, nil}, // empty - {`delta`, true, nil}, // not found - {`[1234`, true, nil}, // bad syntax - {`[1stuf]`, true, nil}, // bad syntax - } - - for _, tt := range tests { - t.Run(tt.dotpath, func(t *testing.T) { - ref, err := exprs.NewFieldRefFromDotPath(tt.dotpath, s) - if tt.shouldErr { - assert.Error(t, err) - } else { - assert.NoError(t, err) - assert.Truef(t, tt.expected.Equals(ref), "expected: %s\ngot: %s", tt.expected, ref) - } - }) - } -} diff --git a/go/arrow/compute/exprs/exec.go b/go/arrow/compute/exprs/exec.go deleted file mode 100644 index 850acbb3cd492..0000000000000 --- a/go/arrow/compute/exprs/exec.go +++ /dev/null @@ -1,620 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package exprs - -import ( - "context" - "fmt" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/endian" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/substrait-io/substrait-go/expr" - "github.com/substrait-io/substrait-go/extensions" - "github.com/substrait-io/substrait-go/types" -) - -func makeExecBatch(ctx context.Context, schema *arrow.Schema, partial compute.Datum) (out compute.ExecBatch, err error) { - // cleanup if we get an error - defer func() { - if err != nil { - for _, v := range out.Values { - if v != nil { - v.Release() - } - } - } - }() - - if partial.Kind() == compute.KindRecord { - partialBatch := partial.(*compute.RecordDatum).Value - batchSchema := partialBatch.Schema() - - out.Values = make([]compute.Datum, schema.NumFields()) - out.Len = partialBatch.NumRows() - - for i, field := range schema.Fields() { - idxes := batchSchema.FieldIndices(field.Name) - switch len(idxes) { - case 0: - out.Values[i] = compute.NewDatum(scalar.MakeNullScalar(field.Type)) - case 1: - col := partialBatch.Column(idxes[0]) - if !arrow.TypeEqual(col.DataType(), field.Type) { - // referenced field was present but didn't have expected type - // we'll cast this case for now - 
col, err = compute.CastArray(ctx, col, compute.SafeCastOptions(field.Type)) - if err != nil { - return compute.ExecBatch{}, err - } - defer col.Release() - } - out.Values[i] = compute.NewDatum(col) - default: - err = fmt.Errorf("%w: exec batch field '%s' ambiguous, more than one match", - arrow.ErrInvalid, field.Name) - return compute.ExecBatch{}, err - } - } - return - } - - part, ok := partial.(compute.ArrayLikeDatum) - if !ok { - return out, fmt.Errorf("%w: MakeExecBatch from %s", arrow.ErrNotImplemented, partial) - } - - // wasteful but useful for testing - if part.Type().ID() == arrow.STRUCT { - switch part := part.(type) { - case *compute.ArrayDatum: - arr := part.MakeArray().(*array.Struct) - defer arr.Release() - - batch := array.RecordFromStructArray(arr, nil) - defer batch.Release() - return makeExecBatch(ctx, schema, compute.NewDatumWithoutOwning(batch)) - case *compute.ScalarDatum: - out.Len = 1 - out.Values = make([]compute.Datum, schema.NumFields()) - - s := part.Value.(*scalar.Struct) - dt := s.Type.(*arrow.StructType) - - for i, field := range schema.Fields() { - idx, found := dt.FieldIdx(field.Name) - if !found { - out.Values[i] = compute.NewDatum(scalar.MakeNullScalar(field.Type)) - continue - } - - val := s.Value[idx] - if !arrow.TypeEqual(val.DataType(), field.Type) { - // referenced field was present but didn't have the expected - // type. for now we'll cast this - val, err = val.CastTo(field.Type) - if err != nil { - return compute.ExecBatch{}, err - } - } - out.Values[i] = compute.NewDatum(val) - } - return - } - } - - return out, fmt.Errorf("%w: MakeExecBatch from %s", arrow.ErrNotImplemented, partial) -} - -// ToArrowSchema takes a substrait NamedStruct and an extension set (for -// type resolution mapping) and creates the equivalent Arrow Schema. 
-func ToArrowSchema(base types.NamedStruct, ext ExtensionIDSet) (*arrow.Schema, error) { - fields := make([]arrow.Field, len(base.Names)) - for i, typ := range base.Struct.Types { - dt, nullable, err := FromSubstraitType(typ, ext) - if err != nil { - return nil, err - } - fields[i] = arrow.Field{ - Name: base.Names[i], - Type: dt, - Nullable: nullable, - } - } - - return arrow.NewSchema(fields, nil), nil -} - -type ( - regCtxKey struct{} - extCtxKey struct{} -) - -func WithExtensionRegistry(ctx context.Context, reg *ExtensionIDRegistry) context.Context { - return context.WithValue(ctx, regCtxKey{}, reg) -} - -func GetExtensionRegistry(ctx context.Context) *ExtensionIDRegistry { - v, ok := ctx.Value(regCtxKey{}).(*ExtensionIDRegistry) - if !ok { - v = DefaultExtensionIDRegistry - } - return v -} - -func WithExtensionIDSet(ctx context.Context, ext ExtensionIDSet) context.Context { - return context.WithValue(ctx, extCtxKey{}, ext) -} - -func GetExtensionIDSet(ctx context.Context) ExtensionIDSet { - v, ok := ctx.Value(extCtxKey{}).(ExtensionIDSet) - if !ok { - return NewExtensionSet( - expr.NewEmptyExtensionRegistry(&extensions.DefaultCollection), - GetExtensionRegistry(ctx)) - } - return v -} - -func literalToDatum(mem memory.Allocator, lit expr.Literal, ext ExtensionIDSet) (compute.Datum, error) { - switch v := lit.(type) { - case *expr.PrimitiveLiteral[bool]: - return compute.NewDatum(scalar.NewBooleanScalar(v.Value)), nil - case *expr.PrimitiveLiteral[int8]: - return compute.NewDatum(scalar.NewInt8Scalar(v.Value)), nil - case *expr.PrimitiveLiteral[int16]: - return compute.NewDatum(scalar.NewInt16Scalar(v.Value)), nil - case *expr.PrimitiveLiteral[int32]: - return compute.NewDatum(scalar.NewInt32Scalar(v.Value)), nil - case *expr.PrimitiveLiteral[int64]: - return compute.NewDatum(scalar.NewInt64Scalar(v.Value)), nil - case *expr.PrimitiveLiteral[float32]: - return compute.NewDatum(scalar.NewFloat32Scalar(v.Value)), nil - case *expr.PrimitiveLiteral[float64]: - 
return compute.NewDatum(scalar.NewFloat64Scalar(v.Value)), nil - case *expr.PrimitiveLiteral[string]: - return compute.NewDatum(scalar.NewStringScalar(v.Value)), nil - case *expr.PrimitiveLiteral[types.Timestamp]: - return compute.NewDatum(scalar.NewTimestampScalar(arrow.Timestamp(v.Value), &arrow.TimestampType{Unit: arrow.Microsecond})), nil - case *expr.PrimitiveLiteral[types.TimestampTz]: - return compute.NewDatum(scalar.NewTimestampScalar(arrow.Timestamp(v.Value), - &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: TimestampTzTimezone})), nil - case *expr.PrimitiveLiteral[types.Date]: - return compute.NewDatum(scalar.NewDate32Scalar(arrow.Date32(v.Value))), nil - case *expr.PrimitiveLiteral[types.Time]: - return compute.NewDatum(scalar.NewTime64Scalar(arrow.Time64(v.Value), &arrow.Time64Type{Unit: arrow.Microsecond})), nil - case *expr.PrimitiveLiteral[types.FixedChar]: - length := int(v.Type.(*types.FixedCharType).Length) - return compute.NewDatum(scalar.NewExtensionScalar( - scalar.NewFixedSizeBinaryScalar(memory.NewBufferBytes([]byte(v.Value)), - &arrow.FixedSizeBinaryType{ByteWidth: length}), fixedChar(int32(length)))), nil - case *expr.ByteSliceLiteral[[]byte]: - return compute.NewDatum(scalar.NewBinaryScalar(memory.NewBufferBytes(v.Value), arrow.BinaryTypes.Binary)), nil - case *expr.ByteSliceLiteral[types.UUID]: - return compute.NewDatum(scalar.NewExtensionScalar(scalar.NewFixedSizeBinaryScalar( - memory.NewBufferBytes(v.Value), uuid().(arrow.ExtensionType).StorageType()), uuid())), nil - case *expr.ByteSliceLiteral[types.FixedBinary]: - return compute.NewDatum(scalar.NewFixedSizeBinaryScalar(memory.NewBufferBytes(v.Value), - &arrow.FixedSizeBinaryType{ByteWidth: int(v.Type.(*types.FixedBinaryType).Length)})), nil - case *expr.NullLiteral: - dt, _, err := FromSubstraitType(v.Type, ext) - if err != nil { - return nil, err - } - return compute.NewDatum(scalar.MakeNullScalar(dt)), nil - case *expr.ListLiteral: - var elemType arrow.DataType - - values 
:= make([]scalar.Scalar, len(v.Value)) - for i, val := range v.Value { - d, err := literalToDatum(mem, val, ext) - if err != nil { - return nil, err - } - defer d.Release() - values[i] = d.(*compute.ScalarDatum).Value - if elemType != nil { - if !arrow.TypeEqual(values[i].DataType(), elemType) { - return nil, fmt.Errorf("%w: %s has a value whose type doesn't match the other list values", - arrow.ErrInvalid, v) - } - } else { - elemType = values[i].DataType() - } - } - - bldr := array.NewBuilder(memory.DefaultAllocator, elemType) - defer bldr.Release() - if err := scalar.AppendSlice(bldr, values); err != nil { - return nil, err - } - arr := bldr.NewArray() - defer arr.Release() - return compute.NewDatum(scalar.NewListScalar(arr)), nil - case *expr.MapLiteral: - dt, _, err := FromSubstraitType(v.Type, ext) - if err != nil { - return nil, err - } - - mapType, ok := dt.(*arrow.MapType) - if !ok { - return nil, fmt.Errorf("%w: map literal with non-map type", arrow.ErrInvalid) - } - - keys, values := make([]scalar.Scalar, len(v.Value)), make([]scalar.Scalar, len(v.Value)) - for i, kv := range v.Value { - k, err := literalToDatum(mem, kv.Key, ext) - if err != nil { - return nil, err - } - defer k.Release() - scalarKey := k.(*compute.ScalarDatum).Value - - v, err := literalToDatum(mem, kv.Value, ext) - if err != nil { - return nil, err - } - defer v.Release() - scalarValue := v.(*compute.ScalarDatum).Value - - if !arrow.TypeEqual(mapType.KeyType(), scalarKey.DataType()) { - return nil, fmt.Errorf("%w: key type mismatch for %s, got key with type %s", - arrow.ErrInvalid, mapType, scalarKey.DataType()) - } - if !arrow.TypeEqual(mapType.ItemType(), scalarValue.DataType()) { - return nil, fmt.Errorf("%w: value type mismatch for %s, got value with type %s", - arrow.ErrInvalid, mapType, scalarValue.DataType()) - } - - keys[i], values[i] = scalarKey, scalarValue - } - - keyBldr, valBldr := array.NewBuilder(mem, mapType.KeyType()), array.NewBuilder(mem, mapType.ItemType()) - defer 
keyBldr.Release() - defer valBldr.Release() - - if err := scalar.AppendSlice(keyBldr, keys); err != nil { - return nil, err - } - if err := scalar.AppendSlice(valBldr, values); err != nil { - return nil, err - } - - keyArr, valArr := keyBldr.NewArray(), valBldr.NewArray() - defer keyArr.Release() - defer valArr.Release() - - kvArr, err := array.NewStructArray([]arrow.Array{keyArr, valArr}, []string{"key", "value"}) - if err != nil { - return nil, err - } - defer kvArr.Release() - - return compute.NewDatumWithoutOwning(scalar.NewMapScalar(kvArr)), nil - case *expr.StructLiteral: - fields := make([]scalar.Scalar, len(v.Value)) - names := make([]string, len(v.Value)) - - for i, l := range v.Value { - lit, err := literalToDatum(mem, l, ext) - if err != nil { - return nil, err - } - fields[i] = lit.(*compute.ScalarDatum).Value - } - - s, err := scalar.NewStructScalarWithNames(fields, names) - return compute.NewDatum(s), err - case *expr.ProtoLiteral: - switch v := v.Value.(type) { - case *types.Decimal: - if len(v.Value) != arrow.Decimal128SizeBytes { - return nil, fmt.Errorf("%w: decimal literal had %d bytes (expected %d)", - arrow.ErrInvalid, len(v.Value), arrow.Decimal128SizeBytes) - } - - var val decimal128.Num - data := (*(*[arrow.Decimal128SizeBytes]byte)(unsafe.Pointer(&val)))[:] - copy(data, v.Value) - if endian.IsBigEndian { - // reverse the bytes - for i := len(data)/2 - 1; i >= 0; i-- { - opp := len(data) - 1 - i - data[i], data[opp] = data[opp], data[i] - } - } - - return compute.NewDatum(scalar.NewDecimal128Scalar(val, - &arrow.Decimal128Type{Precision: v.Precision, Scale: v.Scale})), nil - case *types.UserDefinedLiteral: // not yet implemented - case *types.IntervalYearToMonth: - bldr := array.NewInt32Builder(memory.DefaultAllocator) - defer bldr.Release() - typ := intervalYear() - bldr.Append(v.Years) - bldr.Append(v.Months) - arr := bldr.NewArray() - defer arr.Release() - return &compute.ScalarDatum{Value: scalar.NewExtensionScalar( - 
scalar.NewFixedSizeListScalar(arr), typ)}, nil - case *types.IntervalDayToSecond: - bldr := array.NewInt32Builder(memory.DefaultAllocator) - defer bldr.Release() - typ := intervalDay() - bldr.Append(v.Days) - bldr.Append(v.Seconds) - arr := bldr.NewArray() - defer arr.Release() - return &compute.ScalarDatum{Value: scalar.NewExtensionScalar( - scalar.NewFixedSizeListScalar(arr), typ)}, nil - case *types.VarChar: - return compute.NewDatum(scalar.NewExtensionScalar( - scalar.NewStringScalar(v.Value), varChar(int32(v.Length)))), nil - } - } - - return nil, arrow.ErrNotImplemented -} - -// ExecuteScalarExpression executes the given substrait expression using the provided datum as input. -// It will first create an exec batch using the input schema and the datum. -// The datum may have missing or incorrectly ordered columns while the input schema -// should describe the expected input schema for the expression. Missing fields will -// be replaced with null scalars and incorrectly ordered columns will be re-ordered -// according to the schema. -// -// You can provide an allocator to use through the context via compute.WithAllocator. -// -// You can provide the ExtensionIDSet to use through the context via WithExtensionIDSet. -func ExecuteScalarExpression(ctx context.Context, inputSchema *arrow.Schema, expression expr.Expression, partialInput compute.Datum) (compute.Datum, error) { - if expression == nil { - return nil, arrow.ErrInvalid - } - - batch, err := makeExecBatch(ctx, inputSchema, partialInput) - if err != nil { - return nil, err - } - defer func() { - for _, v := range batch.Values { - v.Release() - } - }() - - return executeScalarBatch(ctx, batch, expression, GetExtensionIDSet(ctx)) -} - -// ExecuteScalarSubstrait uses the provided Substrait extended expression to -// determine the expected input schema (replacing missing fields in the partial -// input datum with null scalars and re-ordering columns if necessary) and -// ExtensionIDSet to use. 
You can provide the extension registry to use -// through the context via WithExtensionRegistry, otherwise the default -// Arrow registry will be used. You can provide a memory.Allocator to use -// the same way via compute.WithAllocator. -func ExecuteScalarSubstrait(ctx context.Context, expression *expr.Extended, partialInput compute.Datum) (compute.Datum, error) { - if expression == nil { - return nil, arrow.ErrInvalid - } - - var toExecute expr.Expression - - switch len(expression.ReferredExpr) { - case 0: - return nil, fmt.Errorf("%w: no referred expression to execute", arrow.ErrInvalid) - case 1: - if toExecute = expression.ReferredExpr[0].GetExpr(); toExecute == nil { - return nil, fmt.Errorf("%w: measures not implemented", arrow.ErrNotImplemented) - } - default: - return nil, fmt.Errorf("%w: only single referred expression implemented", arrow.ErrNotImplemented) - } - - reg := GetExtensionRegistry(ctx) - set := NewExtensionSet(expr.NewExtensionRegistry(expression.Extensions, &extensions.DefaultCollection), reg) - sc, err := ToArrowSchema(expression.BaseSchema, set) - if err != nil { - return nil, err - } - - return ExecuteScalarExpression(WithExtensionIDSet(ctx, set), sc, toExecute, partialInput) -} - -func execFieldRef(ctx context.Context, e *expr.FieldReference, input compute.ExecBatch, ext ExtensionIDSet) (compute.Datum, error) { - if e.Root != expr.RootReference { - return nil, fmt.Errorf("%w: only RootReference is implemented", arrow.ErrNotImplemented) - } - - ref, ok := e.Reference.(expr.ReferenceSegment) - if !ok { - return nil, fmt.Errorf("%w: only direct references are implemented", arrow.ErrNotImplemented) - } - - expectedType, _, err := FromSubstraitType(e.GetType(), ext) - if err != nil { - return nil, err - } - - var param compute.Datum - if sref, ok := ref.(*expr.StructFieldRef); ok { - if sref.Field < 0 || sref.Field >= int32(len(input.Values)) { - return nil, arrow.ErrInvalid - } - param = input.Values[sref.Field] - ref = ref.GetChild() - } - - 
out, err := GetReferencedValue(compute.GetAllocator(ctx), ref, param, ext) - if err == compute.ErrEmpty { - out = compute.NewDatum(param) - } else if err != nil { - return nil, err - } - if !arrow.TypeEqual(out.(compute.ArrayLikeDatum).Type(), expectedType) { - return nil, fmt.Errorf("%w: referenced field %s was %s, but should have been %s", - arrow.ErrInvalid, ref, out.(compute.ArrayLikeDatum).Type(), expectedType) - } - - return out, nil -} - -func executeScalarBatch(ctx context.Context, input compute.ExecBatch, exp expr.Expression, ext ExtensionIDSet) (compute.Datum, error) { - if !exp.IsScalar() { - return nil, fmt.Errorf("%w: ExecuteScalarExpression cannot execute non-scalar expressions", - arrow.ErrInvalid) - } - - switch e := exp.(type) { - case expr.Literal: - return literalToDatum(compute.GetAllocator(ctx), e, ext) - case *expr.FieldReference: - return execFieldRef(ctx, e, input, ext) - case *expr.Cast: - if e.Input == nil { - return nil, fmt.Errorf("%w: cast without argument to cast", arrow.ErrInvalid) - } - - arg, err := executeScalarBatch(ctx, input, e.Input, ext) - if err != nil { - return nil, err - } - defer arg.Release() - - dt, _, err := FromSubstraitType(e.Type, ext) - if err != nil { - return nil, fmt.Errorf("%w: could not determine type for cast", err) - } - - var opts *compute.CastOptions - switch e.FailureBehavior { - case types.BehaviorThrowException: - opts = compute.UnsafeCastOptions(dt) - case types.BehaviorUnspecified: - return nil, fmt.Errorf("%w: cast behavior unspecified", arrow.ErrInvalid) - case types.BehaviorReturnNil: - return nil, fmt.Errorf("%w: cast behavior return nil", arrow.ErrNotImplemented) - } - return compute.CastDatum(ctx, arg, opts) - case *expr.ScalarFunction: - var ( - err error - allScalar = true - args = make([]compute.Datum, e.NArgs()) - argTypes = make([]arrow.DataType, e.NArgs()) - ) - for i := 0; i < e.NArgs(); i++ { - switch v := e.Arg(i).(type) { - case types.Enum: - args[i] = 
compute.NewDatum(scalar.NewStringScalar(string(v))) - case expr.Expression: - args[i], err = executeScalarBatch(ctx, input, v, ext) - if err != nil { - return nil, err - } - defer args[i].Release() - - if args[i].Kind() != compute.KindScalar { - allScalar = false - } - default: - return nil, arrow.ErrNotImplemented - } - - argTypes[i] = args[i].(compute.ArrayLikeDatum).Type() - } - - _, conv, ok := ext.DecodeFunction(e.FuncRef()) - if !ok { - return nil, arrow.ErrNotImplemented - } - - fname, opts, err := conv(e) - if err != nil { - return nil, err - } - - ectx := compute.GetExecCtx(ctx) - fn, ok := ectx.Registry.GetFunction(fname) - if !ok { - return nil, arrow.ErrInvalid - } - - if fn.Kind() != compute.FuncScalar { - return nil, arrow.ErrInvalid - } - - k, err := fn.DispatchBest(argTypes...) - if err != nil { - return nil, err - } - - kctx := &exec.KernelCtx{Ctx: ctx, Kernel: k} - init := k.GetInitFn() - kinitArgs := exec.KernelInitArgs{Kernel: k, Inputs: argTypes, Options: opts} - if init != nil { - kctx.State, err = init(kctx, kinitArgs) - if err != nil { - return nil, err - } - } - - executor := compute.NewScalarExecutor() - if err := executor.Init(kctx, kinitArgs); err != nil { - return nil, err - } - - batch := compute.ExecBatch{Values: args} - if allScalar { - batch.Len = 1 - } else { - batch.Len = input.Len - } - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - ch := make(chan compute.Datum, ectx.ExecChannelSize) - go func() { - defer close(ch) - if err = executor.Execute(ctx, &batch, ch); err != nil { - cancel() - } - }() - - result := executor.WrapResults(ctx, ch, false) - if err == nil { - debug.Assert(executor.CheckResultType(result) == nil, "invalid result type") - } - - if ctx.Err() == context.Canceled && result != nil { - result.Release() - } - - return result, nil - } - - return nil, arrow.ErrNotImplemented -} diff --git a/go/arrow/compute/exprs/exec_internal_test.go b/go/arrow/compute/exprs/exec_internal_test.go 
deleted file mode 100644 index 450db139e9357..0000000000000 --- a/go/arrow/compute/exprs/exec_internal_test.go +++ /dev/null @@ -1,114 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package exprs - -import ( - "context" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -var ( - boringArrowSchema = arrow.NewSchema([]arrow.Field{ - {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "i32", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - {Name: "i32_req", Type: arrow.PrimitiveTypes.Int32}, - {Name: "u32", Type: arrow.PrimitiveTypes.Uint32, Nullable: true}, - {Name: "i64", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - {Name: "f32", Type: arrow.PrimitiveTypes.Float32, Nullable: true}, - {Name: "f32_req", Type: arrow.PrimitiveTypes.Float32}, - {Name: "f64", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - {Name: "date32", Type: 
arrow.FixedWidthTypes.Date32, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "bin", Type: arrow.BinaryTypes.Binary, Nullable: true}, - }, nil) -) - -func TestMakeExecBatch(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - const numRows = 3 - var ( - ctx = compute.WithAllocator(context.Background(), mem) - i32, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 3]`)) - f32, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Float32, strings.NewReader(`[1.5, 2.25, 3.125]`)) - empty, _, _ = array.RecordFromJSON(mem, boringArrowSchema, strings.NewReader(`[]`)) - ) - defer i32.Release() - defer f32.Release() - - getField := func(n string) arrow.Field { - f, _ := boringArrowSchema.FieldsByName(n) - return f[0] - } - - tests := []struct { - name string - batch arrow.Record - }{ - {"empty", empty}, - {"subset", array.NewRecord(arrow.NewSchema([]arrow.Field{getField("i32"), getField("f32")}, nil), - []arrow.Array{i32, f32}, numRows)}, - {"flipped subset", array.NewRecord(arrow.NewSchema([]arrow.Field{getField("f32"), getField("i32")}, nil), - []arrow.Array{f32, i32}, numRows)}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - defer tt.batch.Release() - batch, err := makeExecBatch(ctx, boringArrowSchema, compute.NewDatumWithoutOwning(tt.batch)) - require.NoError(t, err) - require.Equal(t, tt.batch.NumRows(), batch.Len) - - defer func() { - for _, v := range batch.Values { - v.Release() - } - }() - - for i, field := range boringArrowSchema.Fields() { - typ := batch.Values[i].(compute.ArrayLikeDatum).Type() - assert.Truef(t, arrow.TypeEqual(typ, field.Type), - "expected: %s\ngot: %s", field.Type, typ) - - idxes := tt.batch.Schema().FieldIndices(field.Name) - if batch.Values[i].Kind() == compute.KindScalar { - assert.False(t, batch.Values[i].(*compute.ScalarDatum).Value.IsValid(), - "null placeholder should be injected") - 
assert.Len(t, idxes, 0, "should only happen when column isn't found") - } else { - col := tt.batch.Column(idxes[0]) - val := batch.Values[i].(*compute.ArrayDatum).MakeArray() - defer val.Release() - - assert.Truef(t, array.Equal(col, val), "expected: %s\ngot: %s", col, val) - } - } - }) - } -} diff --git a/go/arrow/compute/exprs/exec_test.go b/go/arrow/compute/exprs/exec_test.go deleted file mode 100644 index b74f80057a0d7..0000000000000 --- a/go/arrow/compute/exprs/exec_test.go +++ /dev/null @@ -1,461 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package exprs_test - -import ( - "context" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/compute/exprs" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/substrait-io/substrait-go/expr" - "github.com/substrait-io/substrait-go/types" -) - -var ( - extSet = exprs.NewDefaultExtensionSet() - _, u32TypeRef, _ = extSet.EncodeTypeVariation(arrow.PrimitiveTypes.Uint32) - - boringSchema = types.NamedStruct{ - Names: []string{ - "bool", "i8", "i32", "i32_req", - "u32", "i64", "f32", "f32_req", - "f64", "date32", "str", "bin"}, - Struct: types.StructType{ - Nullability: types.NullabilityRequired, - Types: []types.Type{ - &types.BooleanType{}, - &types.Int8Type{}, - &types.Int32Type{}, - &types.Int32Type{Nullability: types.NullabilityRequired}, - &types.Int32Type{ - TypeVariationRef: u32TypeRef, - }, - &types.Int64Type{}, - &types.Float32Type{}, - &types.Float32Type{Nullability: types.NullabilityRequired}, - &types.Float64Type{}, - &types.DateType{}, - &types.StringType{}, - &types.BinaryType{}, - }, - }, - } - - boringArrowSchema = arrow.NewSchema([]arrow.Field{ - {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "i32", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - {Name: "u32", Type: arrow.PrimitiveTypes.Uint32, Nullable: true}, - {Name: "i64", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - {Name: "f32", Type: arrow.PrimitiveTypes.Float32, Nullable: true}, - {Name: "f64", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - {Name: "date32", Type: arrow.FixedWidthTypes.Date32, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: 
true}, - {Name: "bin", Type: arrow.BinaryTypes.Binary, Nullable: true}, - }, nil) -) - -func TestToArrowSchema(t *testing.T) { - expectedSchema := arrow.NewSchema([]arrow.Field{ - {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "i32", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - {Name: "i32_req", Type: arrow.PrimitiveTypes.Int32}, - {Name: "u32", Type: arrow.PrimitiveTypes.Uint32, Nullable: true}, - {Name: "i64", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - {Name: "f32", Type: arrow.PrimitiveTypes.Float32, Nullable: true}, - {Name: "f32_req", Type: arrow.PrimitiveTypes.Float32}, - {Name: "f64", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - {Name: "date32", Type: arrow.FixedWidthTypes.Date32, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "bin", Type: arrow.BinaryTypes.Binary, Nullable: true}, - }, nil) - - sc, err := exprs.ToArrowSchema(boringSchema, extSet) - assert.NoError(t, err) - - assert.Truef(t, expectedSchema.Equal(sc), "expected: %s\ngot: %s", expectedSchema, sc) -} - -func assertEqual(t *testing.T, expected, actual any) bool { - switch e := expected.(type) { - case compute.Datum: - return assert.Truef(t, e.Equals(compute.NewDatumWithoutOwning(actual)), - "expected: %s\ngot: %s", e, actual) - case arrow.Array: - switch a := actual.(type) { - case compute.Datum: - if a.Kind() == compute.KindArray { - actual := a.(*compute.ArrayDatum).MakeArray() - defer actual.Release() - return assert.Truef(t, array.Equal(e, actual), "expected: %s\ngot: %s", - e, actual) - } - case arrow.Array: - return assert.Truef(t, array.Equal(e, a), "expected: %s\ngot: %s", - e, actual) - } - t.Errorf("expected arrow Array, got %s", actual) - return false - } - panic("unimplemented comparison") -} - -func TestComparisons(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 
0) - - var ( - ctx = compute.WithAllocator(context.Background(), mem) - zero = scalar.MakeScalar(int32(0)) - one = scalar.MakeScalar(int32(1)) - two = scalar.MakeScalar(int32(2)) - - str = scalar.MakeScalar("hello") - bin = scalar.MakeScalar([]byte("hello")) - ) - - getArgType := func(dt arrow.DataType) types.Type { - switch dt.ID() { - case arrow.INT32: - return &types.Int32Type{} - case arrow.STRING: - return &types.StringType{} - case arrow.BINARY: - return &types.BinaryType{} - } - panic("wtf") - } - - expect := func(t *testing.T, fn string, arg1, arg2 scalar.Scalar, res bool) { - baseStruct := types.NamedStruct{ - Names: []string{"arg1", "arg2"}, - Struct: types.StructType{ - Types: []types.Type{getArgType(arg1.DataType()), getArgType(arg2.DataType())}, - }, - } - - ex, err := exprs.NewScalarCall(extSet, fn, nil, - expr.MustExpr(expr.NewRootFieldRef(expr.NewStructFieldRef(0), &baseStruct.Struct)), - expr.MustExpr(expr.NewRootFieldRef(expr.NewStructFieldRef(1), &baseStruct.Struct))) - require.NoError(t, err) - - expression := &expr.Extended{ - Extensions: extSet.GetSubstraitRegistry().Set, - ReferredExpr: []expr.ExpressionReference{ - expr.NewExpressionReference([]string{"out"}, ex), - }, - BaseSchema: baseStruct, - } - - input, _ := scalar.NewStructScalarWithNames([]scalar.Scalar{arg1, arg2}, []string{"arg1", "arg2"}) - out, err := exprs.ExecuteScalarSubstrait(ctx, expression, compute.NewDatum(input)) - require.NoError(t, err) - require.Equal(t, compute.KindScalar, out.Kind()) - - result := out.(*compute.ScalarDatum).Value - assert.Equal(t, res, result.(*scalar.Boolean).Value) - } - - expect(t, "equal", one, one, true) - expect(t, "equal", one, two, false) - expect(t, "less", one, two, true) - expect(t, "less", one, zero, false) - expect(t, "greater", one, zero, true) - expect(t, "greater", one, two, false) - - expect(t, "equal", str, bin, true) - expect(t, "equal", bin, str, true) -} - -func TestExecuteFieldRef(t *testing.T) { - mem := 
memory.NewCheckedAllocator(memory.NewGoAllocator()) - fromJSON := func(ty arrow.DataType, json string) arrow.Array { - arr, _, err := array.FromJSON(mem, ty, strings.NewReader(json)) - require.NoError(t, err) - return arr - } - - scalarFromJSON := func(ty arrow.DataType, json string) scalar.Scalar { - arr, _, err := array.FromJSON(mem, ty, strings.NewReader(json)) - require.NoError(t, err) - defer arr.Release() - s, err := scalar.GetScalar(arr, 0) - require.NoError(t, err) - return s - } - - tests := []struct { - testName string - ref compute.FieldRef - input compute.Datum - expected compute.Datum - }{ - {"basic ref", compute.FieldRefName("a"), compute.NewDatumWithoutOwning(fromJSON( - arrow.StructOf(arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - `[ - {"a": 6.125}, - {"a": 0.0}, - {"a": -1} - ]`)), compute.NewDatumWithoutOwning(fromJSON( - arrow.PrimitiveTypes.Float64, `[6.125, 0.0, -1]`))}, - {"ref one field", compute.FieldRefName("a"), compute.NewDatumWithoutOwning(fromJSON( - arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - `[ - {"a": 6.125, "b": 7.5}, - {"a": 0.0, "b": 2.125}, - {"a": -1, "b": 4.0} - ]`)), compute.NewDatumWithoutOwning(fromJSON( - arrow.PrimitiveTypes.Float64, `[6.125, 0.0, -1]`))}, - {"second field", compute.FieldRefName("b"), compute.NewDatumWithoutOwning(fromJSON( - arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - `[ - {"a": 6.125, "b": 7.5}, - {"a": 0.0, "b": 2.125}, - {"a": -1, "b": 4.0} - ]`)), compute.NewDatumWithoutOwning(fromJSON( - arrow.PrimitiveTypes.Float64, `[7.5, 2.125, 4.0]`))}, - {"nested field by path", compute.FieldRefPath(compute.FieldPath{0, 0}), compute.NewDatumWithoutOwning(fromJSON( - arrow.StructOf( - arrow.Field{Name: "a", Type: 
arrow.StructOf( - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - Nullable: true}), - `[ - {"a": {"b": 6.125}}, - {"a": {"b": 0.0}}, - {"a": {"b": -1}} - ]`)), compute.NewDatumWithoutOwning(fromJSON( - arrow.PrimitiveTypes.Float64, `[6.125, 0.0, -1]`))}, - {"nested field by name", compute.FieldRefList("a", "b"), compute.NewDatumWithoutOwning(fromJSON( - arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.StructOf( - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - Nullable: true}), - `[ - {"a": {"b": 6.125}}, - {"a": {"b": 0.0}}, - {"a": {"b": -1}} - ]`)), compute.NewDatumWithoutOwning(fromJSON( - arrow.PrimitiveTypes.Float64, `[6.125, 0.0, -1]`))}, - {"nested field with nulls", compute.FieldRefList("a", "b"), compute.NewDatumWithoutOwning(fromJSON( - arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.StructOf( - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - Nullable: true}), - `[ - {"a": {"b": 6.125}}, - {"a": null}, - {"a": {"b": null}} - ]`)), compute.NewDatumWithoutOwning(fromJSON( - arrow.PrimitiveTypes.Float64, `[6.125, null, null]`))}, - {"nested scalar", compute.FieldRefList("a", "b"), compute.NewDatumWithoutOwning( - scalarFromJSON(arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.StructOf( - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - Nullable: true}), `[{"a": {"b": 64.0}}]`)), - compute.NewDatum(scalar.NewFloat64Scalar(64.0))}, - {"nested scalar with null", compute.FieldRefList("a", "b"), compute.NewDatumWithoutOwning( - scalarFromJSON(arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.StructOf( - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - Nullable: true}), `[{"a": {"b": null}}]`)), - compute.NewDatum(scalar.MakeNullScalar(arrow.PrimitiveTypes.Float64))}, - {"nested scalar null", compute.FieldRefList("a", "b"), compute.NewDatumWithoutOwning( - scalarFromJSON(arrow.StructOf( - 
arrow.Field{Name: "a", Type: arrow.StructOf( - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - Nullable: true}), `[{"a": null}]`)), - compute.NewDatum(scalar.MakeNullScalar(arrow.PrimitiveTypes.Float64))}, - } - - for _, tt := range tests { - t.Run(tt.testName, func(t *testing.T) { - scoped := memory.NewCheckedAllocatorScope(mem) - defer scoped.CheckSize(t) - - ctx := exprs.WithExtensionIDSet(compute.WithAllocator(context.Background(), mem), extSet) - dt := tt.input.(compute.ArrayLikeDatum).Type().(arrow.NestedType) - schema := arrow.NewSchema(dt.Fields(), nil) - ref, err := exprs.NewFieldRef(tt.ref, schema, extSet) - require.NoError(t, err) - assert.NotNil(t, ref) - - actual, err := exprs.ExecuteScalarExpression(ctx, schema, ref, tt.input) - require.NoError(t, err) - defer actual.Release() - - assert.Truef(t, tt.expected.Equals(actual), "expected: %s\ngot: %s", tt.expected, actual) - }) - } -} - -func TestExecuteScalarFuncCall(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - fromJSON := func(ty arrow.DataType, json string) arrow.Array { - arr, _, err := array.FromJSON(mem, ty, strings.NewReader(json)) - require.NoError(t, err) - return arr - } - - basicSchema := arrow.NewSchema([]arrow.Field{ - {Name: "a", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - {Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, nil) - - nestedSchema := arrow.NewSchema([]arrow.Field{ - {Name: "a", Type: arrow.StructOf(basicSchema.Fields()...), Nullable: false}, - }, nil) - - bldr := exprs.NewExprBuilder(extSet) - - tests := []struct { - name string - ex exprs.Builder - sc *arrow.Schema - input compute.Datum - expected compute.Datum - }{ - {"add", bldr.MustCallScalar("add", nil, bldr.FieldRef("a"), - bldr.Literal(expr.NewPrimitiveLiteral(float64(3.5), false))), - basicSchema, - compute.NewDatumWithoutOwning(fromJSON(arrow.StructOf(basicSchema.Fields()...), - `[ - {"a": 6.125, "b": 3.375}, - {"a": 0.0, 
"b": 1}, - {"a": -1, "b": 4.75} - ]`)), compute.NewDatumWithoutOwning(fromJSON(arrow.PrimitiveTypes.Float64, - `[9.625, 3.5, 2.5]`))}, - {"add sub", bldr.MustCallScalar("add", nil, bldr.FieldRef("a"), - bldr.MustCallScalar("subtract", nil, - bldr.WrapLiteral(expr.NewLiteral(float64(3.5), false)), - bldr.FieldRef("b"))), - basicSchema, - compute.NewDatumWithoutOwning(fromJSON(arrow.StructOf(basicSchema.Fields()...), - `[ - {"a": 6.125, "b": 3.375}, - {"a": 0.0, "b": 1}, - {"a": -1, "b": 4.75} - ]`)), compute.NewDatumWithoutOwning(fromJSON(arrow.PrimitiveTypes.Float64, - `[6.25, 2.5, -2.25]`))}, - {"add nested", bldr.MustCallScalar("add", nil, - bldr.FieldRefList("a", "a"), bldr.FieldRefList("a", "b")), nestedSchema, - compute.NewDatumWithoutOwning(fromJSON(arrow.StructOf(nestedSchema.Fields()...), - `[ - {"a": {"a": 6.125, "b": 3.375}}, - {"a": {"a": 0.0, "b": 1}}, - {"a": {"a": -1, "b": 4.75}} - ]`)), compute.NewDatumWithoutOwning(fromJSON(arrow.PrimitiveTypes.Float64, - `[9.5, 1, 3.75]`))}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - scoped := memory.NewCheckedAllocatorScope(mem) - defer scoped.CheckSize(t) - - bldr.SetInputSchema(tt.sc) - ex, err := tt.ex.BuildExpr() - require.NoError(t, err) - - ctx := exprs.WithExtensionIDSet(compute.WithAllocator(context.Background(), mem), extSet) - dt := tt.input.(compute.ArrayLikeDatum).Type().(arrow.NestedType) - schema := arrow.NewSchema(dt.Fields(), nil) - - actual, err := exprs.ExecuteScalarExpression(ctx, schema, ex, tt.input) - require.NoError(t, err) - defer actual.Release() - - assert.Truef(t, tt.expected.Equals(actual), "expected: %s\ngot: %s", tt.expected, actual) - }) - } -} - -func TestGenerateMask(t *testing.T) { - sc, err := boringArrowSchema.AddField(0, arrow.Field{ - Name: "in", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}) - require.NoError(t, err) - - bldr := exprs.NewExprBuilder(extSet) - require.NoError(t, bldr.SetInputSchema(sc)) - - tests := []struct { - name 
string - json string - filter exprs.Builder - }{ - {"simple", `[ - {"i32": 0, "f32": -0.1, "in": true}, - {"i32": 0, "f32": 0.3, "in": true}, - {"i32": 1, "f32": 0.2, "in": false}, - {"i32": 2, "f32": -0.1, "in": false}, - {"i32": 0, "f32": 0.1, "in": true}, - {"i32": 0, "f32": null, "in": true}, - {"i32": 0, "f32": 1.0, "in": true} - ]`, bldr.MustCallScalar("equal", nil, - bldr.FieldRef("i32"), bldr.Literal(expr.NewPrimitiveLiteral(int32(0), false)))}, - {"complex", `[ - {"f64": 0.3, "f32": 0.1, "in": true}, - {"f64": -0.1, "f32": 0.3, "in": false}, - {"f64": 0.1, "f32": 0.2, "in": true}, - {"f64": 0.0, "f32": -0.1, "in": false}, - {"f64": 1.0, "f32": 0.1, "in": true}, - {"f64": -2.0, "f32": null, "in": null}, - {"f64": 3.0, "f32": 1.0, "in": true} - ]`, bldr.MustCallScalar("greater", nil, - bldr.MustCallScalar("multiply", nil, - bldr.Must(bldr.Cast(bldr.FieldRef("f32"), arrow.PrimitiveTypes.Float64)), - bldr.FieldRef("f64")), - bldr.Literal(expr.NewPrimitiveLiteral(float64(0), false)))}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ctx := exprs.WithExtensionIDSet(compute.WithAllocator(context.Background(), mem), extSet) - - rec, _, err := array.RecordFromJSON(mem, sc, strings.NewReader(tt.json)) - require.NoError(t, err) - defer rec.Release() - - input := compute.NewDatumWithoutOwning(rec) - expectedMask := rec.Column(0) - - mask, err := exprs.ExecuteScalarExpression(ctx, sc, - expr.MustExpr(tt.filter.BuildExpr()), input) - require.NoError(t, err) - defer mask.Release() - - assertEqual(t, expectedMask, mask) - }) - } -} diff --git a/go/arrow/compute/exprs/extension_types.go b/go/arrow/compute/exprs/extension_types.go deleted file mode 100644 index 8177675592fc9..0000000000000 --- a/go/arrow/compute/exprs/extension_types.go +++ /dev/null @@ -1,149 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor 
license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package exprs - -import ( - "encoding/json" - "fmt" - "reflect" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" -) - -type simpleExtensionTypeFactory[P comparable] struct { - arrow.ExtensionBase - - params P - name string - getStorage func(P) arrow.DataType -} - -func (ef *simpleExtensionTypeFactory[P]) String() string { return "extension<" + ef.Serialize() + ">" } -func (ef *simpleExtensionTypeFactory[P]) ExtensionName() string { return ef.name } -func (ef *simpleExtensionTypeFactory[P]) Serialize() string { - s, _ := json.Marshal(ef.params) - return ef.name + string(s) -} -func (ef *simpleExtensionTypeFactory[P]) Deserialize(storage arrow.DataType, data string) (arrow.ExtensionType, error) { - if !strings.HasPrefix(data, ef.name) { - return nil, fmt.Errorf("%w: invalid deserialization of extension type %s", arrow.ErrInvalid, ef.name) - } - - data = strings.TrimPrefix(data, ef.name) - if err := json.Unmarshal([]byte(data), &ef.params); err != nil { - return nil, fmt.Errorf("%w: failed parsing parameters for extension type", err) - } - - if !arrow.TypeEqual(storage, ef.getStorage(ef.params)) { - return nil, fmt.Errorf("%w: invalid storage type for %s: %s (expected: %s)", - 
arrow.ErrInvalid, ef.name, storage, ef.getStorage(ef.params)) - } - - return &simpleExtensionTypeFactory[P]{ - name: ef.name, - params: ef.params, - getStorage: ef.getStorage, - ExtensionBase: arrow.ExtensionBase{ - Storage: storage, - }, - }, nil -} -func (ef *simpleExtensionTypeFactory[P]) ExtensionEquals(other arrow.ExtensionType) bool { - if ef.name != other.ExtensionName() { - return false - } - - rhs := other.(*simpleExtensionTypeFactory[P]) - return ef.params == rhs.params -} -func (ef *simpleExtensionTypeFactory[P]) ArrayType() reflect.Type { - return reflect.TypeOf(array.ExtensionArrayBase{}) -} - -func (ef *simpleExtensionTypeFactory[P]) CreateType(params P) arrow.DataType { - storage := ef.getStorage(params) - - return &simpleExtensionTypeFactory[P]{ - name: ef.name, - params: params, - getStorage: ef.getStorage, - ExtensionBase: arrow.ExtensionBase{ - Storage: storage, - }, - } -} - -type uuidExtParams struct{} - -var uuidType = simpleExtensionTypeFactory[uuidExtParams]{ - name: "uuid", getStorage: func(uuidExtParams) arrow.DataType { - return &arrow.FixedSizeBinaryType{ByteWidth: 16} - }} - -type fixedCharExtensionParams struct { - Length int32 `json:"length"` -} - -var fixedCharType = simpleExtensionTypeFactory[fixedCharExtensionParams]{ - name: "fixed_char", getStorage: func(p fixedCharExtensionParams) arrow.DataType { - return &arrow.FixedSizeBinaryType{ByteWidth: int(p.Length)} - }, -} - -type varCharExtensionParams struct { - Length int32 `json:"length"` -} - -var varCharType = simpleExtensionTypeFactory[varCharExtensionParams]{ - name: "varchar", getStorage: func(varCharExtensionParams) arrow.DataType { - return arrow.BinaryTypes.String - }, -} - -type intervalYearExtensionParams struct{} - -var intervalYearType = simpleExtensionTypeFactory[intervalYearExtensionParams]{ - name: "interval_year", getStorage: func(intervalYearExtensionParams) arrow.DataType { - return arrow.FixedSizeListOf(2, arrow.PrimitiveTypes.Int32) - }, -} - -type 
intervalDayExtensionParams struct{} - -var intervalDayType = simpleExtensionTypeFactory[intervalDayExtensionParams]{ - name: "interval_day", getStorage: func(intervalDayExtensionParams) arrow.DataType { - return arrow.FixedSizeListOf(2, arrow.PrimitiveTypes.Int32) - }, -} - -func uuid() arrow.DataType { return uuidType.CreateType(uuidExtParams{}) } -func fixedChar(length int32) arrow.DataType { - return fixedCharType.CreateType(fixedCharExtensionParams{Length: length}) -} -func varChar(length int32) arrow.DataType { - return varCharType.CreateType(varCharExtensionParams{Length: length}) -} -func intervalYear() arrow.DataType { - return intervalYearType.CreateType(intervalYearExtensionParams{}) -} -func intervalDay() arrow.DataType { - return intervalDayType.CreateType(intervalDayExtensionParams{}) -} diff --git a/go/arrow/compute/exprs/field_refs.go b/go/arrow/compute/exprs/field_refs.go deleted file mode 100644 index 0e039d9e26601..0000000000000 --- a/go/arrow/compute/exprs/field_refs.go +++ /dev/null @@ -1,254 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package exprs - -import ( - "fmt" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/substrait-io/substrait-go/expr" -) - -func getFields(typ arrow.DataType) []arrow.Field { - if nested, ok := typ.(arrow.NestedType); ok { - return nested.Fields() - } - return nil -} - -// GetRefField evaluates the substrait field reference to retrieve the -// referenced field or return an error. -func GetRefField(ref expr.ReferenceSegment, fields []arrow.Field) (*arrow.Field, error) { - if ref == nil { - return nil, compute.ErrEmpty - } - - var ( - out *arrow.Field - ) - - for ref != nil { - if len(fields) == 0 { - return nil, fmt.Errorf("%w: %s", compute.ErrNoChildren, out.Type) - } - - switch f := ref.(type) { - case *expr.StructFieldRef: - if f.Field < 0 || f.Field >= int32(len(fields)) { - return nil, fmt.Errorf("%w: indices=%s", compute.ErrIndexRange, f) - } - - out = &fields[f.Field] - fields = getFields(out.Type) - default: - return nil, arrow.ErrNotImplemented - } - - ref = ref.GetChild() - } - - return out, nil -} - -// GetRefSchema evaluates the provided substrait field reference against -// the schema to retrieve the referenced (potentially nested) field. -func GetRefSchema(ref expr.ReferenceSegment, schema *arrow.Schema) (*arrow.Field, error) { - return GetRefField(ref, schema.Fields()) -} - -// GetScalar returns the evaluated referenced scalar value from the provided -// scalar which must be appropriate to the type of reference. -// -// A StructFieldRef can only reference against a Struct-type scalar, a -// ListElementRef can only reference against a List or LargeList scalar, -// and a MapKeyRef will only reference against a Map scalar. An error is -// returned if following the reference children ends up with an invalid -// nested reference object. 
-func GetScalar(ref expr.ReferenceSegment, s scalar.Scalar, mem memory.Allocator, ext ExtensionIDSet) (scalar.Scalar, error) { - if ref == nil { - return nil, compute.ErrEmpty - } - - var out scalar.Scalar - for ref != nil { - switch f := ref.(type) { - case *expr.StructFieldRef: - if s.DataType().ID() != arrow.STRUCT { - return nil, fmt.Errorf("%w: attempting to reference field from non-struct scalar %s", - arrow.ErrInvalid, s) - } - - st := s.(*scalar.Struct) - if f.Field < 0 || f.Field >= int32(len(st.Value)) { - return nil, fmt.Errorf("%w: indices=%s", compute.ErrIndexRange, ref) - } - - out = st.Value[f.Field] - case *expr.ListElementRef: - switch v := s.(type) { - case *scalar.List: - sc, err := scalar.GetScalar(v.Value, int(f.Offset)) - if err != nil { - return nil, err - } - out = sc - case *scalar.LargeList: - sc, err := scalar.GetScalar(v.Value, int(f.Offset)) - if err != nil { - return nil, err - } - out = sc - default: - return nil, fmt.Errorf("%w: cannot get ListElementRef from non-list scalar %s", - arrow.ErrInvalid, v) - } - case *expr.MapKeyRef: - v, ok := s.(*scalar.Map) - if !ok { - return nil, arrow.ErrInvalid - } - - dt, _, err := FromSubstraitType(f.MapKey.GetType(), ext) - if err != nil { - return nil, err - } - - if !arrow.TypeEqual(dt, v.Type.(*arrow.MapType).KeyType()) { - return nil, arrow.ErrInvalid - } - - keyvalDatum, err := literalToDatum(mem, f.MapKey, ext) - if err != nil { - return nil, err - } - - var ( - keyval = keyvalDatum.(*compute.ScalarDatum) - m = v.Value.(*array.Struct) - keys = m.Field(0) - valueScalar scalar.Scalar - ) - for i := 0; i < v.Value.Len(); i++ { - kv, err := scalar.GetScalar(keys, i) - if err != nil { - return nil, err - } - if scalar.Equals(kv, keyval.Value) { - valueScalar, err = scalar.GetScalar(m.Field(1), i) - if err != nil { - return nil, err - } - break - } - } - - if valueScalar == nil { - return nil, arrow.ErrNotFound - } - - out = valueScalar - } - s = out - ref = ref.GetChild() - } - - return out, 
nil -} - -// GetReferencedValue retrieves the referenced (potentially nested) value from -// the provided datum which may be a scalar, array, or record batch. -func GetReferencedValue(mem memory.Allocator, ref expr.ReferenceSegment, value compute.Datum, ext ExtensionIDSet) (compute.Datum, error) { - if ref == nil { - return nil, compute.ErrEmpty - } - - for ref != nil { - // process the rest of the refs for the scalars - // since arrays can go down to a scalar, but you - // won't get an array from a scalar via ref - if v, ok := value.(*compute.ScalarDatum); ok { - out, err := GetScalar(ref, v.Value, mem, ext) - if err != nil { - return nil, err - } - - return &compute.ScalarDatum{Value: out}, nil - } - - switch r := ref.(type) { - case *expr.MapKeyRef: - return nil, arrow.ErrNotImplemented - case *expr.StructFieldRef: - switch v := value.(type) { - case *compute.ArrayDatum: - if v.Type().ID() != arrow.STRUCT { - return nil, fmt.Errorf("%w: struct field ref for non struct type %s", - arrow.ErrInvalid, v.Type()) - } - - if r.Field < 0 || r.Field >= int32(len(v.Value.Children())) { - return nil, fmt.Errorf("%w: indices=%s", compute.ErrIndexRange, ref) - } - - value = &compute.ArrayDatum{Value: v.Value.Children()[r.Field]} - case *compute.RecordDatum: - if r.Field < 0 || r.Field >= int32(v.Value.NumCols()) { - return nil, fmt.Errorf("%w: indices=%s", compute.ErrIndexRange, ref) - } - - value = &compute.ArrayDatum{Value: v.Value.Column(int(r.Field)).Data()} - default: - return nil, arrow.ErrNotImplemented - } - case *expr.ListElementRef: - switch v := value.(type) { - case *compute.ArrayDatum: - switch v.Type().ID() { - case arrow.LIST, arrow.LARGE_LIST, arrow.FIXED_SIZE_LIST: - arr := v.MakeArray() - defer arr.Release() - - sc, err := scalar.GetScalar(arr, int(r.Offset)) - if err != nil { - return nil, err - } - if s, ok := sc.(scalar.Releasable); ok { - defer s.Release() - } - - value = &compute.ScalarDatum{Value: sc} - default: - return nil, fmt.Errorf("%w: cannot 
reference list element in non-list array type %s", - arrow.ErrInvalid, v.Type()) - } - - default: - return nil, arrow.ErrNotImplemented - } - } - - ref = ref.GetChild() - } - - return value, nil -} diff --git a/go/arrow/compute/exprs/types.go b/go/arrow/compute/exprs/types.go deleted file mode 100644 index 594a55c9041a8..0000000000000 --- a/go/arrow/compute/exprs/types.go +++ /dev/null @@ -1,745 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package exprs - -import ( - "fmt" - "hash/maphash" - "strconv" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/substrait-io/substrait-go/expr" - "github.com/substrait-io/substrait-go/extensions" - "github.com/substrait-io/substrait-go/types" -) - -const ( - // URI for official Arrow Substrait Extension Types - ArrowExtTypesUri = "https://github.com/apache/arrow/blob/main/format/substrait/extension_types.yaml" - SubstraitDefaultURIPrefix = extensions.SubstraitDefaultURIPrefix - // URI for official Substrait Arithmetic funcs extensions - SubstraitArithmeticFuncsURI = SubstraitDefaultURIPrefix + "functions_arithmetic.yaml" - // URI for official Substrait Comparison funcs extensions - SubstraitComparisonFuncsURI = SubstraitDefaultURIPrefix + "functions_comparison.yaml" - - TimestampTzTimezone = "UTC" -) - -var hashSeed maphash.Seed - -// the default extension registry that will contain the Arrow extension -// type variations and types. 
-var DefaultExtensionIDRegistry = NewExtensionIDRegistry() - -func init() { - hashSeed = maphash.MakeSeed() - - types := []struct { - dt arrow.DataType - name string - }{ - {arrow.PrimitiveTypes.Uint8, "u8"}, - {arrow.PrimitiveTypes.Uint16, "u16"}, - {arrow.PrimitiveTypes.Uint32, "u32"}, - {arrow.PrimitiveTypes.Uint64, "u64"}, - {arrow.FixedWidthTypes.Float16, "fp16"}, - {arrow.Null, "null"}, - {arrow.FixedWidthTypes.MonthInterval, "interval_month"}, - {arrow.FixedWidthTypes.DayTimeInterval, "interval_day_milli"}, - {arrow.FixedWidthTypes.MonthDayNanoInterval, "interval_month_day_nano"}, - } - - for _, t := range types { - err := DefaultExtensionIDRegistry.RegisterType(extensions.ID{ - URI: ArrowExtTypesUri, Name: t.name}, t.dt) - if err != nil { - panic(err) - } - } - - for _, fn := range []string{"add", "subtract", "multiply", "divide", "power", "sqrt", "abs"} { - err := DefaultExtensionIDRegistry.AddSubstraitScalarToArrow( - extensions.ID{URI: SubstraitArithmeticFuncsURI, Name: fn}, - decodeOptionlessOverflowableArithmetic(fn)) - if err != nil { - panic(err) - } - } - - for _, fn := range []string{"add", "subtract", "multiply", "divide"} { - err := DefaultExtensionIDRegistry.AddArrowToSubstrait(fn, - encodeOptionlessOverflowableArithmetic(extensions.ID{ - URI: SubstraitArithmeticFuncsURI, Name: fn})) - if err != nil { - panic(err) - } - } - - for _, fn := range []string{"equal", "not_equal", "lt", "lte", "gt", "gte"} { - err := DefaultExtensionIDRegistry.AddSubstraitScalarToArrow( - extensions.ID{URI: SubstraitComparisonFuncsURI, Name: fn}, - simpleMapSubstraitToArrowFunc) - if err != nil { - panic(err) - } - } - - for _, fn := range []string{"equal", "not_equal", "less", "less_equal", "greater", "greater_equal"} { - err := DefaultExtensionIDRegistry.AddArrowToSubstrait(fn, - simpleMapArrowToSubstraitFunc(SubstraitComparisonFuncsURI)) - if err != nil { - panic(err) - } - } -} - -type overflowBehavior string - -const ( - overflowSILENT = "SILENT" - 
overflowSATURATE = "SATURATE" - overflowERROR = "ERROR" -) - -type enumParser[typ ~string] struct { - values map[typ]struct{} -} - -func (e *enumParser[typ]) parse(v string) (typ, error) { - out := typ(v) - if _, ok := e.values[out]; ok { - return out, nil - } - return "", arrow.ErrNotFound -} - -var overflowParser = enumParser[overflowBehavior]{ - values: map[overflowBehavior]struct{}{ - overflowSILENT: {}, - overflowSATURATE: {}, - overflowERROR: {}, - }, -} - -func parseOption[typ ~string](sf *expr.ScalarFunction, optionName string, parser *enumParser[typ], implemented []typ, def typ) (typ, error) { - opts := sf.GetOption(optionName) - if len(opts) == 0 { - return def, nil - } - - for _, o := range opts { - p, err := parser.parse(o) - if err != nil { - return def, arrow.ErrInvalid - } - for _, i := range implemented { - if i == p { - return p, nil - } - } - } - - return def, arrow.ErrNotImplemented -} - -type substraitToArrow = func(*expr.ScalarFunction) (fname string, opts compute.FunctionOptions, err error) -type arrowToSubstrait = func(fname string) (extensions.ID, []*types.FunctionOption, error) - -var substraitToArrowFuncMap = map[string]string{ - "lt": "less", - "gt": "greater", - "lte": "less_equal", - "gte": "greater_equal", -} - -var arrowToSubstraitFuncMap = map[string]string{ - "less": "lt", - "greater": "gt", - "less_equal": "lte", - "greater_equal": "gte", -} - -func simpleMapSubstraitToArrowFunc(sf *expr.ScalarFunction) (fname string, opts compute.FunctionOptions, err error) { - fname, _, _ = strings.Cut(sf.Name(), ":") - f, ok := substraitToArrowFuncMap[fname] - if ok { - fname = f - } - return -} - -func simpleMapArrowToSubstraitFunc(uri string) arrowToSubstrait { - return func(fname string) (extensions.ID, []*types.FunctionOption, error) { - f, ok := arrowToSubstraitFuncMap[fname] - if ok { - fname = f - } - return extensions.ID{URI: uri, Name: fname}, nil, nil - } -} - -func decodeOptionlessOverflowableArithmetic(n string) substraitToArrow { - 
return func(sf *expr.ScalarFunction) (fname string, opts compute.FunctionOptions, err error) { - overflow, err := parseOption(sf, "overflow", &overflowParser, []overflowBehavior{overflowSILENT, overflowERROR}, overflowSILENT) - if err != nil { - return n, nil, err - } - - switch overflow { - case overflowSILENT: - return n + "_unchecked", nil, nil - case overflowERROR: - return n, nil, nil - default: - return n, nil, arrow.ErrNotImplemented - } - } -} - -func encodeOptionlessOverflowableArithmetic(id extensions.ID) arrowToSubstrait { - return func(fname string) (extensions.ID, []*types.FunctionOption, error) { - fn, _, ok := strings.Cut(fname, ":") - if ok { - id.Name = fname - fname = fn - } - - opts := make([]*types.FunctionOption, 0, 1) - if strings.HasSuffix(fname, "_unchecked") { - opts = append(opts, &types.FunctionOption{ - Name: "overflow", Preference: []string{"SILENT"}}) - } else { - opts = append(opts, &types.FunctionOption{ - Name: "overflow", Preference: []string{"ERROR"}}) - } - - return id, opts, nil - } -} - -// NewExtensionSetDefault is a convenience function to create a new extension -// set using the Default arrow extension ID registry. -// -// See NewExtensionSet for more info. -func NewExtensionSetDefault(set expr.ExtensionRegistry) ExtensionIDSet { - return &extensionSet{ExtensionRegistry: set, reg: DefaultExtensionIDRegistry} -} - -// NewExtensionSet creates a new extension set given a substrait extension registry, -// and an Arrow <--> Substrait registry for mapping substrait extensions to -// their Arrow equivalents. This extension set can then be used to manage a -// particular set of extensions in use by an expression or plan, so when -// serializing you only need to serialize the extensions that have been -// inserted into the extension set. 
-func NewExtensionSet(set expr.ExtensionRegistry, reg *ExtensionIDRegistry) ExtensionIDSet { - return &extensionSet{ExtensionRegistry: set, reg: reg} -} - -type extensionSet struct { - expr.ExtensionRegistry - reg *ExtensionIDRegistry -} - -func (e *extensionSet) GetArrowRegistry() *ExtensionIDRegistry { return e.reg } -func (e *extensionSet) GetSubstraitRegistry() expr.ExtensionRegistry { return e.ExtensionRegistry } - -func (e *extensionSet) DecodeTypeArrow(anchor uint32) (extensions.ID, arrow.DataType, bool) { - id, ok := e.Set.DecodeType(anchor) - if !ok { - if id, ok = e.Set.DecodeTypeVariation(anchor); !ok { - return id, nil, false - } - } - - dt, ok := e.reg.GetTypeByID(id) - return id, dt, ok -} - -func (e *extensionSet) DecodeFunction(ref uint32) (extensions.ID, substraitToArrow, bool) { - id, ok := e.Set.DecodeFunc(ref) - if !ok { - return id, nil, false - } - - conv, ok := e.reg.GetSubstraitScalarToArrow(id) - if !ok { - id.Name, _, ok = strings.Cut(id.Name, ":") - if ok { - conv, ok = e.reg.GetSubstraitScalarToArrow(id) - } - } - return id, conv, ok -} - -func (e *extensionSet) EncodeTypeVariation(dt arrow.DataType) (extensions.ID, uint32, bool) { - id, ok := e.reg.GetIDByType(dt) - if !ok { - return extensions.ID{}, 0, false - } - - return id, e.Set.GetTypeVariationAnchor(id), true -} - -func (e *extensionSet) EncodeType(dt arrow.DataType) (extensions.ID, uint32, bool) { - id, ok := e.reg.GetIDByType(dt) - if !ok { - return extensions.ID{}, 0, false - } - - return id, e.Set.GetTypeAnchor(id), true -} - -func (e *extensionSet) EncodeFunction(id extensions.ID) uint32 { - return e.Set.GetFuncAnchor(id) -} - -// ExtensionIDRegistry manages a set of mappings between Arrow types -// and functions and their substrait equivalents. 
-type ExtensionIDRegistry struct { - typeList []arrow.DataType - ids []extensions.ID - - substraitToIdx map[extensions.ID]int - arrowToIdx map[uint64]int - - substraitToArrowFn map[extensions.ID]substraitToArrow - arrowToSubstrait map[string]arrowToSubstrait -} - -// NewExtensionIDRegistry initializes a new registry for use. -func NewExtensionIDRegistry() *ExtensionIDRegistry { - return &ExtensionIDRegistry{ - typeList: make([]arrow.DataType, 0), - ids: make([]extensions.ID, 0), - substraitToIdx: make(map[extensions.ID]int), - arrowToIdx: make(map[uint64]int), - substraitToArrowFn: make(map[extensions.ID]substraitToArrow), - arrowToSubstrait: make(map[string]arrowToSubstrait), - } -} - -// RegisterType creates a mapping between the given extension ID and the -// provided Arrow data type. If this extension ID or arrow type are already -// registered, an arrow.ErrInvalid error will be returned. -func (e *ExtensionIDRegistry) RegisterType(id extensions.ID, dt arrow.DataType) error { - if _, ok := e.substraitToIdx[id]; ok { - return fmt.Errorf("%w: type id already registered", arrow.ErrInvalid) - } - - dthash := arrow.HashType(hashSeed, dt) - if _, ok := e.arrowToIdx[dthash]; ok { - return fmt.Errorf("%w: type already registered", arrow.ErrInvalid) - } - - idx := len(e.ids) - e.typeList = append(e.typeList, dt) - e.ids = append(e.ids, id) - e.substraitToIdx[id] = idx - e.arrowToIdx[dthash] = idx - return nil -} - -// AddSubstraitScalarToArrow creates a mapping between a given extension ID -// and a function which should return the corresponding Arrow compute function -// name along with any relevant FunctionOptions based on the ScalarFunction -// instance passed to it. -// -// Any relevant options should be parsed from the ScalarFunction's options -// and used to ensure the correct arrow compute function is used and necessary -// options are passed. 
-func (e *ExtensionIDRegistry) AddSubstraitScalarToArrow(id extensions.ID, toArrow substraitToArrow) error { - if _, ok := e.substraitToArrowFn[id]; ok { - return fmt.Errorf("%w: extension id already registered as function", arrow.ErrInvalid) - } - - e.substraitToArrowFn[id] = toArrow - return nil -} - -// AddArrowToSubstrait creates a mapping between the provided arrow compute function -// and a function which should provide the correct substrait ExtensionID and function -// options from that name. -func (e *ExtensionIDRegistry) AddArrowToSubstrait(name string, fn arrowToSubstrait) error { - if _, ok := e.arrowToSubstrait[name]; ok { - return fmt.Errorf("%w: function name '%s' already registered for conversion to substrait", arrow.ErrInvalid, name) - } - - e.arrowToSubstrait[name] = fn - return nil -} - -// GetTypeByID returns the mapped arrow data type from the provided substrait -// extension id. If no mapping exists for this substrait extension id, -// the second return value will be false. -func (e *ExtensionIDRegistry) GetTypeByID(id extensions.ID) (arrow.DataType, bool) { - idx, ok := e.substraitToIdx[id] - if !ok { - return nil, false - } - - return e.typeList[idx], true -} - -// GetIDByType is the inverse of GetTypeByID, returning the mapped substrait -// extension ID corresponding to the provided arrow data type. The second -// return is false if there is no mapping found. -func (e *ExtensionIDRegistry) GetIDByType(typ arrow.DataType) (extensions.ID, bool) { - dthash := arrow.HashType(hashSeed, typ) - idx, ok := e.arrowToIdx[dthash] - if !ok { - return extensions.ID{}, false - } - - return e.ids[idx], true -} - -// GetSubstraitScalarToArrow returns the mapped conversion function for a -// given substrait extension ID to convert a substrait ScalarFunction to -// the corresponding Arrow compute function call. False is returned as -// the second value if there is no mapping available. 
-func (e *ExtensionIDRegistry) GetSubstraitScalarToArrow(id extensions.ID) (substraitToArrow, bool) { - conv, ok := e.substraitToArrowFn[id] - if !ok { - return nil, ok - } - - return conv, true -} - -// GetArrowToSubstrait returns the mapped function to convert an arrow compute -// function to the corresponding Substrait ScalarFunction extension ID and options. -// False is returned as the second value if there is no mapping found. -func (e *ExtensionIDRegistry) GetArrowToSubstrait(name string) (conv arrowToSubstrait, ok bool) { - conv, ok = e.arrowToSubstrait[name] - if !ok { - fn, _, found := strings.Cut(name, ":") - if found { - conv, ok = e.arrowToSubstrait[fn] - } - } - return -} - -// ExtensionIDSet is an interface for managing the mapping between arrow -// and substrait types and function extensions. -type ExtensionIDSet interface { - GetArrowRegistry() *ExtensionIDRegistry - GetSubstraitRegistry() expr.ExtensionRegistry - - DecodeTypeArrow(anchor uint32) (extensions.ID, arrow.DataType, bool) - DecodeFunction(ref uint32) (extensions.ID, substraitToArrow, bool) - - EncodeType(dt arrow.DataType) (extensions.ID, uint32, bool) - EncodeTypeVariation(dt arrow.DataType) (extensions.ID, uint32, bool) -} - -// IsNullable is a convenience method to return whether or not -// a substrait type has Nullability set to NullabilityRequired or not. -func IsNullable(t types.Type) bool { - return t.GetNullability() != types.NullabilityRequired -} - -// FieldsFromSubstrait produces a list of arrow fields from a list of -// substrait types (such as the fields of a StructType) using nextName -// to determine the names for the fields. 
-func FieldsFromSubstrait(typeList []types.Type, nextName func() string, ext ExtensionIDSet) (out []arrow.Field, err error) { - out = make([]arrow.Field, len(typeList)) - for i, t := range typeList { - out[i].Name = nextName() - out[i].Nullable = IsNullable(t) - - if st, ok := t.(*types.StructType); ok { - fields, err := FieldsFromSubstrait(st.Types, nextName, ext) - if err != nil { - return nil, err - } - out[i].Type = arrow.StructOf(fields...) - } else { - out[i].Type, _, err = FromSubstraitType(t, ext) - if err != nil { - return nil, err - } - } - } - return -} - -// ToSubstraitType converts an arrow data type to a Substrait Type. Since -// arrow types don't have a nullable flag (it is in the arrow.Field) but -// Substrait types do, the nullability must be passed in here. -func ToSubstraitType(dt arrow.DataType, nullable bool, ext ExtensionIDSet) (types.Type, error) { - var nullability types.Nullability - if nullable { - nullability = types.NullabilityNullable - } else { - nullability = types.NullabilityRequired - } - - switch dt.ID() { - case arrow.BOOL: - return &types.BooleanType{Nullability: nullability}, nil - case arrow.INT8: - return &types.Int8Type{Nullability: nullability}, nil - case arrow.INT16: - return &types.Int16Type{Nullability: nullability}, nil - case arrow.INT32: - return &types.Int32Type{Nullability: nullability}, nil - case arrow.INT64: - return &types.Int64Type{Nullability: nullability}, nil - case arrow.UINT8: - _, anchor, ok := ext.EncodeTypeVariation(dt) - if !ok { - return nil, arrow.ErrNotFound - } - return &types.Int8Type{ - Nullability: nullability, - TypeVariationRef: anchor, - }, nil - case arrow.UINT16: - _, anchor, ok := ext.EncodeTypeVariation(dt) - if !ok { - return nil, arrow.ErrNotFound - } - return &types.Int16Type{ - Nullability: nullability, - TypeVariationRef: anchor, - }, nil - case arrow.UINT32: - _, anchor, ok := ext.EncodeTypeVariation(dt) - if !ok { - return nil, arrow.ErrNotFound - } - return &types.Int32Type{ - 
Nullability: nullability, - TypeVariationRef: anchor, - }, nil - case arrow.UINT64: - _, anchor, ok := ext.EncodeTypeVariation(dt) - if !ok { - return nil, arrow.ErrNotFound - } - return &types.Int64Type{ - Nullability: nullability, - TypeVariationRef: anchor, - }, nil - case arrow.FLOAT16: - _, anchor, ok := ext.EncodeTypeVariation(dt) - if !ok { - return nil, arrow.ErrNotFound - } - return &types.Int16Type{ - Nullability: nullability, - TypeVariationRef: anchor, - }, nil - case arrow.FLOAT32: - return &types.Float32Type{Nullability: nullability}, nil - case arrow.FLOAT64: - return &types.Float64Type{Nullability: nullability}, nil - case arrow.STRING: - return &types.StringType{Nullability: nullability}, nil - case arrow.BINARY: - return &types.BinaryType{Nullability: nullability}, nil - case arrow.DATE32: - return &types.DateType{Nullability: nullability}, nil - case arrow.EXTENSION: - dt := dt.(arrow.ExtensionType) - switch dt.ExtensionName() { - case "uuid": - return &types.UUIDType{Nullability: nullability}, nil - case "fixed_char": - return &types.FixedCharType{ - Nullability: nullability, - Length: int32(dt.StorageType().(*arrow.FixedSizeBinaryType).ByteWidth), - }, nil - case "varchar": - return &types.VarCharType{Nullability: nullability, Length: -1}, nil - case "interval_year": - return &types.IntervalYearType{Nullability: nullability}, nil - case "interval_day": - return &types.IntervalDayType{Nullability: nullability}, nil - default: - _, anchor, ok := ext.EncodeType(dt) - if !ok { - return nil, arrow.ErrNotFound - } - return &types.UserDefinedType{ - Nullability: nullability, - TypeReference: anchor, - }, nil - } - case arrow.FIXED_SIZE_BINARY: - return &types.FixedBinaryType{Nullability: nullability, - Length: int32(dt.(*arrow.FixedSizeBinaryType).ByteWidth)}, nil - case arrow.DECIMAL128, arrow.DECIMAL256: - dt := dt.(arrow.DecimalType) - return &types.DecimalType{Nullability: nullability, - Precision: dt.GetPrecision(), Scale: dt.GetScale()}, nil - 
case arrow.STRUCT: - dt := dt.(*arrow.StructType) - fields := make([]types.Type, dt.NumFields()) - var err error - for i, f := range dt.Fields() { - fields[i], err = ToSubstraitType(f.Type, f.Nullable, ext) - if err != nil { - return nil, err - } - } - - return &types.StructType{ - Nullability: nullability, - Types: fields, - }, nil - case arrow.LIST, arrow.FIXED_SIZE_LIST, arrow.LARGE_LIST: - dt := dt.(arrow.NestedType) - elemType, err := ToSubstraitType(dt.Fields()[0].Type, dt.Fields()[0].Nullable, ext) - if err != nil { - return nil, err - } - return &types.ListType{ - Nullability: nullability, - Type: elemType, - }, nil - case arrow.MAP: - dt := dt.(*arrow.MapType) - keyType, err := ToSubstraitType(dt.KeyType(), false, ext) - if err != nil { - return nil, err - } - valueType, err := ToSubstraitType(dt.ItemType(), dt.ItemField().Nullable, ext) - if err != nil { - return nil, err - } - - return &types.MapType{ - Nullability: nullability, - Key: keyType, - Value: valueType, - }, nil - } - - return nil, arrow.ErrNotImplemented -} - -// FromSubstraitType returns the appropriate Arrow data type for the given -// substrait type, using the extension set if necessary. -// Since Substrait types contain their nullability also, the nullability -// returned along with the data type. 
-func FromSubstraitType(t types.Type, ext ExtensionIDSet) (arrow.DataType, bool, error) { - nullable := IsNullable(t) - - if t.GetTypeVariationReference() > 0 { - _, dt, ok := ext.DecodeTypeArrow(t.GetTypeVariationReference()) - if ok { - return dt, nullable, nil - } - } - - switch t := t.(type) { - case *types.BooleanType: - return arrow.FixedWidthTypes.Boolean, nullable, nil - case *types.Int8Type: - return arrow.PrimitiveTypes.Int8, nullable, nil - case *types.Int16Type: - return arrow.PrimitiveTypes.Int16, nullable, nil - case *types.Int32Type: - return arrow.PrimitiveTypes.Int32, nullable, nil - case *types.Int64Type: - return arrow.PrimitiveTypes.Int64, nullable, nil - case *types.Float32Type: - return arrow.PrimitiveTypes.Float32, nullable, nil - case *types.Float64Type: - return arrow.PrimitiveTypes.Float64, nullable, nil - case *types.StringType: - return arrow.BinaryTypes.String, nullable, nil - case *types.BinaryType: - return arrow.BinaryTypes.Binary, nullable, nil - case *types.TimestampType: - return &arrow.TimestampType{Unit: arrow.Microsecond}, nullable, nil - case *types.TimestampTzType: - return &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: TimestampTzTimezone}, - nullable, nil - case *types.DateType: - return arrow.FixedWidthTypes.Date32, nullable, nil - case *types.TimeType: - return &arrow.Time64Type{Unit: arrow.Microsecond}, nullable, nil - case *types.IntervalYearType: - return intervalYear(), nullable, nil - case *types.IntervalDayType: - return intervalDay(), nullable, nil - case *types.UUIDType: - return uuid(), nullable, nil - case *types.FixedCharType: - return fixedChar(t.Length), nullable, nil - case *types.VarCharType: - return varChar(t.Length), nullable, nil - case *types.FixedBinaryType: - return &arrow.FixedSizeBinaryType{ByteWidth: int(t.Length)}, nullable, nil - case *types.DecimalType: - return &arrow.Decimal128Type{ - Precision: t.Precision, - Scale: t.Scale, - }, nullable, nil - case *types.StructType: - i := 0 - 
fields, err := FieldsFromSubstrait(t.Types, func() string { - i++ - return strconv.Itoa(i) - }, ext) - if err != nil { - return nil, false, err - } - - return arrow.StructOf(fields...), nullable, nil - case *types.ListType: - elem, elemNullable, err := FromSubstraitType(t.Type, ext) - if err != nil { - return nil, false, err - } - return arrow.ListOfField(arrow.Field{Name: "item", Type: elem, Nullable: elemNullable}), - nullable, nil - case *types.MapType: - key, keyNullable, err := FromSubstraitType(t.Key, ext) - if err != nil { - return nil, false, err - } - if keyNullable { - return nil, false, fmt.Errorf("%w: encountered nullable key field when converting to arrow.Map", - arrow.ErrInvalid) - } - - value, valueNullable, err := FromSubstraitType(t.Value, ext) - if err != nil { - return nil, false, err - } - ret := arrow.MapOf(key, value) - ret.SetItemNullable(valueNullable) - return ret, nullable, nil - case *types.UserDefinedType: - anchor := t.TypeReference - _, dt, ok := ext.DecodeTypeArrow(anchor) - if !ok { - return nil, false, arrow.ErrNotImplemented - } - return dt, nullable, nil - } - - return nil, false, arrow.ErrNotImplemented -} diff --git a/go/arrow/compute/fieldref.go b/go/arrow/compute/fieldref.go deleted file mode 100644 index d69c7d91044c7..0000000000000 --- a/go/arrow/compute/fieldref.go +++ /dev/null @@ -1,587 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package compute - -import ( - "errors" - "fmt" - "hash/maphash" - "reflect" - "strconv" - "strings" - "unicode" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" -) - -var ( - ErrEmpty = errors.New("cannot traverse empty field path") - ErrNoChildren = errors.New("trying to get child of type with no children") - ErrIndexRange = errors.New("index out of range") - ErrMultipleMatches = errors.New("multiple matches") - ErrNoMatch = errors.New("no match") - ErrInvalid = errors.New("field ref invalid") -) - -func getFields(typ arrow.DataType) []arrow.Field { - if nested, ok := typ.(arrow.NestedType); ok { - return nested.Fields() - } - return nil -} - -type listvals interface { - ListValues() arrow.Array -} - -func getChildren(arr arrow.Array) (ret []arrow.Array) { - switch arr := arr.(type) { - case *array.Struct: - ret = make([]arrow.Array, arr.NumField()) - for i := 0; i < arr.NumField(); i++ { - ret[i] = arr.Field(i) - } - case listvals: - ret = []arrow.Array{arr.ListValues()} - } - return -} - -// FieldPath represents a path to a nested field using indices of child fields. -// For example, given the indices {5, 9, 3} the field could be retrieved with: -// schema.Field(5).Type().(*arrow.StructType).Field(9).Type().(*arrow.StructType).Field(3) -// -// Attempting to retrieve a child field using a FieldPath which is not valid for a given -// schema will get an error such as an out of range index, or an empty path. 
-// -// FieldPaths provide for drilling down to potentially nested children for convenience -// of accepting a slice of fields, a schema or a datatype (which should contain child fields). -// -// A fieldpath can also be used to retrieve a child arrow.Array or column from a record batch. -type FieldPath []int - -func (f FieldPath) String() string { - if len(f) == 0 { - return "FieldPath(empty)" - } - - var b strings.Builder - b.WriteString("FieldPath(") - for _, i := range f { - fmt.Fprint(&b, i) - b.WriteByte(' ') - } - ret := b.String() - return ret[:len(ret)-1] + ")" -} - -// Get retrieves the corresponding nested child field by drilling through the schema's -// fields as per the field path. -func (f FieldPath) Get(s *arrow.Schema) (*arrow.Field, error) { - return f.GetFieldFromSlice(s.Fields()) -} - -// GetFieldFromSlice treats the slice as the top layer of fields, so the first value -// in the field path will index into the slice, and then drill down from there. -func (f FieldPath) GetFieldFromSlice(fields []arrow.Field) (*arrow.Field, error) { - if len(f) == 0 { - return nil, ErrEmpty - } - - var ( - depth = 0 - out *arrow.Field - ) - for _, idx := range f { - if len(fields) == 0 { - return nil, fmt.Errorf("%w: %s", ErrNoChildren, out.Type) - } - - if idx < 0 || idx >= len(fields) { - return nil, fmt.Errorf("%w: indices=%s", ErrIndexRange, f[:depth+1]) - } - - out = &fields[idx] - fields = getFields(out.Type) - depth++ - } - - return out, nil -} - -func (f FieldPath) getArray(arrs []arrow.Array) (arrow.Array, error) { - if len(f) == 0 { - return nil, ErrEmpty - } - - var ( - depth = 0 - out arrow.Array - ) - for _, idx := range f { - if len(arrs) == 0 { - return nil, fmt.Errorf("%w: %s", ErrNoChildren, out.DataType()) - } - - if idx < 0 || idx >= len(arrs) { - return nil, fmt.Errorf("%w. 
indices=%s", ErrIndexRange, f[:depth+1]) - } - - out = arrs[idx] - arrs = getChildren(out) - depth++ - } - return out, nil -} - -// GetFieldFromType returns the nested field from a datatype by drilling into it's -// child fields. -func (f FieldPath) GetFieldFromType(typ arrow.DataType) (*arrow.Field, error) { - return f.GetFieldFromSlice(getFields(typ)) -} - -// GetField is equivalent to GetFieldFromType(field.Type) -func (f FieldPath) GetField(field arrow.Field) (*arrow.Field, error) { - return f.GetFieldFromType(field.Type) -} - -// GetColumn will return the correct child array by traversing the fieldpath -// going to the nested arrays of the columns in the record batch. -func (f FieldPath) GetColumn(batch arrow.Record) (arrow.Array, error) { - return f.getArray(batch.Columns()) -} - -func (f FieldPath) findAll(fields []arrow.Field) []FieldPath { - _, err := f.GetFieldFromSlice(fields) - if err == nil { - return []FieldPath{f} - } - return nil -} - -// a nameref represents a FieldRef by name of the field -type nameRef string - -func (n nameRef) String() string { - return "Name(" + string(n) + ")" -} - -func (ref nameRef) findAll(fields []arrow.Field) []FieldPath { - out := []FieldPath{} - for i, f := range fields { - if f.Name == string(ref) { - out = append(out, FieldPath{i}) - } - } - return out -} - -func (ref nameRef) hash(h *maphash.Hash) { h.WriteString(string(ref)) } - -type matches struct { - prefixes []FieldPath - refs []*arrow.Field -} - -func (m *matches) add(prefix, suffix FieldPath, fields []arrow.Field) { - f, err := suffix.GetFieldFromSlice(fields) - if err != nil { - panic(err) - } - - m.refs = append(m.refs, f) - m.prefixes = append(m.prefixes, append(prefix, suffix...)) -} - -// refList represents a list of references to use to determine which nested -// field is being referenced. 
allowing combinations of field indices and names -type refList []FieldRef - -func (r refList) String() string { - var b strings.Builder - b.WriteString("Nested(") - for _, f := range r { - fmt.Fprint(&b, f) - b.WriteByte(' ') - } - ret := b.String() - return ret[:len(ret)-1] + ")" -} - -func (ref refList) hash(h *maphash.Hash) { - for _, r := range ref { - r.hash(h) - } -} - -func (ref refList) findAll(fields []arrow.Field) []FieldPath { - if len(ref) == 0 { - return nil - } - - m := matches{} - for _, list := range ref[0].FindAll(fields) { - m.add(FieldPath{}, list, fields) - } - - for _, r := range ref[1:] { - next := matches{} - for i, f := range m.refs { - for _, match := range r.FindAllField(*f) { - next.add(m.prefixes[i], match, getFields(f.Type)) - } - } - m = next - } - return m.prefixes -} - -type refImpl interface { - fmt.Stringer - findAll(fields []arrow.Field) []FieldPath - hash(h *maphash.Hash) -} - -// FieldRef is a descriptor of a (potentially nested) field within a schema. -// -// Unlike FieldPath (which is exclusively indices of child fields), FieldRef -// may reference a field by name. It can be constructed from either -// a field index, field name, or field path. -// -// Nested fields can be referenced as well, given the schema: -// -// arrow.NewSchema([]arrow.Field{ -// {Name: "a", Type: arrow.StructOf(arrow.Field{Name: "n", Type: arrow.Null})}, -// {Name: "b", Type: arrow.PrimitiveTypes.Int32}, -// }) -// -// the following all indicate the nested field named "n": -// -// FieldRefPath(FieldPath{0, 0}) -// FieldRefList("a", 0) -// FieldRefList("a", "n") -// FieldRefList(0, "n") -// NewFieldRefFromDotPath(".a[0]") -// -// FieldPaths matching a FieldRef are retrieved with the FindAll* functions -// Multiple matches are possible because field names may be duplicated within -// a schema. 
For example: -// -// aIsAmbiguous := arrow.NewSchema([]arrow.Field{ -// {Name: "a", Type: arrow.PrimitiveTypes.Int32}, -// {Name: "a", Type: arrow.PrimitiveTypes.Float32}, -// }) -// matches := FieldRefName("a").FindAll(aIsAmbiguous) -// assert.Len(matches, 2) -// assert.True(matches[0].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(0)) -// assert.True(matches[1].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(1)) -type FieldRef struct { - impl refImpl -} - -// FieldRefPath constructs a FieldRef from a given FieldPath -func FieldRefPath(p FieldPath) FieldRef { - return FieldRef{impl: p} -} - -// FieldRefIndex is a convenience function to construct a FieldPath reference -// of a single index -func FieldRefIndex(i int) FieldRef { - return FieldRef{impl: FieldPath{i}} -} - -// FieldRefName constructs a FieldRef by name -func FieldRefName(n string) FieldRef { - return FieldRef{impl: nameRef(n)} -} - -// FieldRefList takes an arbitrary number of arguments which can be either -// strings or ints. This will panic if anything other than a string or int -// is passed in. -func FieldRefList(elems ...interface{}) FieldRef { - list := make(refList, len(elems)) - for i, e := range elems { - switch e := e.(type) { - case string: - list[i] = FieldRefName(e) - case int: - list[i] = FieldRefIndex(e) - } - } - return FieldRef{impl: list} -} - -// NewFieldRefFromDotPath parses a dot path into a field ref. -// -// dot_path = '.' name -// -// | '[' digit+ ']' -// | dot_path+ -// -// Examples -// -// ".alpha" => FieldRefName("alpha") -// "[2]" => FieldRefIndex(2) -// ".beta[3]" => FieldRefList("beta", 3) -// "[5].gamma.delta[7]" => FieldRefList(5, "gamma", "delta", 7) -// ".hello world" => FieldRefName("hello world") -// `.\[y\]\\tho\.\` => FieldRef(`[y]\tho.\`) -// -// Note: when parsing a name, a '\' preceding any other character will be -// dropped from the resulting name. 
therefore if a name must contain the characters -// '.', '\', '[' or ']' then they must be escaped with a preceding '\'. -func NewFieldRefFromDotPath(dotpath string) (out FieldRef, err error) { - if len(dotpath) == 0 { - return out, fmt.Errorf("%w dotpath was empty", ErrInvalid) - } - - parseName := func() string { - var name string - for { - idx := strings.IndexAny(dotpath, `\[.`) - if idx == -1 { - name += dotpath - dotpath = "" - break - } - - if dotpath[idx] != '\\' { - // subscript for a new field ref - name += dotpath[:idx] - dotpath = dotpath[idx:] - break - } - - if len(dotpath) == idx+1 { - // dotpath ends with a backslash; consume it all - name += dotpath - dotpath = "" - break - } - - // append all characters before backslash, then the character which follows it - name += dotpath[:idx] + string(dotpath[idx+1]) - dotpath = dotpath[idx+2:] - } - return name - } - - children := make([]FieldRef, 0) - - for len(dotpath) > 0 { - subscript := dotpath[0] - dotpath = dotpath[1:] - switch subscript { - case '.': - // next element is a name - children = append(children, FieldRef{nameRef(parseName())}) - case '[': - subend := strings.IndexFunc(dotpath, func(r rune) bool { return !unicode.IsDigit(r) }) - if subend == -1 || dotpath[subend] != ']' { - return out, fmt.Errorf("%w: dot path '%s' contained an unterminated index", ErrInvalid, dotpath) - } - idx, _ := strconv.Atoi(dotpath[:subend]) - children = append(children, FieldRef{FieldPath{idx}}) - dotpath = dotpath[subend+1:] - default: - return out, fmt.Errorf("%w: dot path must begin with '[' or '.' got '%s'", ErrInvalid, dotpath) - } - } - - out.flatten(children) - return -} - -func (f FieldRef) hash(h *maphash.Hash) { f.impl.hash(h) } - -// Hash produces a hash of this field reference and takes in a seed so that -// it can maintain consistency across multiple places / processes /etc. 
-func (f FieldRef) Hash(seed maphash.Seed) uint64 { - h := maphash.Hash{} - h.SetSeed(seed) - f.hash(&h) - return h.Sum64() -} - -// IsName returns true if this fieldref is a name reference -func (f *FieldRef) IsName() bool { - _, ok := f.impl.(nameRef) - return ok -} - -// IsFieldPath returns true if this FieldRef uses a fieldpath -func (f *FieldRef) IsFieldPath() bool { - _, ok := f.impl.(FieldPath) - return ok -} - -// IsNested returns true if this FieldRef expects to represent -// a nested field. -func (f *FieldRef) IsNested() bool { - switch impl := f.impl.(type) { - case nameRef: - return false - case FieldPath: - return len(impl) > 1 - default: - return true - } -} - -// Name returns the name of the field this references if it is -// a Name reference, otherwise the empty string -func (f *FieldRef) Name() string { - n, _ := f.impl.(nameRef) - return string(n) -} - -// FieldPath returns the fieldpath that this FieldRef uses, otherwise -// an empty FieldPath if it's not a FieldPath reference -func (f *FieldRef) FieldPath() FieldPath { - p, _ := f.impl.(FieldPath) - return p -} - -func (f *FieldRef) Equals(other FieldRef) bool { - return reflect.DeepEqual(f.impl, other.impl) -} - -func (f *FieldRef) flatten(children []FieldRef) { - out := make([]FieldRef, 0, len(children)) - - var populate func(refImpl) - populate = func(refs refImpl) { - switch r := refs.(type) { - case nameRef: - out = append(out, FieldRef{r}) - case FieldPath: - out = append(out, FieldRef{r}) - case refList: - for _, c := range r { - populate(c.impl) - } - } - } - - populate(refList(children)) - - if len(out) == 1 { - f.impl = out[0].impl - } else { - f.impl = refList(out) - } -} - -// FindAll returns all the fieldpaths which this FieldRef matches in the given -// slice of fields. -func (f FieldRef) FindAll(fields []arrow.Field) []FieldPath { - return f.impl.findAll(fields) -} - -// FindAllField returns all the fieldpaths that this FieldRef matches against -// the type of the given field. 
-func (f FieldRef) FindAllField(field arrow.Field) []FieldPath { - return f.impl.findAll(getFields(field.Type)) -} - -// FindOneOrNone is a convenience helper that will either return 1 fieldpath, -// or an empty fieldpath, and will return an error if there are multiple matches. -func (f FieldRef) FindOneOrNone(schema *arrow.Schema) (FieldPath, error) { - matches := f.FindAll(schema.Fields()) - if len(matches) > 1 { - return nil, fmt.Errorf("%w for %s in %s", ErrMultipleMatches, f, schema) - } - if len(matches) == 0 { - return nil, nil - } - return matches[0], nil -} - -// FindOneOrNoneRecord is like FindOneOrNone but for the schema of a record, -// returning an error only if there are multiple matches. -func (f FieldRef) FindOneOrNoneRecord(root arrow.Record) (FieldPath, error) { - return f.FindOneOrNone(root.Schema()) -} - -// FindOne returns an error if the field isn't matched or if there are multiple matches -// otherwise it returns the path to the single valid match. -func (f FieldRef) FindOne(schema *arrow.Schema) (FieldPath, error) { - matches := f.FindAll(schema.Fields()) - if len(matches) == 0 { - return nil, fmt.Errorf("%w for %s in %s", ErrNoMatch, f, schema) - } - if len(matches) > 1 { - return nil, fmt.Errorf("%w for %s in %s", ErrMultipleMatches, f, schema) - } - return matches[0], nil -} - -// GetAllColumns gets all the matching column arrays from the given record that -// this FieldRef references. -func (f FieldRef) GetAllColumns(root arrow.Record) ([]arrow.Array, error) { - out := make([]arrow.Array, 0) - for _, m := range f.FindAll(root.Schema().Fields()) { - n, err := m.GetColumn(root) - if err != nil { - return nil, err - } - out = append(out, n) - } - return out, nil -} - -// GetOneField will return a pointer to a field or an error if it is not found -// or if there are multiple matches. 
-func (f FieldRef) GetOneField(schema *arrow.Schema) (*arrow.Field, error) { - match, err := f.FindOne(schema) - if err != nil { - return nil, err - } - - return match.GetFieldFromSlice(schema.Fields()) -} - -// GetOneOrNone will return a field or a nil if the field is found or not, and -// only errors if there are multiple matches. -func (f FieldRef) GetOneOrNone(schema *arrow.Schema) (*arrow.Field, error) { - match, err := f.FindOneOrNone(schema) - if err != nil { - return nil, err - } - if len(match) == 0 { - return nil, nil - } - return match.GetFieldFromSlice(schema.Fields()) -} - -// GetOneColumnOrNone returns either a nil or the referenced array if it can be -// found, erroring only if there is an ambiguous multiple matches. -func (f FieldRef) GetOneColumnOrNone(root arrow.Record) (arrow.Array, error) { - match, err := f.FindOneOrNoneRecord(root) - if err != nil { - return nil, err - } - if len(match) == 0 { - return nil, nil - } - return match.GetColumn(root) -} - -func (f FieldRef) String() string { - return "FieldRef." + f.impl.String() -} diff --git a/go/arrow/compute/fieldref_hash.go b/go/arrow/compute/fieldref_hash.go deleted file mode 100644 index 21ef88f1ecb4f..0000000000000 --- a/go/arrow/compute/fieldref_hash.go +++ /dev/null @@ -1,39 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.20 || tinygo - -package compute - -import ( - "hash/maphash" - "math/bits" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" -) - -func (f FieldPath) hash(h *maphash.Hash) { - raw := unsafe.Pointer(unsafe.SliceData(f)) - var byteLen int - if bits.UintSize == 32 { - byteLen = arrow.Int32Traits.BytesRequired(len(f)) - } else { - byteLen = arrow.Int64Traits.BytesRequired(len(f)) - } - - h.Write(unsafe.Slice((*byte)(raw), byteLen)) -} diff --git a/go/arrow/compute/fieldref_test.go b/go/arrow/compute/fieldref_test.go deleted file mode 100644 index ce2051f942271..0000000000000 --- a/go/arrow/compute/fieldref_test.go +++ /dev/null @@ -1,316 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package compute_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestFieldPathBasics(t *testing.T) { - f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f2 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f3 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - - s := arrow.NewSchema([]arrow.Field{f0, f1, f2, f3}, nil) - - for i := range s.Fields() { - f, err := compute.FieldPath{i}.Get(s) - assert.NoError(t, err) - assert.Equal(t, s.Field(i), *f) - } - - f, err := compute.FieldPath{}.Get(s) - assert.Nil(t, f) - assert.ErrorIs(t, err, compute.ErrEmpty) - - f, err = compute.FieldPath{s.NumFields() * 2}.Get(s) - assert.Nil(t, f) - assert.ErrorIs(t, err, compute.ErrIndexRange) -} - -func TestFieldRefBasics(t *testing.T) { - f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f2 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f3 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - - s := arrow.NewSchema([]arrow.Field{f0, f1, f2, f3}, nil) - - // lookup by index returns Indices{index} - for i := range s.Fields() { - assert.ElementsMatch(t, []compute.FieldPath{{i}}, compute.FieldRefIndex(i).FindAll(s.Fields())) - } - - // out of range index results in failure to match - assert.Empty(t, compute.FieldRefIndex(s.NumFields()*2).FindAll(s.Fields())) - - // lookup by name returns the indices of both matching fields - assert.Equal(t, []compute.FieldPath{{0}, {2}}, compute.FieldRefName("alpha").FindAll(s.Fields())) - assert.Equal(t, []compute.FieldPath{{1}, {3}}, compute.FieldRefName("beta").FindAll(s.Fields())) -} - -func 
TestFieldRefDotPath(t *testing.T) { - ref, err := compute.NewFieldRefFromDotPath(`.alpha`) - assert.True(t, ref.IsName()) - assert.Equal(t, "alpha", ref.Name()) - assert.False(t, ref.IsFieldPath()) - assert.False(t, ref.IsNested()) - assert.NoError(t, err) - assert.Equal(t, compute.FieldRefName("alpha"), ref) - assert.True(t, ref.Equals(compute.FieldRefName("alpha"))) - - ref, err = compute.NewFieldRefFromDotPath(`..`) - assert.Empty(t, ref.Name()) - assert.False(t, ref.IsName()) - assert.False(t, ref.IsFieldPath()) - assert.Nil(t, ref.FieldPath()) - assert.True(t, ref.IsNested()) - assert.NoError(t, err) - assert.Equal(t, compute.FieldRefList("", ""), ref) - - ref, err = compute.NewFieldRefFromDotPath(`[2]`) - assert.False(t, ref.IsName()) - assert.True(t, ref.IsFieldPath()) - assert.Equal(t, compute.FieldPath{2}, ref.FieldPath()) - assert.False(t, ref.IsNested()) - assert.NoError(t, err) - assert.Equal(t, compute.FieldRefIndex(2), ref) - - ref, err = compute.NewFieldRefFromDotPath(`.beta[3]`) - assert.NoError(t, err) - assert.Equal(t, compute.FieldRefList("beta", 3), ref) - - ref, err = compute.NewFieldRefFromDotPath(`[5].gamma.delta[7]`) - assert.NoError(t, err) - assert.Equal(t, compute.FieldRefList(5, "gamma", "delta", 7), ref) - - ref, err = compute.NewFieldRefFromDotPath(`.hello world`) - assert.NoError(t, err) - assert.Equal(t, compute.FieldRefName("hello world"), ref) - - ref, err = compute.NewFieldRefFromDotPath(`.\[y\]\\tho\.\`) - assert.NoError(t, err) - assert.Equal(t, compute.FieldRefName(`[y]\tho.\`), ref) - - _, err = compute.NewFieldRefFromDotPath(``) - assert.ErrorIs(t, err, compute.ErrInvalid) - - _, err = compute.NewFieldRefFromDotPath(`alpha`) - assert.ErrorIs(t, err, compute.ErrInvalid) - - _, err = compute.NewFieldRefFromDotPath(`[134234`) - assert.ErrorIs(t, err, compute.ErrInvalid) - - _, err = compute.NewFieldRefFromDotPath(`[1stuf]`) - assert.ErrorIs(t, err, compute.ErrInvalid) -} - -func TestFieldPathNested(t *testing.T) { - f0 := 
arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f1_0 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f1 := arrow.Field{Name: "beta", Type: arrow.StructOf(f1_0)} - f2_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f2_1_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f2_1_1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)} - f2 := arrow.Field{Name: "beta", Type: arrow.StructOf(f2_0, f2_1)} - s := arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil) - - f, err := compute.FieldPath{0}.Get(s) - assert.NoError(t, err) - assert.Equal(t, f0, *f) - - f, err = compute.FieldPath{0, 0}.Get(s) - assert.ErrorIs(t, err, compute.ErrNoChildren) - assert.Nil(t, f) - - f, err = compute.FieldPath{1, 0}.Get(s) - assert.NoError(t, err) - assert.Equal(t, f1_0, *f) - - f, err = compute.FieldPath{2, 0}.Get(s) - assert.NoError(t, err) - assert.Equal(t, f2_0, *f) - - f, err = compute.FieldPath{2, 1, 0}.Get(s) - assert.NoError(t, err) - assert.Equal(t, f2_1_0, *f) - - f, err = compute.FieldPath{1, 0}.GetField(s.Field(2)) - assert.NoError(t, err) - assert.Equal(t, f2_1_0, *f) - - f, err = compute.FieldPath{2, 1, 1}.Get(s) - assert.NoError(t, err) - assert.Equal(t, f2_1_1, *f) -} - -func TestFindFuncs(t *testing.T) { - f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f1_0 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f1 := arrow.Field{Name: "alpha", Type: arrow.StructOf(f1_0)} - f2_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f2_1_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f2_1_1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)} - f2 := arrow.Field{Name: "beta", Type: arrow.StructOf(f2_0, f2_1)} - s := arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil) - - assert.Equal(t, 
[]compute.FieldPath{{1}}, compute.FieldRefName("gamma").FindAllField(f2)) - fp, err := compute.FieldRefName("alpha").FindOneOrNone(s) - assert.ErrorIs(t, err, compute.ErrMultipleMatches) - assert.Len(t, fp, 0) - fp, err = compute.FieldRefName("alpha").FindOne(s) - assert.ErrorIs(t, err, compute.ErrMultipleMatches) - assert.Len(t, fp, 0) - - fp, err = compute.FieldRefName("beta").FindOneOrNone(s) - assert.NoError(t, err) - assert.Equal(t, compute.FieldPath{2}, fp) - fp, err = compute.FieldRefName("beta").FindOne(s) - assert.NoError(t, err) - assert.Equal(t, compute.FieldPath{2}, fp) - - fp, err = compute.FieldRefName("gamma").FindOneOrNone(s) - assert.NoError(t, err) - assert.Len(t, fp, 0) - - fp, err = compute.FieldRefName("gamma").FindOne(s) - assert.ErrorIs(t, err, compute.ErrNoMatch) - assert.Nil(t, fp) -} - -func TestGetFieldFuncs(t *testing.T) { - f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f1_0 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f1 := arrow.Field{Name: "alpha", Type: arrow.StructOf(f1_0)} - f2_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f2_1_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f2_1_1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)} - f2 := arrow.Field{Name: "beta", Type: arrow.StructOf(f2_0, f2_1)} - s := arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil) - - ref, err := compute.NewFieldRefFromDotPath(`[2].alpha`) - assert.NoError(t, err) - - f, err := ref.GetOneField(s) - assert.NoError(t, err) - assert.Equal(t, f2_0, *f) - f, err = ref.GetOneOrNone(s) - assert.NoError(t, err) - assert.Equal(t, f2_0, *f) - - ref = compute.FieldRefList("beta", "gamma", 2) - f, err = ref.GetOneField(s) - assert.ErrorIs(t, err, compute.ErrNoMatch) - assert.Nil(t, f) - f, err = ref.GetOneOrNone(s) - assert.NoError(t, err) - assert.Nil(t, f) - - f, err = 
compute.FieldRefName("alpha").GetOneOrNone(s) - assert.ErrorIs(t, err, compute.ErrMultipleMatches) - assert.Nil(t, f) -} - -func TestFieldRefRecord(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - alphaBldr := array.NewInt32Builder(mem) - defer alphaBldr.Release() - - betaBldr := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) - defer betaBldr.Release() - - gammaBldr := array.NewStructBuilder(mem, arrow.StructOf( - arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32, Nullable: true})) - defer gammaBldr.Release() - - alphaBldr.AppendValues([]int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) - betaBldr.AppendValues([]int32{0, 3, 7, 8, 8, 10, 13, 14, 17, 20, 22}, []bool{true, true, true, false, true, true, true, true, true, true}) - for i := 0; i < 22; i++ { - betaBldr.ValueBuilder().(*array.Int32Builder).Append(int32(i * 2)) - } - - gammaBldr.AppendValues([]bool{true, true, true, true, true, true, true, true, true, true}) - gammaBldr.FieldBuilder(0).(*array.Int32Builder).AppendValues([]int32{10, 20, 30, 40, 50, 60, 70, 80, 90, 100}, nil) - gammaBldr.FieldBuilder(1).(*array.Int32Builder).AppendValues([]int32{-10, -20, -30, -40, -50, -60, -70, -80, -90, -100}, nil) - - alpha := alphaBldr.NewInt32Array() - defer alpha.Release() - beta := betaBldr.NewListArray() - defer beta.Release() - gamma := gammaBldr.NewStructArray() - defer gamma.Release() - - rec := array.NewRecord(arrow.NewSchema([]arrow.Field{ - {Name: "alpha", Type: alpha.DataType(), Nullable: true}, - {Name: "alpha", Type: beta.DataType(), Nullable: true}, - {Name: "alpha", Type: gamma.DataType(), Nullable: true}, - }, nil), []arrow.Array{alpha, beta, gamma}, 10) - defer rec.Release() - - arr, err := compute.FieldPath{2, 0}.GetColumn(rec) - assert.NoError(t, err) - assert.Same(t, gamma.Field(0), arr) - - arr, err = compute.FieldPath{}.GetColumn(rec) - 
assert.ErrorIs(t, err, compute.ErrEmpty) - assert.Nil(t, arr) - - arr, err = compute.FieldPath{1, 0}.GetColumn(rec) - assert.NoError(t, err) - assert.Same(t, beta.ListValues(), arr) - - arr, err = compute.FieldPath{1, 0, 0}.GetColumn(rec) - assert.ErrorIs(t, err, compute.ErrNoChildren) - assert.Nil(t, arr) - - arr, err = compute.FieldPath{2, 2}.GetColumn(rec) - assert.ErrorIs(t, err, compute.ErrIndexRange) - assert.Nil(t, arr) - - arrs, err := compute.FieldRefName("alpha").GetAllColumns(rec) - assert.NoError(t, err) - assert.Equal(t, []arrow.Array{alpha, beta, gamma}, arrs) - - arrs, err = compute.FieldRefName("delta").GetAllColumns(rec) - assert.NoError(t, err) - assert.Len(t, arrs, 0) - - arr, err = compute.FieldRefName("delta").GetOneColumnOrNone(rec) - assert.NoError(t, err) - assert.Nil(t, arr) - - arr, err = compute.FieldRefName("alpha").GetOneColumnOrNone(rec) - assert.ErrorIs(t, err, compute.ErrMultipleMatches) - assert.Nil(t, arr) - - arr, err = compute.FieldRefList("alpha", "beta").GetOneColumnOrNone(rec) - assert.NoError(t, err) - assert.Same(t, gamma.Field(1), arr) -} diff --git a/go/arrow/compute/funckind_string.go b/go/arrow/compute/funckind_string.go deleted file mode 100644 index 204e844133e53..0000000000000 --- a/go/arrow/compute/funckind_string.go +++ /dev/null @@ -1,29 +0,0 @@ -// Code generated by "stringer -type=FuncKind -linecomment"; DO NOT EDIT. - -//go:build go1.18 - -package compute - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. 
- var x [1]struct{} - _ = x[FuncScalar-0] - _ = x[FuncVector-1] - _ = x[FuncScalarAgg-2] - _ = x[FuncHashAgg-3] - _ = x[FuncMeta-4] -} - -const _FuncKind_name = "ScalarVectorScalarAggregateHashAggregateMeta" - -var _FuncKind_index = [...]uint8{0, 6, 12, 27, 40, 44} - -func (i FuncKind) String() string { - if i < 0 || i >= FuncKind(len(_FuncKind_index)-1) { - return "FuncKind(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _FuncKind_name[_FuncKind_index[i]:_FuncKind_index[i+1]] -} diff --git a/go/arrow/compute/functions.go b/go/arrow/compute/functions.go deleted file mode 100644 index ebade11a8e60b..0000000000000 --- a/go/arrow/compute/functions.go +++ /dev/null @@ -1,430 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package compute - -import ( - "context" - "fmt" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/compute/exec" -) - -type Function interface { - Name() string - Kind() FuncKind - Arity() Arity - Doc() FunctionDoc - NumKernels() int - Execute(context.Context, FunctionOptions, ...Datum) (Datum, error) - DispatchExact(...arrow.DataType) (exec.Kernel, error) - DispatchBest(...arrow.DataType) (exec.Kernel, error) - DefaultOptions() FunctionOptions - Validate() error -} - -// Arity defines the number of required arguments for a function. -// -// Naming conventions are taken from https://en.wikipedia.org/wiki/Arity -type Arity struct { - NArgs int - IsVarArgs bool -} - -// Convenience functions to generating Arities - -func Nullary() Arity { return Arity{0, false} } -func Unary() Arity { return Arity{1, false} } -func Binary() Arity { return Arity{2, false} } -func Ternary() Arity { return Arity{3, false} } -func VarArgs(minArgs int) Arity { return Arity{minArgs, true} } - -type FunctionDoc struct { - // A one-line summary of the function, using a verb. - // - // For example, "Add two numeric arrays or scalars" - Summary string - // A detailed description of the function, meant to follow the summary. - Description string - // Symbolic names (identifiers) for the function arguments. - // - // Can be used to generate nicer function signatures. - ArgNames []string - // Name of the options struct type, if any - OptionsType string - // Whether or not options are required for function execution. - // - // If false, then either there are no options for this function, - // or there is a usable default options value. - OptionsRequired bool -} - -// EmptyFuncDoc is a reusable empty function doc definition for convenience. 
-var EmptyFuncDoc FunctionDoc - -// FuncKind is an enum representing the type of a function -type FuncKind int8 - -const ( - // A function that performs scalar data operations on whole arrays - // of data. Can generally process Array or Scalar values. The size - // of the output will be the same as the size (or broadcasted size, - // in the case of mixing Array and Scalar inputs) of the input. - FuncScalar FuncKind = iota // Scalar - // A function with array input and output whose behavior depends on - // the values of the entire arrays passed, rather than the value of - // each scalar value. - FuncVector // Vector - // A function that computes a scalar summary statistic from array input. - FuncScalarAgg // ScalarAggregate - // A function that computes grouped summary statistics from array - // input and an array of group identifiers. - FuncHashAgg // HashAggregate - // A function that dispatches to other functions and does not contain - // its own kernels. - FuncMeta // Meta -) - -func validateFunctionSummary(summary string) error { - if strings.Contains(summary, "\n") { - return fmt.Errorf("%w: summary contains a newline", arrow.ErrInvalid) - } - if summary[len(summary)-1] == '.' { - return fmt.Errorf("%w: summary ends with a point", arrow.ErrInvalid) - } - return nil -} - -func validateFunctionDescription(desc string) error { - if len(desc) != 0 && desc[len(desc)-1] == '\n' { - return fmt.Errorf("%w: description ends with a newline", arrow.ErrInvalid) - } - - const maxLineSize = 78 - for _, ln := range strings.Split(desc, "\n") { - if len(ln) > maxLineSize { - return fmt.Errorf("%w: description line length exceeds %d characters", arrow.ErrInvalid, maxLineSize) - } - } - return nil -} - -// baseFunction is the base class for compute functions. Function -// implementations should embed this baseFunction and will contain -// a collection of "kernels" which are implementations of the function -// for specific argument types. 
Selecting a viable kernel for -// executing the function is referred to as "dispatching". -type baseFunction struct { - name string - kind FuncKind - arity Arity - doc FunctionDoc - defaultOpts FunctionOptions -} - -func (b *baseFunction) Name() string { return b.name } -func (b *baseFunction) Kind() FuncKind { return b.kind } -func (b *baseFunction) Arity() Arity { return b.arity } -func (b *baseFunction) Doc() FunctionDoc { return b.doc } -func (b *baseFunction) DefaultOptions() FunctionOptions { return b.defaultOpts } -func (b *baseFunction) Validate() error { - if b.doc.Summary == "" { - return nil - } - - argCount := len(b.doc.ArgNames) - if argCount != b.arity.NArgs && !(b.arity.IsVarArgs && argCount == b.arity.NArgs+1) { - return fmt.Errorf("in function '%s': number of argument names for function doc != function arity", b.name) - } - - if err := validateFunctionSummary(b.doc.Summary); err != nil { - return err - } - return validateFunctionDescription(b.doc.Description) -} - -func checkOptions(fn Function, opts FunctionOptions) error { - if opts == nil && fn.Doc().OptionsRequired { - return fmt.Errorf("%w: function '%s' cannot be called without options", arrow.ErrInvalid, fn.Name()) - } - return nil -} - -func (b *baseFunction) checkArity(nargs int) error { - switch { - case b.arity.IsVarArgs && nargs < b.arity.NArgs: - return fmt.Errorf("%w: varargs function '%s' needs at least %d arguments, but only %d passed", - arrow.ErrInvalid, b.name, b.arity.NArgs, nargs) - case !b.arity.IsVarArgs && nargs != b.arity.NArgs: - return fmt.Errorf("%w: function '%s' accepts %d arguments but %d passed", - arrow.ErrInvalid, b.name, b.arity.NArgs, nargs) - } - return nil -} - -// kernelType is a type constraint interface that is used for funcImpl -// generic definitions. It will be extended as other kernel types -// are defined. -// -// Currently only ScalarKernels are allowed to be used. 
-type kernelType interface { - exec.ScalarKernel | exec.VectorKernel - - // specifying the Kernel interface here allows us to utilize - // the methods of the Kernel interface on the generic - // constrained type - exec.Kernel -} - -// funcImpl is the basic implementation for any functions that use kernels -// i.e. all except for Meta functions. -type funcImpl[KT kernelType] struct { - baseFunction - - kernels []KT -} - -func (fi *funcImpl[KT]) DispatchExact(vals ...arrow.DataType) (*KT, error) { - if err := fi.checkArity(len(vals)); err != nil { - return nil, err - } - - for i := range fi.kernels { - if fi.kernels[i].GetSig().MatchesInputs(vals) { - return &fi.kernels[i], nil - } - } - - return nil, fmt.Errorf("%w: function '%s' has no kernel matching input types %s", - arrow.ErrNotImplemented, fi.name, arrow.TypesToString(vals)) -} - -func (fi *funcImpl[KT]) NumKernels() int { return len(fi.kernels) } -func (fi *funcImpl[KT]) Kernels() []*KT { - res := make([]*KT, len(fi.kernels)) - for i := range fi.kernels { - res[i] = &fi.kernels[i] - } - return res -} - -// A ScalarFunction is a function that executes element-wise operations -// on arrays or scalars, and therefore whose results generally do not -// depend on the order of the values in the arguments. Accepts and returns -// arrays that are all of the same size. These functions roughly correspond -// to the functions used in most SQL expressions. -type ScalarFunction struct { - funcImpl[exec.ScalarKernel] -} - -// NewScalarFunction constructs a new ScalarFunction object with the passed in -// name, arity and function doc. 
-func NewScalarFunction(name string, arity Arity, doc FunctionDoc) *ScalarFunction { - return &ScalarFunction{ - funcImpl: funcImpl[exec.ScalarKernel]{ - baseFunction: baseFunction{ - name: name, - arity: arity, - doc: doc, - kind: FuncScalar, - }, - }, - } -} - -func (s *ScalarFunction) SetDefaultOptions(opts FunctionOptions) { - s.defaultOpts = opts -} - -func (s *ScalarFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error) { - return s.funcImpl.DispatchExact(vals...) -} - -func (s *ScalarFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { - return s.DispatchExact(vals...) -} - -// AddNewKernel constructs a new kernel with the provided signature -// and execution/init functions and then adds it to the function's list of -// kernels. This assumes default null handling (intersection of validity bitmaps) -func (s *ScalarFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, init exec.KernelInitFn) error { - if err := s.checkArity(len(inTypes)); err != nil { - return err - } - - if s.arity.IsVarArgs && len(inTypes) != 1 { - return fmt.Errorf("%w: varargs signatures must have exactly one input type", arrow.ErrInvalid) - } - - sig := &exec.KernelSignature{ - InputTypes: inTypes, - OutType: outType, - IsVarArgs: s.arity.IsVarArgs, - } - - s.kernels = append(s.kernels, exec.NewScalarKernelWithSig(sig, execFn, init)) - return nil -} - -// AddKernel adds the provided kernel to the list of kernels -// this function has. A copy of the kernel is added to the slice of kernels, -// which means that a given kernel object can be created, added and then -// reused to add other kernels. 
-func (s *ScalarFunction) AddKernel(k exec.ScalarKernel) error { - if err := s.checkArity(len(k.Signature.InputTypes)); err != nil { - return err - } - - if s.arity.IsVarArgs && !k.Signature.IsVarArgs { - return fmt.Errorf("%w: function accepts varargs but kernel signature does not", arrow.ErrInvalid) - } - - s.kernels = append(s.kernels, k) - return nil -} - -// Execute uses the passed in context, function options and arguments to eagerly -// execute the function using kernel dispatch, batch iteration and memory -// allocation details as defined by the kernel. -// -// If opts is nil, then the DefaultOptions() will be used. -func (s *ScalarFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { - return execInternal(ctx, s, opts, -1, args...) -} - -type VectorFunction struct { - funcImpl[exec.VectorKernel] -} - -func NewVectorFunction(name string, arity Arity, doc FunctionDoc) *VectorFunction { - return &VectorFunction{ - funcImpl: funcImpl[exec.VectorKernel]{ - baseFunction: baseFunction{ - name: name, - arity: arity, - doc: doc, - kind: FuncVector, - }, - }, - } -} - -func (f *VectorFunction) SetDefaultOptions(opts FunctionOptions) { - f.defaultOpts = opts -} - -func (f *VectorFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error) { - return f.funcImpl.DispatchExact(vals...) -} - -func (f *VectorFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { - return f.DispatchExact(vals...) 
-} - -func (f *VectorFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, init exec.KernelInitFn) error { - if err := f.checkArity(len(inTypes)); err != nil { - return err - } - - if f.arity.IsVarArgs && len(inTypes) != 1 { - return fmt.Errorf("%w: varags signatures must have exactly one input type", arrow.ErrInvalid) - } - - sig := &exec.KernelSignature{ - InputTypes: inTypes, - OutType: outType, - IsVarArgs: f.arity.IsVarArgs, - } - f.kernels = append(f.kernels, exec.NewVectorKernelWithSig(sig, execFn, init)) - return nil -} - -func (f *VectorFunction) AddKernel(kernel exec.VectorKernel) error { - if err := f.checkArity(len(kernel.Signature.InputTypes)); err != nil { - return err - } - - if f.arity.IsVarArgs && !kernel.Signature.IsVarArgs { - return fmt.Errorf("%w: function accepts varargs but kernel signature does not", arrow.ErrInvalid) - } - f.kernels = append(f.kernels, kernel) - return nil -} - -func (f *VectorFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { - return execInternal(ctx, f, opts, -1, args...) -} - -// MetaFunctionImpl is the signature needed for implementing a MetaFunction -// which is a function that dispatches to another function instead. -type MetaFunctionImpl func(context.Context, FunctionOptions, ...Datum) (Datum, error) - -// MetaFunction is a function which dispatches to other functions, the impl -// must not be nil. -// -// For Array, ChunkedArray and Scalar datums, this may rely on the execution -// of concrete function types, but this must handle other Datum kinds on its -// own. -type MetaFunction struct { - baseFunction - impl MetaFunctionImpl -} - -// NewMetaFunction constructs a new MetaFunction which will call the provided -// impl for dispatching with the expected arity. -// -// Will panic if impl is nil. 
-func NewMetaFunction(name string, arity Arity, doc FunctionDoc, impl MetaFunctionImpl) *MetaFunction { - if impl == nil { - panic("arrow/compute: cannot construct MetaFunction with nil impl") - } - return &MetaFunction{ - baseFunction: baseFunction{ - name: name, - arity: arity, - doc: doc, - }, - impl: impl, - } -} - -func (MetaFunction) NumKernels() int { return 0 } -func (m *MetaFunction) DispatchExact(...arrow.DataType) (exec.Kernel, error) { - return nil, fmt.Errorf("%w: dispatch for metafunction", arrow.ErrNotImplemented) -} - -func (m *MetaFunction) DispatchBest(...arrow.DataType) (exec.Kernel, error) { - return nil, fmt.Errorf("%w: dispatch for metafunction", arrow.ErrNotImplemented) -} - -func (m *MetaFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { - if err := m.checkArity(len(args)); err != nil { - return nil, err - } - if err := checkOptions(m, opts); err != nil { - return nil, err - } - - if opts == nil { - opts = m.defaultOpts - } - - return m.impl(ctx, opts, args...) -} diff --git a/go/arrow/compute/functions_test.go b/go/arrow/compute/functions_test.go deleted file mode 100644 index 31a4cf124e845..0000000000000 --- a/go/arrow/compute/functions_test.go +++ /dev/null @@ -1,69 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestArityBasics(t *testing.T) { - nullary := compute.Nullary() - assert.Equal(t, 0, nullary.NArgs) - assert.False(t, nullary.IsVarArgs) - - unary := compute.Unary() - assert.Equal(t, 1, unary.NArgs) - assert.False(t, unary.IsVarArgs) - - binary := compute.Binary() - assert.Equal(t, 2, binary.NArgs) - assert.False(t, binary.IsVarArgs) - - ternary := compute.Ternary() - assert.Equal(t, 3, ternary.NArgs) - assert.False(t, ternary.IsVarArgs) - - varargs := compute.VarArgs(2) - assert.Equal(t, 2, varargs.NArgs) - assert.True(t, varargs.IsVarArgs) -} - -func CheckDispatchBest(t *testing.T, funcName string, originalTypes, expected []arrow.DataType) { - fn, exists := compute.GetFunctionRegistry().GetFunction(funcName) - require.True(t, exists) - - vals := make([]arrow.DataType, len(originalTypes)) - copy(vals, originalTypes) - - actualKernel, err := fn.DispatchBest(vals...) - require.NoError(t, err) - expKernel, err := fn.DispatchExact(expected...) - require.NoError(t, err) - - assert.Same(t, expKernel, actualKernel) - assert.Equal(t, len(expected), len(vals)) - for i, v := range vals { - assert.True(t, arrow.TypeEqual(v, expected[i]), v.String(), expected[i].String()) - } -} diff --git a/go/arrow/compute/internal/kernels/Makefile b/go/arrow/compute/internal/kernels/Makefile deleted file mode 100644 index ac00bd837c0b3..0000000000000 --- a/go/arrow/compute/internal/kernels/Makefile +++ /dev/null @@ -1,110 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# this converts rotate instructions from "ro[lr] " -> "ro[lr] , 1" for yasm compatibility -PERL_FIXUP_ROTATE=perl -i -pe 's/(ro[rl]\s+\w{2,3})$$/\1, 1/' - -C2GOASM=c2goasm -CC=clang-11 -CXX=clang++-11 -C_FLAGS=-target x86_64-unknown-none -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=5000 \ - -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -O3 -fno-builtin -ffast-math -fno-jump-tables -I_lib -I../../../../internal/utils/_lib -ASM_FLAGS_AVX2=-mavx2 -mfma -ASM_FLAGS_SSE4=-msse4 -ASM_FLAGS_BMI2=-mbmi2 -ASM_FLAGS_POPCNT=-mpopcnt - -C_FLAGS_NEON=-O3 -fvectorize -mllvm -force-vector-width=16 -fno-asynchronous-unwind-tables -mno-red-zone -mstackrealign -fno-exceptions \ - -fno-rtti -fno-builtin -ffast-math -fno-jump-tables -I_lib -I../../../../internal/utils/_lib - -GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go') -ALL_SOURCES := $(shell find . 
-path ./_lib -prune -o -name '*.go' -name '*.s' -not -name '*_test.go') - -.PHONEY: assembly - -INTEL_SOURCES := \ - cast_numeric_avx2_amd64.s cast_numeric_sse4_amd64.s constant_factor_avx2_amd64.s \ - constant_factor_sse4_amd64.s base_arithmetic_avx2_amd64.s base_arithmetic_sse4_amd64.s \ - scalar_comparison_avx2_amd64.s scalar_comparison_sse4_amd64.s - -# -# ARROW-15336: DO NOT add the assembly target for Arm64 (ARM_SOURCES) until c2goasm added the Arm64 support. -# min_max_neon_arm64.s was generated by asm2plan9s. -# And manually formatted it as the Arm64 Plan9. -# - -assembly: $(INTEL_SOURCES) - -_lib/cast_numeric_avx2_amd64.s: _lib/cast_numeric.cc - $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/cast_numeric_sse4_amd64.s: _lib/cast_numeric.cc - $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/cast_numeric_neon.s: _lib/cast_numeric.cc - $(CXX) -std=c++17 -S $(C_FLAGS_NEON) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/base_arithmetic_avx2_amd64.s: _lib/base_arithmetic.cc - $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/base_arithmetic_sse4_amd64.s: _lib/base_arithmetic.cc - $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/scalar_comparison_avx2_amd64.s: _lib/scalar_comparison.cc - $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/scalar_comparison_sse4_amd64.s: _lib/scalar_comparison.cc - $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/base_arithmetic_neon.s: _lib/base_arithmetic.cc - $(CXX) -std=c++17 -S $(C_FLAGS_NEON) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/constant_factor_avx2_amd64.s: _lib/constant_factor.c - $(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/constant_factor_sse4_amd64.s: _lib/constant_factor.c - $(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; 
$(PERL_FIXUP_ROTATE) $@ - -_lib/constant_factor_neon.s: _lib/constant_factor.c - $(CC) -S $(C_FLAGS_NEON) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -cast_numeric_avx2_amd64.s: _lib/cast_numeric_avx2_amd64.s - $(C2GOASM) -a -f $^ $@ - -cast_numeric_sse4_amd64.s: _lib/cast_numeric_sse4_amd64.s - $(C2GOASM) -a -f $^ $@ - -constant_factor_avx2_amd64.s: _lib/constant_factor_avx2_amd64.s - $(C2GOASM) -a -f $^ $@ - -constant_factor_sse4_amd64.s: _lib/constant_factor_sse4_amd64.s - $(C2GOASM) -a -f $^ $@ - -base_arithmetic_avx2_amd64.s: _lib/base_arithmetic_avx2_amd64.s - $(C2GOASM) -a -f $^ $@ - -base_arithmetic_sse4_amd64.s: _lib/base_arithmetic_sse4_amd64.s - $(C2GOASM) -a -f $^ $@ - -scalar_comparison_avx2_amd64.s: _lib/scalar_comparison_avx2_amd64.s - $(C2GOASM) -a -f $^ $@ - -scalar_comparison_sse4_amd64.s: _lib/scalar_comparison_sse4_amd64.s - $(C2GOASM) -a -f $^ $@ - -clean: - rm -f $(INTEL_SOURCES) - rm -f $(addprefix _lib/,$(INTEL_SOURCES)) diff --git a/go/arrow/compute/internal/kernels/_lib/base_arithmetic.cc b/go/arrow/compute/internal/kernels/_lib/base_arithmetic.cc deleted file mode 100644 index 199c9d48ac631..0000000000000 --- a/go/arrow/compute/internal/kernels/_lib/base_arithmetic.cc +++ /dev/null @@ -1,484 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include "types.h" -#include "vendored/safe-math.h" - -// Corresponds to equivalent ArithmeticOp enum in base_arithmetic.go -// for passing across which operation to perform. This allows simpler -// implementation at the cost of having to pass the extra int8 and -// perform a switch. -// -// In cases of small arrays, this is completely negligible. In cases -// of large arrays, the time saved by using SIMD here is significantly -// worth the cost. -enum class optype : int8_t { - ADD, - SUB, - MUL, - DIV, - ABSOLUTE_VALUE, - NEGATE, - SQRT, - POWER, - SIN, - COS, - TAN, - ASIN, - ACOS, - ATAN, - ATAN2, - LN, - LOG10, - LOG2, - LOG1P, - LOGB, - SIGN, - - // this impl doesn't actually perform any overflow checks as we need - // to only run overflow checks on non-null entries - ADD_CHECKED, - SUB_CHECKED, - MUL_CHECKED, - DIV_CHECKED, - ABSOLUTE_VALUE_CHECKED, - NEGATE_CHECKED, - SQRT_CHECKED, - POWER_CHECKED, - SIN_CHECKED, - COS_CHECKED, - TAN_CHECKED, - ASIN_CHECKED, - ACOS_CHECKED, - LN_CHECKED, - LOG10_CHECKED, - LOG2_CHECKED, - LOG1P_CHECKED, - LOGB_CHECKED, -}; - -struct Add { - template - static constexpr T Call(Arg0 left, Arg1 right) { - if constexpr (is_arithmetic_v) - return left + right; - } -}; - -struct Sub { - template - static constexpr T Call(Arg0 left, Arg1 right) { - if constexpr (is_arithmetic_v) - return left - right; - } -}; - -struct AddChecked { - template - static constexpr T Call(Arg0 left, Arg1 right) { - static_assert(is_same::value && is_same::value, ""); - if constexpr(is_arithmetic_v) { - return left + right; - } - } -}; - - -struct SubChecked { - template - static constexpr T Call(Arg0 left, Arg1 right) { - static_assert(is_same::value && is_same::value, ""); - if constexpr(is_arithmetic_v) { - return left - right; - } - } -}; - -template -using maybe_make_unsigned = conditional_t && 
!is_same_v, make_unsigned_t, T>; - -template > -constexpr Unsigned to_unsigned(T signed_) { - return static_cast(signed_); -} - -struct Multiply { - static_assert(is_same_v, ""); - static_assert(is_same_v, ""); - static_assert(is_same_v, ""); - static_assert(is_same_v, ""); - static_assert(is_same_v, ""); - static_assert(is_same_v, ""); - static_assert(is_same_v, ""); - static_assert(is_same_v, ""); - - template - static constexpr T Call(Arg0 left, Arg1 right) { - static_assert(is_same_v && is_same_v, ""); - if constexpr(is_floating_point_v) { - return left * right; - } else if constexpr(is_unsigned_v && !is_same_v) { - return left * right; - } else if constexpr(is_signed_v && !is_same_v) { - return to_unsigned(left) * to_unsigned(right); - } else if constexpr(is_same_v || is_same_v) { - // multiplication of 16 bit integer types implicitly promotes to - // signed 32 bit integer. However, some inputs may overflow (which - // triggers undefined behavior). Therefore we first cast to 32 bit - // unsigned integers where overflow is well defined. 
- return static_cast(left) * static_cast(right); - } - } -}; - -struct MultiplyChecked { - template - static constexpr T Call(Arg0 left, Arg1 right) { - static_assert(is_same_v && is_same_v, ""); - if constexpr(is_arithmetic_v) { - return left * right; - } - } -}; - -struct AbsoluteValue { - template - static constexpr T Call(Arg input) { - if constexpr(is_same_v) { - *(((int*)&input)+0) &= 0x7fffffff; - return input; - } else if constexpr(is_same_v) { - *(((int*)&input)+1) &= 0x7fffffff; - return input; - } else if constexpr(is_unsigned_v) { - return input; - } else { - const auto mask = input >> (sizeof(Arg) * CHAR_BIT - 1); - return (input + mask) ^ mask; - } - } -}; - -struct AbsoluteValueChecked { - template - static constexpr T Call(Arg input) { - if constexpr(is_same_v) { - *(((int*)&input)+0) &= 0x7fffffff; - return input; - } else if constexpr(is_same_v) { - *(((int*)&input)+1) &= 0x7fffffff; - return input; - } else if constexpr(is_unsigned_v) { - return input; - } else { - const auto mask = input >> (sizeof(Arg) * CHAR_BIT - 1); - return (input + mask) ^ mask; - } - } -}; - -struct Negate { - template - static constexpr T Call(Arg input) { - if constexpr(is_floating_point_v) { - return -input; - } else if constexpr(is_unsigned_v) { - return ~input + 1; - } else { - return -input; - } - } -}; - -struct NegateChecked { - template - static constexpr T Call(Arg input) { - static_assert(is_same_v, ""); - if constexpr(is_floating_point_v) { - return -input; - } else if constexpr(is_unsigned_v) { - return 0; - } else { - return -input; - } - } -}; - -struct Sign { - template - static constexpr T Call(Arg input) { - if constexpr(is_floating_point_v) { - return isnan(input) ? input : ((input == 0) ? 0 : (signbit(input) ? -1 : 1)); - } else if constexpr(is_unsigned_v) { - return input > 0 ? 1 : 0; - } else if constexpr(is_signed_v) { - return input > 0 ? 1 : (input ? 
-1 : 0); - } - } -}; - -template -struct arithmetic_op_arr_arr_impl { - static inline void exec(const void* in_left, const void* in_right, void* out, const int len) { - const T* left = reinterpret_cast(in_left); - const T* right = reinterpret_cast(in_right); - OutT* output = reinterpret_cast(out); - - for (int i = 0; i < len; ++i) { - output[i] = Op::template Call(left[i], right[i]); - } - } -}; - -template -struct arithmetic_op_arr_scalar_impl { - static inline void exec(const void* in_left, const void* scalar_right, void* out, const int len) { - const T* left = reinterpret_cast(in_left); - const T right = *reinterpret_cast(scalar_right); - OutT* output = reinterpret_cast(out); - - for (int i = 0; i < len; ++i) { - output[i] = Op::template Call(left[i], right); - } - } -}; - -template -struct arithmetic_op_scalar_arr_impl { - static inline void exec(const void* scalar_left, const void* in_right, void* out, const int len) { - const T left = *reinterpret_cast(scalar_left); - const T* right = reinterpret_cast(in_right); - OutT* output = reinterpret_cast(out); - - for (int i = 0; i < len; ++i) { - output[i] = Op::template Call(left, right[i]); - } - } -}; - -template -struct arithmetic_unary_op_impl { - static inline void exec(const void* arg, void* out, const int len) { - const T* input = reinterpret_cast(arg); - OutT* output = reinterpret_cast(out); - - for (int i = 0; i < len; ++i) { - output[i] = Op::template Call(input[i]); - } - } -}; - -template typename Impl> -static inline void arithmetic_op(const int type, const void* in_left, const void* in_right, void* output, const int len) { - const auto intype = static_cast(type); - - switch (intype) { - case arrtype::UINT8: - return Impl::exec(in_left, in_right, output, len); - case arrtype::INT8: - return Impl::exec(in_left, in_right, output, len); - case arrtype::UINT16: - return Impl::exec(in_left, in_right, output, len); - case arrtype::INT16: - return Impl::exec(in_left, in_right, output, len); - case 
arrtype::UINT32: - return Impl::exec(in_left, in_right, output, len); - case arrtype::INT32: - return Impl::exec(in_left, in_right, output, len); - case arrtype::UINT64: - return Impl::exec(in_left, in_right, output, len); - case arrtype::INT64: - return Impl::exec(in_left, in_right, output, len); - case arrtype::FLOAT32: - return Impl::exec(in_left, in_right, output, len); - case arrtype::FLOAT64: - return Impl::exec(in_left, in_right, output, len); - default: - break; - } -} - -template typename Impl, typename Input> -static inline void arithmetic_op(const int otype, const void* input, void* output, const int len) { - const auto outtype = static_cast(otype); - - switch (outtype) { - case arrtype::UINT8: - return Impl::exec(input, output, len); - case arrtype::INT8: - return Impl::exec(input, output, len); - case arrtype::UINT16: - return Impl::exec(input, output, len); - case arrtype::INT16: - return Impl::exec(input, output, len); - case arrtype::UINT32: - return Impl::exec(input, output, len); - case arrtype::INT32: - return Impl::exec(input, output, len); - case arrtype::UINT64: - return Impl::exec(input, output, len); - case arrtype::INT64: - return Impl::exec(input, output, len); - case arrtype::FLOAT32: - return Impl::exec(input, output, len); - case arrtype::FLOAT64: - return Impl::exec(input, output, len); - default: - break; - } -} - - -template typename Impl> -static inline void arithmetic_op(const int type, const void* input, void* output, const int len) { - const auto intype = static_cast(type); - - switch (intype) { - case arrtype::UINT8: - return Impl::exec(input, output, len); - case arrtype::INT8: - return Impl::exec(input, output, len); - case arrtype::UINT16: - return Impl::exec(input, output, len); - case arrtype::INT16: - return Impl::exec(input, output, len); - case arrtype::UINT32: - return Impl::exec(input, output, len); - case arrtype::INT32: - return Impl::exec(input, output, len); - case arrtype::UINT64: - return Impl::exec(input, 
output, len); - case arrtype::INT64: - return Impl::exec(input, output, len); - case arrtype::FLOAT32: - return Impl::exec(input, output, len); - case arrtype::FLOAT64: - return Impl::exec(input, output, len); - default: - break; - } -} - -template typename Impl> -static inline void arithmetic_op(const int itype, const int otype, const void* input, void* output, const int len) { - const auto intype = static_cast(itype); - - switch (intype) { - case arrtype::UINT8: - return arithmetic_op(otype, input, output, len); - case arrtype::INT8: - return arithmetic_op(otype, input, output, len); - case arrtype::UINT16: - return arithmetic_op(otype, input, output, len); - case arrtype::INT16: - return arithmetic_op(otype, input, output, len); - case arrtype::UINT32: - return arithmetic_op(otype, input, output, len); - case arrtype::INT32: - return arithmetic_op(otype, input, output, len); - case arrtype::UINT64: - return arithmetic_op(otype, input, output, len); - case arrtype::INT64: - return arithmetic_op(otype, input, output, len); - case arrtype::FLOAT32: - return arithmetic_op(otype, input, output, len); - case arrtype::FLOAT64: - return arithmetic_op(otype, input, output, len); - default: - break; - } -} - -template