From 4fa2c17b44076c2861a4da065da5b4c7e8beefa9 Mon Sep 17 00:00:00 2001 From: Cristian Petrescu-Prahova Date: Wed, 30 Mar 2022 17:44:52 -0700 Subject: [PATCH 1/8] ok --- parquet/src/file/serialized_reader.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index 688272c0b3de..0c826d677203 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -138,6 +138,13 @@ impl SerializedFileReader { }) } + pub fn new_with_metadata(chunk_reader: R, metadata: ParquetMetaData) -> Self { + Self { + chunk_reader: Arc::new(chunk_reader), + metadata, + } + } + /// Filters row group metadata to only those row groups, /// for which the predicate function returns true pub fn filter_row_groups( From 05010e9d754818d9da342d269e0061aae78f26e5 Mon Sep 17 00:00:00 2001 From: Cristian Petrescu-Prahova Date: Wed, 30 Mar 2022 17:49:25 -0700 Subject: [PATCH 2/8] ok --- .github/workflows/integration.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index a713d05e04bf..d9f675b344d0 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -18,8 +18,8 @@ name: Integration on: - push: - pull_request: + # push: + # pull_request: jobs: @@ -87,7 +87,6 @@ jobs: # the hardcoded version is wrong and should be removed either # after https://issues.apache.org/jira/browse/ARROW-13083 # gets fixes or pyarrow 5.0 gets released - hardcoded version is wrong, bot contains run: pip install --index-url https://pypi.fury.io/arrow-nightlies/ pyarrow==3.1.0.dev1030 - name: Run tests env: From d75ea46775abece1ab76cb9a3758ceb722d48aa8 Mon Sep 17 00:00:00 2001 From: Cristian Petrescu-Prahova Date: Wed, 30 Mar 2022 17:51:28 -0700 Subject: [PATCH 3/8] ok --- arrow/src/compute/kernels/comparison.rs | 60 ++++++++++++++++--------- 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index 9d11e3032db6..fc8ad991d89c 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -262,18 +262,28 @@ fn like_utf8_impl( regex } else { let mut prev_char = None; - let mut re_pattern = pat.replace(|c| { - let res = c == '%' && prev_char != Some('\\'); - prev_char = Some(c); - res - }, ".*").replace("\\%", "%"); + let mut re_pattern = pat + .replace( + |c| { + let res = c == '%' && prev_char != Some('\\'); + prev_char = Some(c); + res + }, + ".*", + ) + .replace("\\%", "%"); let mut prev_char = None; - re_pattern = re_pattern.replace(|c| { - let res = c == '_' && prev_char != Some('\\'); - prev_char = Some(c); - res - }, ".").replace("\\_", "_"); + re_pattern = re_pattern + .replace( + |c| { + let res = c == '_' && prev_char != Some('\\'); + prev_char = Some(c); + res + }, + ".", + ) + .replace("\\_", "_"); let re = RegexBuilder::new(&format!("^{}$", re_pattern)) .case_insensitive(!case_sensitive) .build() @@ -383,18 +393,28 @@ fn like_utf8_scalar_impl( } } else { let mut prev_char = None; - let mut re_pattern = right.replace(|c| { - let res = c == '%' && prev_char != Some('\\'); - prev_char = Some(c); - res - }, ".*").replace("\\%", "%"); + let mut re_pattern = right + .replace( + |c| { + let res = c == '%' && prev_char != Some('\\'); + prev_char = Some(c); + res + }, + ".*", + ) + .replace("\\%", "%"); let mut prev_char = None; - re_pattern = re_pattern.replace(|c| { - let res = c == '_' && prev_char != Some('\\'); - prev_char = Some(c); - res - }, ".").replace("\\_", "_"); + re_pattern = re_pattern + .replace( + |c| { + let res = c == '_' && prev_char != Some('\\'); + prev_char = Some(c); + res + }, + ".", + ) + .replace("\\_", "_"); let re = RegexBuilder::new(&format!("^{}$", re_pattern)) .case_insensitive(!case_sensitive) .build() From 5dbc9fd93bdf3bf8e48c75dee3cfa859b3a9d63a Mon Sep 17 00:00:00 2001 From: Cristian Petrescu-Prahova Date: Wed, 30 Mar 2022 17:54:42 -0700 Subject: [PATCH 4/8] ok --- .github/workflows/rust.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 5579072effb4..c31fbb93cb1d 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -31,7 +31,7 @@ jobs: strategy: matrix: arch: [amd64] - rust: [stable] + rust: [nightly-2021-07-04] container: image: ${{ matrix.arch }}/rust env: @@ -73,7 +73,7 @@ jobs: strategy: matrix: arch: [amd64] - rust: [stable] + rust: [nightly-2021-07-04] container: image: ${{ matrix.arch }}/rust env: @@ -174,7 +174,7 @@ jobs: strategy: matrix: os: [windows-latest, macos-latest] - rust: [stable] + rust: [nightly-2021-07-04] steps: - uses: actions/checkout@v2 with: @@ -202,7 +202,7 @@ jobs: strategy: matrix: arch: [amd64] - rust: [stable] + rust: [nightly-2021-07-04] container: image: ${{ matrix.arch }}/rust env: @@ -257,7 +257,7 @@ jobs: strategy: matrix: arch: [amd64] - rust: [stable] + rust: [nightly-2021-07-04] steps: - uses: actions/checkout@v2 with: @@ -341,7 +341,7 @@ jobs: strategy: matrix: arch: [amd64] - rust: [stable] + rust: [nightly-2021-07-04] container: image: ${{ matrix.arch }}/rust env: From 9c870d458e2bb604b87f2415c3f134d31b8cdc86 Mon Sep 17 00:00:00 2001 From: Cristian Petrescu-Prahova Date: Wed, 30 Mar 2022 17:55:07 -0700 Subject: [PATCH 5/8] ok --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index d9f675b344d0..6314a3db4f07 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -19,7 +19,7 @@ name: Integration on: # push: - # pull_request: + pull_request: jobs: From b259d1e81caf4d052026f218325fcf3a42a908a5 Mon Sep 17 00:00:00 2001 From: Cristian Petrescu-Prahova Date: Wed, 30 Mar 2022 17:57:55 -0700 Subject: [PATCH 6/8] ok --- .github/workflows/rust.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index c31fbb93cb1d..a04ccb3d44f2 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -126,7 +126,7 @@ jobs: strategy: matrix: arch: [amd64] - rust: [nightly-2021-07-04] + rust: [] container: image: ${{ matrix.arch }}/rust env: From 933188dc2af05869b03daecbc912568fec139a30 Mon Sep 17 00:00:00 2001 From: Cristian Petrescu-Prahova Date: Wed, 30 Mar 2022 20:13:58 -0700 Subject: [PATCH 7/8] ok --- .github/workflows/integration.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6314a3db4f07..a713d05e04bf 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -18,7 +18,7 @@ name: Integration on: - # push: + push: pull_request: jobs: @@ -87,6 +87,7 @@ jobs: # the hardcoded version is wrong and should be removed either # after https://issues.apache.org/jira/browse/ARROW-13083 # gets fixes or pyarrow 5.0 gets released + hardcoded version is wrong, bot contains run: pip install --index-url https://pypi.fury.io/arrow-nightlies/ pyarrow==3.1.0.dev1030 - name: Run tests env: From f485d75e6a1c084873ab0bbf8275dc7dc977d7b0 Mon Sep 17 00:00:00 2001 From: Cristian Petrescu-Prahova Date: Tue, 26 Apr 2022 11:58:55 -0700 Subject: [PATCH 8/8] ok --- parquet/src/file/serialized_reader.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index 0c826d677203..f325a86fc9c2 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -138,6 +138,7 @@ impl SerializedFileReader { }) } + /// Creates file reader from a Parquet file, using pre-read metadata. pub fn new_with_metadata(chunk_reader: R, metadata: ParquetMetaData) -> Self { Self { chunk_reader: Arc::new(chunk_reader),