Skip to content

Commit

Permalink
Support parallel reading of Parquet files.
Browse files Browse the repository at this point in the history
Use a BufReader to read the page header, avoiding fragmented reads.

add hadoop shim.
  • Loading branch information
zhangli20 committed Nov 19, 2024
1 parent 15751e9 commit 5f8000c
Show file tree
Hide file tree
Showing 14 changed files with 388 additions and 230 deletions.
68 changes: 34 additions & 34 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 19 additions & 19 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,27 +66,27 @@ serde_json = { version = "1.0.96" }

[patch.crates-io]
# datafusion: branch=v42-blaze
datafusion = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "9a09e14"}
datafusion-common = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "9a09e14"}
datafusion-expr = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "9a09e14"}
datafusion-execution = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "9a09e14"}
datafusion-optimizer = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "9a09e14"}
datafusion-physical-expr = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "9a09e14"}
orc-rust = { git = "https://github.com/blaze-init/datafusion-orc.git", rev = "9c74ac3"}
datafusion = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "2bc42ea73"}
datafusion-common = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "2bc42ea73"}
datafusion-expr = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "2bc42ea73"}
datafusion-execution = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "2bc42ea73"}
datafusion-optimizer = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "2bc42ea73"}
datafusion-physical-expr = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "2bc42ea73"}
orc-rust = { git = "https://github.com/blaze-init/datafusion-orc.git", rev = "7833d7d"}

# arrow: branch=v53-blaze
arrow = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
arrow-arith = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
arrow-array = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
arrow-buffer = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
arrow-cast = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
arrow-data = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
arrow-ord = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
arrow-row = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
arrow-schema = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
arrow-select = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
arrow-string = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
parquet = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
arrow = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
arrow-arith = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
arrow-array = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
arrow-buffer = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
arrow-cast = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
arrow-data = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
arrow-ord = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
arrow-row = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
arrow-schema = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
arrow-select = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
arrow-string = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
parquet = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}

# serde_json: branch=v1.0.96-blaze
serde_json = { git = "https://github.com/blaze-init/json", branch = "v1.0.96-blaze" }
38 changes: 38 additions & 0 deletions hadoop-shim/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <!--
    Maven descriptor for the hadoop-shim module. It is packaged as a jar and
    inherits from the blaze-engine parent POM located one directory up.
  -->
  <modelVersion>4.0.0</modelVersion>

  <parent>
    <groupId>org.blaze</groupId>
    <artifactId>blaze-engine</artifactId>
    <!-- ${revision} is not defined in this file; it is expected to be supplied by the parent build. -->
    <version>${revision}</version>
    <relativePath>../</relativePath>
  </parent>
  <groupId>org.blaze</groupId>
  <artifactId>hadoop-shim</artifactId>
  <packaging>jar</packaging>

  <dependencies>
    <!-- No version is declared here; presumably managed by the parent POM (TODO confirm). -->
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <scope>provided</scope>
    </dependency>
    <!--
      The artifact suffix uses ${scalaVersion}, the same property the scalatest
      dependency below relies on, so both artifacts always target one Scala
      binary version instead of this one being pinned to 2.12.
    -->
    <dependency>
      <groupId>org.scala-lang.modules</groupId>
      <artifactId>scala-java8-compat_${scalaVersion}</artifactId>
      <version>0.9.1</version>
    </dependency>
    <!-- Test-only dependency; no version declared here, presumably managed by the parent POM (TODO confirm). -->
    <dependency>
      <groupId>org.scalatest</groupId>
      <artifactId>scalatest_${scalaVersion}</artifactId>
      <scope>test</scope>
    </dependency>
    <!-- Hadoop client API, declared with provided scope so it is not bundled into this jar. -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client-api</artifactId>
      <version>3.4.0</version>
      <scope>provided</scope>
    </dependency>
  </dependencies>
</project>
Loading

0 comments on commit 5f8000c

Please sign in to comment.