Skip to content

Commit

Permalink
Merge pull request #35 from zilliztech/master
Browse files (browse the repository at this point in the history)
merge from zilliztech
  • Loading branch information
xiaocai2333 authored Mar 24, 2020
2 parents c4901ed + dda7d4c commit e92d931
Show file tree
Hide file tree
Showing 51 changed files with 4,398 additions and 2,412 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,5 @@ spark-warehouse/
tests/results
tests/rundocker.sh
tests/runlocal.sh

gui/server/data/0_5M_nyc_taxi_and_building.csv
40 changes: 31 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,42 @@ Arctern includes three types of column-based interface: C++ API, Python API, and
# Invoke Arctern API in PySpark

from pyspark.sql import SparkSession
import arctern
from arctern_pyspark import register_funcs, heatmap
from arctern.util import save_png
from arctern.util.vega import vega_heatmap

if __name__ == "__main__":
if __name__== "__main__":
spark = SparkSession \
.builder \
.appName("Arctern-PySpark example") \
.getOrCreate()
.builder \
.appName("Arctern-PySpark example") \
.getOrCreate()

spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")
arctern.pyspark.register(spark)
register_funcs(spark)

df = spark.read.format("csv") \
.option("header", True) \
.option("delimiter", ",") \
.schema("passenger_count long, pickup_longitude double, pickup_latitude double") \
.load("file:///tmp/0_5M_nyc_taxi_and_building.csv") \
.cache()
df.createOrReplaceTempView("nyc_taxi")

res = spark.sql(
"select ST_Point(pickup_longitude, pickup_latitude) as point, passenger_count as w \
from nyc_taxi \
where ST_Within(ST_Point(pickup_longitude, pickup_latitude), 'POLYGON ((-73.998427 40.730309, \
-73.954348 40.730309, \
-73.954348 40.780816, \
-73.998427 40.780816, \
-73.998427 40.730309))')")

vega = vega_heatmap(1024, 896, 10.0, [-73.998427, 40.730309, -73.954348, 40.780816], 'EPSG:4326')
res = heatmap(res, vega)
save_png(res, '/tmp/heatmap.png')

spark.catalog.dropTempView("nyc_taxi")

within_df = spark.read.json('./example.json').cache()
within_df.createOrReplaceTempView("within")
spark.sql("select ST_Within_UDF(geo0, geo1) from within").show()
spark.stop()
```

Expand Down
42 changes: 32 additions & 10 deletions README_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,45 @@ Arctern是一个面向大规模数据的地理信息分析引擎。定位如下
#### 代码示例:

```python
# 在pyspark上调用Arctern API
# Invoke Arctern API in PySpark

from pyspark.sql import SparkSession
import arctern
from arctern_pyspark import register_funcs, heatmap
from arctern.util import save_png
from arctern.util.vega import vega_heatmap

if __name__ == "__main__":
if __name__== "__main__":
spark = SparkSession \
.builder \
.appName("Arctern-PySpark example") \
.getOrCreate()
.builder \
.appName("Arctern-PySpark example") \
.getOrCreate()

spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")
arctern.pyspark.register(spark)
register_funcs(spark)

df = spark.read.format("csv") \
.option("header", True) \
.option("delimiter", ",") \
.schema("passenger_count long, pickup_longitude double, pickup_latitude double") \
.load("file:///tmp/0_5M_nyc_taxi_and_building.csv") \
.cache()
df.createOrReplaceTempView("nyc_taxi")

res = spark.sql(
"select ST_Point(pickup_longitude, pickup_latitude) as point, passenger_count as w \
from nyc_taxi \
where ST_Within(ST_Point(pickup_longitude, pickup_latitude), 'POLYGON ((-73.998427 40.730309, \
-73.954348 40.730309, \
-73.954348 40.780816, \
-73.998427 40.780816, \
-73.998427 40.730309))')")

vega = vega_heatmap(1024, 896, 10.0, [-73.998427, 40.730309, -73.954348, 40.780816], 'EPSG:4326')
res = heatmap(res, vega)
save_png(res, '/tmp/heatmap.png')

spark.catalog.dropTempView("nyc_taxi")

within_df = spark.read.json('./example.json').cache()
within_df.createOrReplaceTempView("within")
spark.sql("select ST_Within(geo0, geo1) from within").show()
spark.stop()
```

Expand Down
7 changes: 7 additions & 0 deletions ci/jenkins/pod/docker-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ spec:
command:
- cat
tty: true
resources:
limits:
memory: "8Gi"
cpu: "2"
requests:
memory: "2Gi"
cpu: "1"
volumeMounts:
- name: docker-sock
mountPath: /var/run/docker.sock
Expand Down
Loading

0 comments on commit e92d931

Please sign in to comment.