This repository has been archived by the owner on May 31, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 53
/
justfile
130 lines (109 loc) · 6.43 KB
/
justfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# Load environment variables from a local .env file if present.
set dotenv-load := true

# Delight collector endpoint that the test apps report their Spark events to.
collector_url := 'https://api.delight.datamechanics.co/collector/'

# Delight access token. Empty by default — override on the command line,
# e.g. `just api_key=XXX run_3_3_0`.
api_key := ''

# Directory where Spark distributions are downloaded and unpacked.
spark_distributions_folder := 'spark_distributions'
# Build the delight agent jar for every cross-built Scala version.
build:
    sbt +package

# Remove sbt build products.
clean:
    sbt clean

# Cross-publish signed artifacts for every Scala version.
publish:
    sbt +publishSigned
# Download a Spark distribution archive into {{spark_distributions_folder}}
# and unpack it. The download is skipped when the archive already exists,
# and both download and extraction are skipped when the unpacked folder
# already exists.
download_spark_distribution url:
    #!/usr/bin/env sh
    set -e
    mkdir -p {{spark_distributions_folder}} && cd {{spark_distributions_folder}}
    URL={{url}}
    ARCHIVE="${URL##*/}"   # e.g. spark-3.3.0-bin-hadoop3.tgz
    FOLDER="${ARCHIVE%.*}" # e.g. spark-3.3.0-bin-hadoop3
    if [ ! -d "$FOLDER" ] ; then
        if [ ! -f "$ARCHIVE" ] ; then
            # -f makes curl fail on HTTP errors instead of saving the error
            # page as the archive (which would make tar fail confusingly).
            curl -f -O -L "$URL"
        else
            echo "Did not download $URL because archive $ARCHIVE exists"
        fi
        tar -zxf "$ARCHIVE"
    else
        echo "Did not download $URL because folder $FOLDER exists"
    fi
# Pinned Spark distributions used for integration testing, one recipe per
# supported Spark/Scala combination.
download_2_3_2: (download_spark_distribution 'https://archive.apache.org/dist/spark/spark-2.3.2/spark-2.3.2-bin-hadoop2.7.tgz')
download_2_4_0: (download_spark_distribution 'https://archive.apache.org/dist/spark/spark-2.4.0/spark-2.4.0-bin-hadoop2.7.tgz')
download_2_4_7: (download_spark_distribution 'https://archive.apache.org/dist/spark/spark-2.4.7/spark-2.4.7-bin-hadoop2.7.tgz')
# Scala 2.12 build of Spark 2.4.7 (the default 2.4.x builds are Scala 2.11).
download_2_4_7_2_12: (download_spark_distribution 'https://archive.apache.org/dist/spark/spark-2.4.7/spark-2.4.7-bin-without-hadoop-scala-2.12.tgz')
download_3_0_1: (download_spark_distribution 'https://archive.apache.org/dist/spark/spark-3.0.1/spark-3.0.1-bin-hadoop3.2.tgz')
download_3_1_1: (download_spark_distribution 'https://archive.apache.org/dist/spark/spark-3.1.1/spark-3.1.1-bin-hadoop3.2.tgz')
download_3_2_0: (download_spark_distribution 'https://archive.apache.org/dist/spark/spark-3.2.0/spark-3.2.0-bin-hadoop3.2.tgz')
download_3_3_0: (download_spark_distribution 'https://archive.apache.org/dist/spark/spark-3.3.0/spark-3.3.0-bin-hadoop3.tgz')
# Fetch every distribution listed above.
download_all_spark_distributions: download_2_3_2 download_2_4_0 download_2_4_7 download_2_4_7_2_12 download_3_0_1 download_3_1_1 download_3_2_0 download_3_3_0
# Run the SparkPi example on a local Spark distribution with the delight
# listener attached, resolving the agent as a -SNAPSHOT package from the
# Sonatype snapshots repository. The snapshot version is "latest" on the
# main branch, otherwise the current git branch name.
run_test_app spark_distribution_folder spark_version scala_version:
    #!/bin/bash
    # Fail fast on any error (consistent with download_spark_distribution).
    set -euo pipefail
    # NOTE: `git branch --show-current` requires git >= 2.22.
    if [ "$(git branch --show-current)" = "main" ]; then
        VERSION="latest"
    else
        VERSION=$(git branch --show-current)
    fi
    {{spark_distribution_folder}}/bin/spark-submit \
        --class org.apache.spark.examples.SparkPi \
        --master 'local[*]' \
        --packages co.datamechanics:delight_{{scala_version}}:${VERSION}-SNAPSHOT \
        --repositories https://oss.sonatype.org/content/repositories/snapshots \
        --conf spark.delight.accessToken.secret={{api_key}} \
        --conf spark.delight.collector.url={{collector_url}} \
        --conf spark.extraListeners=co.datamechanics.delight.DelightListener \
        {{spark_distribution_folder}}/examples/jars/spark-examples_{{scala_version}}-{{spark_version}}.jar \
        100
# Same as run_test_app, but uses the locally built agent jar from
# agent/target/ instead of resolving the snapshot from Sonatype.
run_test_app_local_jar spark_distribution_folder spark_version scala_version:
    #!/bin/bash
    # Fail fast on any error (consistent with download_spark_distribution).
    set -euo pipefail
    # NOTE: `git branch --show-current` requires git >= 2.22.
    if [ "$(git branch --show-current)" = "main" ]; then
        VERSION="latest"
    else
        VERSION=$(git branch --show-current)
    fi
    {{spark_distribution_folder}}/bin/spark-submit \
        --class org.apache.spark.examples.SparkPi \
        --master 'local[*]' \
        --jars agent/target/scala-{{scala_version}}/delight_{{scala_version}}-${VERSION}-SNAPSHOT.jar \
        --conf spark.delight.accessToken.secret={{api_key}} \
        --conf spark.delight.collector.url={{collector_url}} \
        --conf spark.delight.logDuration=true \
        --conf spark.extraListeners=co.datamechanics.delight.DelightListener \
        {{spark_distribution_folder}}/examples/jars/spark-examples_{{scala_version}}-{{spark_version}}.jar \
        100
# Same as run_test_app_local_jar, but runs spark-submit inside the given
# Docker image, bind-mounting the locally built agent jar into the container.
run_test_app_docker image spark_version scala_version:
    #!/bin/bash
    # Fail fast on any error (consistent with download_spark_distribution).
    set -euo pipefail
    # NOTE: `git branch --show-current` requires git >= 2.22.
    if [ "$(git branch --show-current)" = "main" ]; then
        VERSION="latest"
    else
        VERSION=$(git branch --show-current)
    fi
    # Quote the mount source so a workspace path containing spaces still works.
    docker run --rm \
        -v "$(pwd)/agent/target/scala-{{scala_version}}/delight_{{scala_version}}-${VERSION}-SNAPSHOT.jar:/opt/spark/delight.jar" \
        {{image}} \
        /opt/spark/bin/spark-submit --class org.apache.spark.examples.SparkPi --master 'local[*]' \
        --jars /opt/spark/delight.jar \
        --conf spark.delight.accessToken.secret={{api_key}} \
        --conf spark.delight.collector.url={{collector_url}} \
        --conf spark.delight.logDuration=true \
        --conf spark.extraListeners=co.datamechanics.delight.DelightListener \
        /opt/spark/examples/jars/spark-examples_{{scala_version}}-{{spark_version}}.jar \
        100
# SparkPi smoke tests per Spark version, agent resolved from Sonatype
# snapshots. The distribution folder must exist (see download_* recipes).
run_2_3_2: (run_test_app 'spark_distributions/spark-2.3.2-bin-hadoop2.7' '2.3.2' '2.11')
run_2_4_0: (run_test_app 'spark_distributions/spark-2.4.0-bin-hadoop2.7' '2.4.0' '2.11')
run_2_4_7: (run_test_app 'spark_distributions/spark-2.4.7-bin-hadoop2.7' '2.4.7' '2.11')
run_2_4_7_2_12: (run_test_app 'spark_distributions/spark-2.4.7-bin-without-hadoop-scala-2.12' '2.4.7' '2.12')
run_3_0_1: (run_test_app 'spark_distributions/spark-3.0.1-bin-hadoop3.2' '3.0.1' '2.12')
run_3_1_1: (run_test_app 'spark_distributions/spark-3.1.1-bin-hadoop3.2' '3.1.1' '2.12')
run_3_2_0: (run_test_app 'spark_distributions/spark-3.2.0-bin-hadoop3.2' '3.2.0' '2.12')
run_3_3_0: (run_test_app 'spark_distributions/spark-3.3.0-bin-hadoop3' '3.3.0' '2.12')
# Same smoke tests using the locally built agent jar (see build recipe).
run_local_jar_2_3_2: (run_test_app_local_jar 'spark_distributions/spark-2.3.2-bin-hadoop2.7' '2.3.2' '2.11')
run_local_jar_2_4_0: (run_test_app_local_jar 'spark_distributions/spark-2.4.0-bin-hadoop2.7' '2.4.0' '2.11')
run_local_jar_2_4_7: (run_test_app_local_jar 'spark_distributions/spark-2.4.7-bin-hadoop2.7' '2.4.7' '2.11')
run_local_jar_2_4_7_2_12: (run_test_app_local_jar 'spark_distributions/spark-2.4.7-bin-without-hadoop-scala-2.12' '2.4.7' '2.12')
run_local_jar_3_0_1: (run_test_app_local_jar 'spark_distributions/spark-3.0.1-bin-hadoop3.2' '3.0.1' '2.12')
run_local_jar_3_1_1: (run_test_app_local_jar 'spark_distributions/spark-3.1.1-bin-hadoop3.2' '3.1.1' '2.12')
run_local_jar_3_2_0: (run_test_app_local_jar 'spark_distributions/spark-3.2.0-bin-hadoop3.2' '3.2.0' '2.12')
run_local_jar_3_3_0: (run_test_app_local_jar 'spark_distributions/spark-3.3.0-bin-hadoop3' '3.3.0' '2.12')
# Same smoke tests inside Data Mechanics Spark Docker images.
run_docker_3_3_0: (run_test_app_docker 'datamechanics/spark:jvm-only-3.3.0-dm18' '3.3.0' '2.12')
run_docker_3_2_0: (run_test_app_docker 'datamechanics/spark:jvm-only-3.2.0-dm15' '3.2.0' '2.12')
run_docker_3_1_1: (run_test_app_docker 'datamechanics/spark:jvm-only-3.1.1-dm12' '3.1.1' '2.12')
run_docker_3_0_1: (run_test_app_docker 'datamechanics/spark:jvm-only-3.0.1-dm12' '3.0.1' '2.12')
run_docker_2_4_7: (run_test_app_docker 'datamechanics/spark:jvm-only-2.4.7-hadoop-3.1.0-java-8-scala-2.11-dm12' '2.4.7' '2.11')