Skip to content

Commit d022492

Browse files
committed
Merge branch 'master' into SPARK-34716
2 parents 01effb8 + d04b467 commit d022492

File tree

535 files changed

+42580
-8131
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

535 files changed

+42580
-8131
lines changed

.github/workflows/benchmark.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ jobs:
4747
SPARK_BENCHMARK_NUM_SPLITS: ${{ github.event.inputs.num-splits }}
4848
SPARK_BENCHMARK_CUR_SPLIT: ${{ matrix.split }}
4949
SPARK_GENERATE_BENCHMARK_FILES: 1
50+
SPARK_LOCAL_IP: localhost
5051
steps:
5152
- name: Checkout Spark repository
5253
uses: actions/checkout@v2

.github/workflows/build_and_test.yml

Lines changed: 45 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,8 @@ name: Build and test
33
on:
44
push:
55
branches:
6-
- master
7-
pull_request:
8-
branches:
9-
- master
10-
workflow_dispatch:
11-
inputs:
12-
target:
13-
description: 'Target branch to run'
14-
required: true
6+
- '**'
7+
- '!branch-*.*'
158

169
jobs:
1710
# Build: build Spark and run the tests for specified modules.
@@ -82,16 +75,24 @@ jobs:
8275
# GitHub Actions' default miniconda to use in pip packaging test.
8376
CONDA_PREFIX: /usr/share/miniconda
8477
GITHUB_PREV_SHA: ${{ github.event.before }}
85-
GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
78+
SPARK_LOCAL_IP: localhost
8679
steps:
8780
- name: Checkout Spark repository
8881
uses: actions/checkout@v2
8982
# In order to fetch changed files
9083
with:
9184
fetch-depth: 0
92-
- name: Merge dispatched input branch
93-
if: ${{ github.event.inputs.target != '' }}
94-
run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
85+
repository: apache/spark
86+
ref: master
87+
- name: Sync the current branch with the latest in Apache Spark
88+
if: github.repository != 'apache/spark'
89+
id: sync-branch
90+
run: |
91+
apache_spark_ref=`git rev-parse HEAD`
92+
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
93+
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
94+
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
95+
echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
9596
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
9697
- name: Cache Scala, SBT and Maven
9798
uses: actions/cache@v2
@@ -132,6 +133,7 @@ jobs:
132133
# Run the tests.
133134
- name: Run tests
134135
run: |
136+
export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
135137
# Hive and SQL tests become flaky when running in parallel as it's too intensive.
136138
if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi
137139
./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
@@ -170,16 +172,24 @@ jobs:
170172
# GitHub Actions' default miniconda to use in pip packaging test.
171173
CONDA_PREFIX: /usr/share/miniconda
172174
GITHUB_PREV_SHA: ${{ github.event.before }}
173-
GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
175+
SPARK_LOCAL_IP: localhost
174176
steps:
175177
- name: Checkout Spark repository
176178
uses: actions/checkout@v2
177179
# In order to fetch changed files
178180
with:
179181
fetch-depth: 0
180-
- name: Merge dispatched input branch
181-
if: ${{ github.event.inputs.target != '' }}
182-
run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
182+
repository: apache/spark
183+
ref: master
184+
- name: Sync the current branch with the latest in Apache Spark
185+
if: github.repository != 'apache/spark'
186+
id: sync-branch
187+
run: |
188+
apache_spark_ref=`git rev-parse HEAD`
189+
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
190+
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
191+
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
192+
echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
183193
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
184194
- name: Cache Scala, SBT and Maven
185195
uses: actions/cache@v2
@@ -214,6 +224,7 @@ jobs:
214224
# Run the tests.
215225
- name: Run tests
216226
run: |
227+
export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
217228
./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
218229
- name: Upload test results to report
219230
if: always()
@@ -237,16 +248,24 @@ jobs:
237248
HADOOP_PROFILE: hadoop3.2
238249
HIVE_PROFILE: hive2.3
239250
GITHUB_PREV_SHA: ${{ github.event.before }}
240-
GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
251+
SPARK_LOCAL_IP: localhost
241252
steps:
242253
- name: Checkout Spark repository
243254
uses: actions/checkout@v2
244255
# In order to fetch changed files
245256
with:
246257
fetch-depth: 0
247-
- name: Merge dispatched input branch
248-
if: ${{ github.event.inputs.target != '' }}
249-
run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
258+
repository: apache/spark
259+
ref: master
260+
- name: Sync the current branch with the latest in Apache Spark
261+
if: github.repository != 'apache/spark'
262+
id: sync-branch
263+
run: |
264+
apache_spark_ref=`git rev-parse HEAD`
265+
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
266+
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
267+
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
268+
echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
250269
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
251270
- name: Cache Scala, SBT and Maven
252271
uses: actions/cache@v2
@@ -272,6 +291,7 @@ jobs:
272291
# R issues at docker environment
273292
export TZ=UTC
274293
export _R_CHECK_SYSTEM_CLOCK_=FALSE
294+
export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
275295
./dev/run-tests --parallelism 2 --modules sparkr
276296
- name: Upload test results to report
277297
if: always()
@@ -468,6 +488,8 @@ jobs:
468488
tpcds-1g:
469489
name: Run TPC-DS queries with SF=1
470490
runs-on: ubuntu-20.04
491+
env:
492+
SPARK_LOCAL_IP: localhost
471493
steps:
472494
- name: Checkout Spark repository
473495
uses: actions/checkout@v2
@@ -476,15 +498,13 @@ jobs:
476498
uses: actions/cache@v2
477499
with:
478500
path: ./tpcds-sf-1
479-
key: tpcds-${{ hashFiles('tpcds-sf-1/.spark-tpcds-sf-1.md5') }}
480-
restore-keys: |
481-
tpcds-
501+
key: tpcds-556111e35d400f56cb0625dc16e9063d54628320
482502
- name: Checkout TPC-DS (SF=1) generated data repository
483503
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
484504
uses: actions/checkout@v2
485505
with:
486506
repository: maropu/spark-tpcds-sf-1
487-
ref: 6b660a53091bd6d23cbe58b0f09aae08e71cc667
507+
ref: 556111e35d400f56cb0625dc16e9063d54628320
488508
path: ./tpcds-sf-1
489509
- name: Cache Scala, SBT and Maven
490510
uses: actions/cache@v2

.github/workflows/labeler.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,18 @@
1717
# under the License.
1818
#
1919

20-
name: "Pull Request Labeler"
20+
# Intentionally has a general name,
21+
# because the test status check created in GitHub Actions
22+
# currently randomly picks any associated workflow.
23+
# So, the name was changed to make sense in that context too.
24+
# See also https://github.community/t/specify-check-suite-when-creating-a-checkrun/118380/10
25+
26+
name: "On pull requests"
2127
on: pull_request_target
2228

2329
jobs:
2430
label:
31+
name: Label pull requests
2532
runs-on: ubuntu-latest
2633
steps:
2734
# In order to get back the negated matches like in the old config,
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+
20+
# Intentionally has a general name,
21+
# because the test status check created in GitHub Actions
22+
# currently randomly picks any associated workflow.
23+
# So, the name was changed to make sense in that context too.
24+
# See also https://github.community/t/specify-check-suite-when-creating-a-checkrun/118380/10
25+
name: On pull request update
26+
on:
27+
pull_request_target:
28+
types: [opened, reopened, synchronize]
29+
30+
jobs:
31+
notify:
32+
name: Notify test workflow
33+
runs-on: ubuntu-20.04
34+
steps:
35+
- name: "Notify test workflow"
36+
uses: actions/github-script@v3
37+
if: ${{ github.base_ref == 'master' }}
38+
with:
39+
github-token: ${{ secrets.GITHUB_TOKEN }}
40+
script: |
41+
const endpoint = 'GET /repos/:owner/:repo/actions/workflows/:id/runs?&branch=:branch'
42+
43+
// TODO: Should use pull_request.user and pull_request.user.repos_url?
44+
// If a different person creates a commit to another forked repo,
45+
// this lookup would not be able to detect it.
46+
const params = {
47+
owner: context.payload.pull_request.head.repo.owner.login,
48+
repo: context.payload.pull_request.head.repo.name,
49+
id: 'build_and_test.yml',
50+
branch: context.payload.pull_request.head.ref,
51+
}
52+
53+
console.log('Ref: ' + context.payload.pull_request.head.ref)
54+
console.log('SHA: ' + context.payload.pull_request.head.sha)
55+
56+
// Wait 3 seconds to make sure the fork repository triggered a workflow.
57+
await new Promise(r => setTimeout(r, 3000))
58+
59+
const runs = await github.request(endpoint, params)
60+
const runID = runs.data.workflow_runs[0].id
61+
// TODO: If no workflows were found, it's likely GitHub Actions was not enabled
62+
63+
if (runs.data.workflow_runs[0].head_sha != context.payload.pull_request.head.sha) {
64+
throw new Error('There was a new unsynced commit pushed. Please retrigger the workflow.');
65+
}
66+
67+
const runUrl = 'https://github.com/'
68+
+ context.payload.pull_request.head.repo.full_name
69+
+ '/actions/runs/'
70+
+ runID
71+
72+
const name = 'Build and test'
73+
const head_sha = context.payload.pull_request.head.sha
74+
const status = 'queued'
75+
76+
github.checks.create({
77+
...context.repo,
78+
name,
79+
head_sha,
80+
status,
81+
output: {
82+
title: 'Test results',
83+
summary: runUrl,
84+
text: JSON.stringify({
85+
owner: context.payload.pull_request.head.repo.owner.login,
86+
repo: context.payload.pull_request.head.repo.name,
87+
run_id: runID
88+
})
89+
}
90+
})
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+
20+
name: Update build status workflow
21+
22+
on:
23+
schedule:
24+
- cron: "*/15 * * * *"
25+
26+
jobs:
27+
update:
28+
name: Update build status
29+
runs-on: ubuntu-20.04
30+
steps:
31+
- name: "Update build status"
32+
uses: actions/github-script@v3
33+
with:
34+
github-token: ${{ secrets.GITHUB_TOKEN }}
35+
script: |
36+
const endpoint = 'GET /repos/:owner/:repo/pulls?state=:state'
37+
const params = {
38+
owner: context.repo.owner,
39+
repo: context.repo.repo,
40+
state: 'open'
41+
}
42+
43+
// See https://docs.github.com/en/graphql/reference/enums#mergestatestatus
44+
const maybeReady = ['behind', 'clean', 'draft', 'has_hooks', 'unknown', 'unstable'];
45+
46+
// Iterate open PRs
47+
for await (const prs of github.paginate.iterator(endpoint,params)) {
48+
// Each page
49+
for await (const pr of prs.data) {
50+
console.log('SHA: ' + pr.head.sha)
51+
console.log(' Mergeable status: ' + pr.mergeable_state)
52+
if (pr.mergeable_state == null || maybeReady.includes(pr.mergeable_state)) {
53+
const checkRuns = await github.request('GET /repos/{owner}/{repo}/commits/{ref}/check-runs', {
54+
owner: context.repo.owner,
55+
repo: context.repo.repo,
56+
ref: pr.head.sha
57+
})
58+
59+
// Iterate over the GitHub Checks in the PR
60+
for await (const cr of checkRuns.data.check_runs) {
61+
if (cr.name == 'Build and test') {
62+
// text contains parameters to make request in JSON.
63+
const params = JSON.parse(cr.output.text)
64+
65+
// Get the workflow run in the forked repository
66+
const run = await github.request('GET /repos/{owner}/{repo}/actions/runs/{run_id}', params)
67+
68+
// Keep syncing the status of the checks
69+
if (run.data.status == 'completed') {
70+
console.log(' Run ' + cr.id + ': set status (' + run.data.status + ') and conclusion (' + run.data.conclusion + ')')
71+
const response = await github.request('PATCH /repos/{owner}/{repo}/check-runs/{check_run_id}', {
72+
owner: context.repo.owner,
73+
repo: context.repo.repo,
74+
check_run_id: cr.id,
75+
output: cr.output,
76+
status: run.data.status,
77+
conclusion: run.data.conclusion
78+
})
79+
} else {
80+
console.log(' Run ' + cr.id + ': set status (' + run.data.status + ')')
81+
const response = await github.request('PATCH /repos/{owner}/{repo}/check-runs/{check_run_id}', {
82+
owner: context.repo.owner,
83+
repo: context.repo.repo,
84+
check_run_id: cr.id,
85+
output: cr.output,
86+
status: run.data.status,
87+
})
88+
}
89+
90+
break
91+
}
92+
}
93+
}
94+
}
95+
}

common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ public void handle(ResponseMessage message) throws Exception {
188188
if (listener == null) {
189189
logger.warn("Ignoring response for RPC {} from {} ({} bytes) since it is not outstanding",
190190
resp.requestId, getRemoteAddress(channel), resp.body().size());
191+
resp.body().release();
191192
} else {
192193
outstandingRpcs.remove(resp.requestId);
193194
try {

common/network-common/src/test/java/org/apache/spark/network/TestUtils.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
public class TestUtils {
2323
public static String getLocalHost() {
2424
try {
25-
return InetAddress.getLocalHost().getHostAddress();
25+
return (System.getenv().containsKey("SPARK_LOCAL_IP"))?
26+
System.getenv("SPARK_LOCAL_IP"):
27+
InetAddress.getLocalHost().getHostAddress();
2628
} catch (Exception e) {
2729
throw new RuntimeException(e);
2830
}

0 commit comments

Comments
 (0)