Skip to content

[QST] How to omit the Dataset.shuffle_by_keys step when exporting data from BigQuery to parquet #4409

[QST] How to omit the Dataset.shuffle_by_keys step when exporting data from BigQuery to parquet

[QST] How to omit the Dataset.shuffle_by_keys step when exporting data from BigQuery to parquet #4409

Workflow file for this run

# Copyright (c) 2020-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# A workflow to trigger ci on hybrid infra (github + self hosted runner)
name: Blossom-CI
on:
issue_comment:
types: [created]
workflow_dispatch:
inputs:
platform:
description: "runs-on argument"
required: false
args:
description: "argument"
required: false
jobs:
Authorization:
name: Authorization
runs-on: blossom
outputs:
args: ${{ env.args }}
# This job only runs for pull request comments
if: contains( '\
albert17,\
benfred,\
bschifferer,\
EvenOldridge,\
gabrielspmoreira,\
jperez999,\
karlhighley,\
marcromeyn,\
benfred,\
rjzamora,\
rnyak,\
sararb,\
', format('{0},', github.actor)) && github.event.comment.body == '/blossom-ci'
steps:
- name: Check if comment is issued by authorized person
run: blossom-ci
env:
OPERATION: "AUTH"
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
Vulnerability-scan:
name: Vulnerability scan
needs: [Authorization]
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
repository: ${{ fromJson(needs.Authorization.outputs.args).repo }}
ref: ${{ fromJson(needs.Authorization.outputs.args).ref }}
lfs: "true"
# repo specific steps
#- name: Setup java
# uses: actions/setup-java@v1
# with:
# java-version: 1.8
# add blackduck properties https://synopsys.atlassian.net/wiki/spaces/INTDOCS/pages/631308372/Methods+for+Configuring+Analysis#Using-a-configuration-file
#- name: Setup blackduck properties
# run: |
# PROJECTS=$(mvn -am dependency:tree | grep maven-dependency-plugin | awk '{ out="com.nvidia:"$(NF-1);print out }' | grep rapids | xargs | sed -e 's/ /,/g')
# echo detect.maven.build.command="-pl=$PROJECTS -am" >> application.properties
# echo detect.maven.included.scopes=compile >> application.properties
- name: Run blossom action
uses: NVIDIA/blossom-action@main
env:
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
with:
args1: ${{ fromJson(needs.Authorization.outputs.args).args1 }}
args2: ${{ fromJson(needs.Authorization.outputs.args).args2 }}
args3: ${{ fromJson(needs.Authorization.outputs.args).args3 }}
Job-trigger:
name: Start ci job
needs: [Vulnerability-scan]
runs-on: blossom
steps:
- name: Start ci job
run: blossom-ci
env:
OPERATION: "START-CI-JOB"
CI_SERVER: ${{ secrets.CI_SERVER }}
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Post-processing:
name: Post processing
runs-on: blossom
if: github.event_name == 'workflow_dispatch'
steps:
- name: Start post processing
run: blossom-ci
env:
OPERATION: "POST-PROCESSING"
CI_SERVER: ${{ secrets.CI_SERVER }}
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}