Skip to content

Commit

Permalink
Merge pull request #7 from feathr-ai/main
Browse files Browse the repository at this point in the history
merge main to feature branch
  • Loading branch information
aabbasi-hbo committed Nov 10, 2022
2 parents a1da659 + b19480d commit 9390f9b
Show file tree
Hide file tree
Showing 234 changed files with 46,190 additions and 3,888 deletions.
2 changes: 1 addition & 1 deletion .github/pull_request_template.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Description
<!--
Hey! Thank you for the contribution! Please go through https://github.com/linkedin/feathr/blob/main/docs/dev_guide/pull_request_guideline.md for more information.
Hey! Thank you for the contribution! Please go through https://github.com/feathr-ai/feathr/blob/main/docs/dev_guide/pull_request_guideline.md for more information.
Describe what changes to make and why you are making these changes.
Expand Down
37 changes: 37 additions & 0 deletions .github/workflows/devskim-security-linter.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party (Microsoft) and are governed by
# separate terms of service, privacy policy, and support
# documentation.
# For more details about Devskim, visit https://github.com/marketplace/actions/devskim

name: DevSkim

# Scan pushes and pull requests that target main, plus one scheduled run per day.
on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]
  schedule:
    # Minute 0, hour 4, every day.
    - cron: '00 4 * * *'

jobs:
  lint:
    name: DevSkim
    runs-on: ubuntu-20.04
    # Read-only token scopes, except security-events which is writable
    # (presumably required by the SARIF upload step below - confirm against the action's docs).
    permissions:
      actions: read
      contents: read
      security-events: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Run DevSkim scanner
        uses: microsoft/DevSkim-Action@v1
        with:
          # Exclude anything under a .git or test directory from the scan.
          ignore-globs: "**/.git/**,**/test/**"

      - name: Upload DevSkim scan results to GitHub Security tab
        uses: github/codeql-action/upload-sarif@v2
        with:
          sarif_file: devskim-results.sarif
31 changes: 12 additions & 19 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,27 +52,20 @@ jobs:


steps:
- name: Deploy to Feathr SQL Registry Azure Web App
id: deploy-to-sql-webapp
uses: azure/webapps-deploy@v2
with:
app-name: 'feathr-sql-registry'
publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_SQL_REGISTRY }}
images: 'index.docker.io/feathrfeaturestore/feathr-registry:nightly'

- name: Deploy to Feathr Purview Registry Azure Web App
id: deploy-to-purview-webapp
uses: azure/webapps-deploy@v2
with:
app-name: 'feathr-purview-registry'
publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_PURVIEW_REGISTRY }}
images: 'index.docker.io/feathrfeaturestore/feathr-registry:nightly'
uses: distributhor/workflow-webhook@v3.0.1
env:
webhook_url: ${{ secrets.AZURE_WEBAPP_FEATHR_PURVIEW_REGISTRY_WEBHOOK }}

- name: Deploy to Feathr RBAC Registry Azure Web App
id: deploy-to-rbac-webapp
uses: azure/webapps-deploy@v2
with:
app-name: 'feathr-rbac-registry'
publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_FEATHR_RBAC_REGISTRY }}
images: 'index.docker.io/feathrfeaturestore/feathr-registry:nightly'

uses: distributhor/workflow-webhook@v3.0.1
env:
webhook_url: ${{ secrets.AZURE_WEBAPP_FEATHR_RBAC_REGISTRY_WEBHOOK }}

- name: Deploy to Feathr SQL Registry Azure Web App
id: deploy-to-sql-webapp
uses: distributhor/workflow-webhook@v3.0.1
env:
webhook_url: ${{ secrets.AZURE_WEBAPP_FEATHR_SQL_REGISTRY_WEBHOOK }}
5 changes: 4 additions & 1 deletion .github/workflows/document-scan.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
name: Feathr Documents' Broken Link Check

on: [push]
on:
push:
branches: [main]

jobs:
check-links:
runs-on: ubuntu-latest
Expand Down
99 changes: 90 additions & 9 deletions .github/workflows/pull_request_push_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,15 @@ on:
- "docs/**"
- "ui/**"
- "**/README.md"

schedule:
# Runs daily at 1 PM UTC (9 PM CST), will send notification to TEAMS_WEBHOOK
- cron: '00 13 * * *'

jobs:
sbt_test:
runs-on: ubuntu-latest
if: github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
if: github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
steps:
- uses: actions/checkout@v2
with:
Expand All @@ -41,7 +45,7 @@ jobs:

python_lint:
runs-on: ubuntu-latest
if: github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
if: github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
steps:
- name: Set up Python 3.8
uses: actions/setup-python@v2
Expand All @@ -61,7 +65,7 @@ jobs:
databricks_test:
runs-on: ubuntu-latest
if: github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
if: github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
steps:
- uses: actions/checkout@v2
with:
Expand All @@ -87,8 +91,7 @@ jobs:
- name: Install Feathr Package
run: |
python -m pip install --upgrade pip
python -m pip install pytest pytest-xdist databricks-cli
python -m pip install -e ./feathr_project/
python -m pip install -e ./feathr_project/[all]
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Set env variable and upload jars
env:
Expand Down Expand Up @@ -123,15 +126,14 @@ jobs:
COSMOS1_KEY: ${{secrets.COSMOS1_KEY}}
SQL1_USER: ${{secrets.SQL1_USER}}
SQL1_PASSWORD: ${{secrets.SQL1_PASSWORD}}

run: |
# run only test with databricks. run in 6 parallel jobs
pytest -n 6 feathr_project/test/
azure_synapse_test:
# It might be a bit of duplication to set up both the azure_synapse test and the databricks test, but for now we will keep both to accelerate the test speed
runs-on: ubuntu-latest
if: github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
if: github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
steps:
- uses: actions/checkout@v2
with:
Expand Down Expand Up @@ -165,8 +167,7 @@ jobs:
- name: Install Feathr Package
run: |
python -m pip install --upgrade pip
python -m pip install pytest pytest-xdist
python -m pip install -e ./feathr_project/
python -m pip install -e ./feathr_project/[all]
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Run Feathr with Azure Synapse
env:
Expand Down Expand Up @@ -197,3 +198,83 @@ jobs:
# skip databricks related test as we just ran the test; also separate databricks and synapse test to make sure there's no write conflict
# run in 6 parallel jobs to make the time shorter
pytest -n 6 feathr_project/test/
local_spark_test:
  # Builds the Feathr JAR with sbt, installs the Python package, and runs the
  # local-Spark end-to-end test file only.
  runs-on: ubuntu-latest
  if: github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
  steps:
    - uses: actions/checkout@v2
      with:
        ref: ${{ github.event.pull_request.head.sha }}
    - name: Set up JDK 8
      uses: actions/setup-java@v2
      with:
        java-version: "8"
        distribution: "temurin"
    - name: Build JAR
      run: |
        sbt assembly
        # remote folder for CI upload
        echo "CI_SPARK_REMOTE_JAR_FOLDER=feathr_jar_github_action_$(date +"%H_%M_%S")" >> $GITHUB_ENV
        # get local jar name without paths so version change won't affect it
        echo "FEATHR_LOCAL_JAR_NAME=$(ls target/scala-2.12/*.jar| xargs -n 1 basename)" >> $GITHUB_ENV
        # get local jar name including its path
        echo "FEATHR_LOCAL_JAR_FULL_NAME_PATH=$(ls target/scala-2.12/*.jar)" >> $GITHUB_ENV
    - name: Set up Python 3.8
      uses: actions/setup-python@v2
      with:
        # Quoted so the version is passed as a string rather than parsed as a float.
        python-version: "3.8"
    - name: Install Feathr Package
      run: |
        python -m pip install --upgrade pip
        python -m pip install -e ./feathr_project/[all]
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Run Feathr with Local Spark
      env:
        PROJECT_CONFIG__PROJECT_NAME: "feathr_github_ci_local"
        SPARK_CONFIG__SPARK_CLUSTER: local
        REDIS_PASSWORD: ${{secrets.REDIS_PASSWORD}}
        AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
        AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
        AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}}
        S3_ACCESS_KEY: ${{secrets.S3_ACCESS_KEY}}
        S3_SECRET_KEY: ${{secrets.S3_SECRET_KEY}}
        ADLS_ACCOUNT: ${{secrets.ADLS_ACCOUNT}}
        ADLS_KEY: ${{secrets.ADLS_KEY}}
        BLOB_ACCOUNT: ${{secrets.BLOB_ACCOUNT}}
        BLOB_KEY: ${{secrets.BLOB_KEY}}
        JDBC_TABLE: ${{secrets.JDBC_TABLE}}
        JDBC_USER: ${{secrets.JDBC_USER}}
        JDBC_PASSWORD: ${{secrets.JDBC_PASSWORD}}
        JDBC_DRIVER: ${{secrets.JDBC_DRIVER}}
        JDBC_SF_PASSWORD: ${{secrets.JDBC_SF_PASSWORD}}
        KAFKA_SASL_JAAS_CONFIG: ${{secrets.KAFKA_SASL_JAAS_CONFIG}}
        MONITORING_DATABASE_SQL_PASSWORD: ${{secrets.MONITORING_DATABASE_SQL_PASSWORD}}
        COSMOS1_KEY: ${{secrets.COSMOS1_KEY}}
        SQL1_USER: ${{secrets.SQL1_USER}}
        SQL1_PASSWORD: ${{secrets.SQL1_PASSWORD}}
      run: |
        # skip cloud related tests
        pytest feathr_project/test/test_local_spark_e2e.py
failure_notification:
  # If any failure, warning message will be sent
  # Only runs on scheduled events, after all test jobs listed in `needs` have finished.
  needs: [sbt_test, python_lint, databricks_test, azure_synapse_test, local_spark_test]
  runs-on: ubuntu-latest
  if: failure() && github.event_name == 'schedule'
  steps:
    - name: Warning
      env:
        # Pass the secret through the environment and quote it in the shell, so
        # characters in the URL cannot be re-interpreted by the shell.
        TEAMS_WEBHOOK: ${{ secrets.TEAMS_WEBHOOK }}
      run: |
        curl -H 'Content-Type: application/json' -d '{"text": "[WARNING] Daily CI has failure, please check: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' "$TEAMS_WEBHOOK"
notification:
  # Final Daily Report with all job status
  # `always()` lets this run even when a needed job failed; scheduled events only.
  needs: [sbt_test, python_lint, databricks_test, azure_synapse_test, local_spark_test]
  runs-on: ubuntu-latest
  if: always() && github.event_name == 'schedule'
  steps:
    - name: Get Date
      # Exports NOW (YYYY-MM-DD) for the following step.
      run: echo "NOW=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"
    - name: Notification
      env:
        # Pass the secret through the environment and quote it in the shell, so
        # characters in the URL cannot be re-interpreted by the shell.
        TEAMS_WEBHOOK: ${{ secrets.TEAMS_WEBHOOK }}
      run: |
        curl -H 'Content-Type: application/json' -d '{"text": "${{env.NOW}} Daily Report: 1. SBT Test ${{needs.sbt_test.result}}, 2. Python Lint Test ${{needs.python_lint.result}}, 3. Databricks Test ${{needs.databricks_test.result}}, 4. Synapse Test ${{needs.azure_synapse_test.result}} , 5. LOCAL SPARK TEST ${{needs.local_spark_test.result}}. Link: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' "$TEAMS_WEBHOOK"
8 changes: 6 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ As a contributor, you represent that the code you submit is your original work o

# Responsible Disclosure of Security Vulnerabilities

Please do not file reports on Github for security issues. Please review the guidelines on at (link to more info). Reports should be encrypted using PGP (link to PGP key) and sent to security@linkedin.com preferably with the title "Github linkedin/ - ".
Please do not file reports on Github for security issues. Please review the guidelines at (link to more info). Reports should be encrypted using PGP (link to PGP key) and sent to feathr-security@lists.lfaidata.foundation.

# Contribution Process

Expand All @@ -14,7 +14,7 @@ The Feathr community welcome everyone, and encourage a friendly and positive env

Please read existing Github issues or development work that is in progress or in the backlog to avoid duplication. If you are interested in those existing ones, you can leave a comment in the Github issues and the community will try to involve you. If you are not sure if it's duplicated, just create a Github issue and ask!

If it's a simple bug fix(less than 20 lines) or documentation change, you can just submit your pull request(PR) without Github issues. For any other PRs, a Github issue is required.
If it's a simple bug fix (less than 20 lines) or documentation change, you can just submit your pull request (PR) without Github issues. For any other PRs, a Github issue is required.

If you want to contribute something new and it's not tracked in existing Github issues, please create a new Github issue and the community will help review the idea. Please state `why` in your Github issue. If you already have a short design in mind, you can provide a one pager in the Github issue. If the idea in general makes sense, then we can proceed to the design or development work. If the change is not small, an [RFC](https://en.wikipedia.org/wiki/Request_for_Comments) should be reviewed and approved by the team.

Expand All @@ -40,7 +40,11 @@ Our open source community strives to:
- **Be respectful**: We are a world-wide community of professionals, and we conduct ourselves professionally. Disagreement is no excuse for poor behavior and poor manners.
- **Understand disagreements**: Disagreements, both social and technical, are useful learning opportunities. Seek to understand the other viewpoints and resolve differences constructively.
- **Remember that we’re different**. The strength of our community comes from its diversity, people from a wide range of backgrounds. Different people have different perspectives on issues. Being unable to understand why someone holds a viewpoint doesn’t mean that they’re wrong. Focus on helping to resolve issues and learning from mistakes.
-

## Attribution & Acknowledgements

This code of conduct is based on the Open Code of Conduct from the [TODOGroup](https://todogroup.org/blog/open-code-of-conduct/).

# Committers
Benjamin Le, David Stein, Edwin Cheung, Hangfei Lin, Jimmy Guo, Jinghui Mo, Li Lu, Rama Ramani, Ray Zhang, Xiaoyong Zhu
15 changes: 15 additions & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Component Governance Pipeline
# Runs the Feathr code through Component Governance Detection tool and publishes the result under compliance tab.

# Run on every change to main.
trigger:
  - main

pool:
  vmImage: ubuntu-latest

steps:
  - task: ComponentGovernanceComponentDetection@0
    inputs:
      scanType: 'Register'
      verbosity: 'Verbose'
      alertWarningLevel: 'High'
9 changes: 6 additions & 3 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import sbt.Keys.publishLocalConfiguration

ThisBuild / resolvers += Resolver.mavenLocal
ThisBuild / scalaVersion := "2.12.15"
ThisBuild / version := "0.7.2"
ThisBuild / version := "0.9.0-rc2"
ThisBuild / organization := "com.linkedin.feathr"
ThisBuild / organizationName := "linkedin"
val sparkVersion = "3.1.3"

publishLocalConfiguration := publishLocalConfiguration.value.withOverwrite(true)

val localAndCloudDiffDependencies = Seq(
"org.apache.spark" %% "spark-avro" % sparkVersion,
"org.apache.spark" %% "spark-sql" % sparkVersion,
Expand Down Expand Up @@ -46,7 +50,6 @@ val localAndCloudCommonDependencies = Seq(
"org.xerial" % "sqlite-jdbc" % "3.36.0.3",
"com.github.changvvb" %% "jackson-module-caseclass" % "1.1.1",
"com.azure.cosmos.spark" % "azure-cosmos-spark_3-1_2-12" % "4.11.1",
"org.elasticsearch" % "elasticsearch-spark-30_2.12" % "7.15.2",
"org.eclipse.jetty" % "jetty-util" % "9.3.24.v20180605"
) // Common deps

Expand Down Expand Up @@ -101,4 +104,4 @@ assembly / assemblyMergeStrategy := {
// Some systems (like Hadoop) use different versions of protobuf (like v2) so we have to shade it.
assemblyShadeRules in assembly := Seq(
ShadeRule.rename("com.google.protobuf.**" -> "shade.protobuf.@1").inAll,
)
)
Loading

0 comments on commit 9390f9b

Please sign in to comment.