Skip to content

Commit

Permalink
Add github action script to run semgrep tests against adapter PRs
Browse files Browse the repository at this point in the history
Commit introduces script and helpers to run semgrep test against PRs and upload semgrep results as PR comment
  • Loading branch information
onkarvhanumante committed Jul 6, 2023
1 parent f1787d4 commit d78c3d2
Show file tree
Hide file tree
Showing 2 changed files with 405 additions and 0 deletions.
331 changes: 331 additions & 0 deletions .github/workflows/helpers/pull-request-utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,331 @@
const synchronizeEvent = "synchronize",
openedEvent = "opened",
completedStatus = "completed",
resultSize = 100

class diffHelper {
constructor(input) {
this.owner = input.context.repo.owner
this.repo = input.context.repo.repo
this.github = input.github
this.pullRequestNumber = input.context.payload.pull_request.number
this.pullRequestEvent = input.event
this.testName = input.testName
this.fileNameFilter = !input.fileNameFilter ? () => true : input.fileNameFilter
this.fileLineFilter = !input.fileLineFilter ? () => true : input.fileLineFilter
}

/*
Checks whether the test defined by this.testName has been executed on the given commit
@param {string} commit - commit SHA to check for test execution
@returns {boolean} - returns true if the test has been executed on the commit, otherwise false
*/
async #isTestExecutedOnCommit(commit) {
const response = await this.github.rest.checks.listForRef({
owner: this.owner,
repo: this.repo,
ref: commit,
})

return response.data.check_runs.some(
({ status, name }) => status === completedStatus && name === this.testName
)
}

/*
Retrieves the line numbers of added or updated lines in the provided files
@param {Array} files - array of files containing their filename and patch
@returns {Object} - object mapping filenames to arrays of line numbers indicating the added or updated lines
*/
async #getDiffForFiles(files = []) {
let diff = {}
for (const { filename, patch } of files) {
if (this.fileNameFilter(filename)) {
const lines = patch.split("\n")
if (lines.length === 1) {
continue
}

let lineNumber
for (const line of lines) {
// Check if line is diff header
// example:
// @@ -1,3 +1,3 @@
// 1 var a
// 2
// 3 - //test
// 3 +var b
// Here @@ -1,3 +1,3 @@ is diff header
if (line.match(/@@\s.*?@@/) != null) {
lineNumber = parseInt(line.match(/\+(\d+)/)[0])
continue
}

// "-" prefix indicates line was deleted. So do not consider deleted line
if (line.startsWith("-")) {
continue
}

// "+"" prefix indicates line was added or updated. Include line number in diff details
if (line.startsWith("+") && this.fileLineFilter(line)) {
diff[filename] = diff[filename] || []
diff[filename].push(lineNumber)
}
lineNumber++
}
}
}
return diff
}

/*
Retrieves a list of commits that have not been checked by the test defined by this.testName
@returns {Array} - array of commit SHAs that have not been checked by the test
*/
async #getNonScannedCommits() {
const { data } = await this.github.rest.pulls.listCommits({
owner: this.owner,
repo: this.repo,
pull_number: this.pullRequestNumber,
per_page: resultSize,
})
let nonScannedCommits = []

// API returns commits in ascending order. Loop in reverse to quickly retrieve unchecked commits
for (let i = data.length - 1; i >= 0; i--) {
const { sha, parents } = data[i]

// Commit can be merged master commit. Such commit have multiple parents
// Do not consider such commit for building file diff
if (parents.length > 1) {
continue
}

const isTestExecuted = await this.#isTestExecutedOnCommit(sha)
if (isTestExecuted) {
// Remaining commits have been tested in previous scans. Therefore, do not need to be considered again
break
} else {
nonScannedCommits.push(sha)
}
}

// Reverse to return commits in ascending order. This is needed to build diff for commits in chronological order
return nonScannedCommits.reverse()
}

/*
Filters the commit diff to include only the files that are part of the PR diff
@param {Array} commitDiff - array of line numbers representing lines added or updated in the commit
@param {Array} prDiff - array of line numbers representing lines added or updated in the pull request
@returns {Array} - filtered commit diff, including only the files that are part of the PR diff
*/
async #filterCommitDiff(commitDiff = [], prDiff = []) {
return commitDiff.filter((file) => prDiff.includes(file))
}

/*
Builds the diff for the pull request, including both the changes in the pull request and the changes in non-scanned commits
@returns {string} - json string representation of the pull request diff and the diff for non-scanned commits
*/
async buildDiff() {
const { data } = await this.github.rest.pulls.listFiles({
owner: this.owner,
repo: this.repo,
pull_number: this.pullRequestNumber,
per_page: resultSize,
})

const pullRequestDiff = await this.#getDiffForFiles(data)

let nonScannedCommitsDiff = {}

// The "synchronize" event implies that new commit are pushed after the pull request was opened
if (
Object.keys(pullRequestDiff).length != 0 &&
this.pullRequestEvent === synchronizeEvent
) {
// Retrieves list of commits that have not been scanned by the PR check
const nonScannedCommits = await this.#getNonScannedCommits()
for (const commit of nonScannedCommits) {
const { data } = await this.github.rest.repos.getCommit({
owner: this.owner,
repo: this.repo,
ref: commit,
})

const commitDiff = await this.#getDiffForFiles(data.files)
const files = Object.keys(commitDiff)
for (const file of files) {
// Consider scenario where the changes made to a file in the initial commit are completely undone by subsequent commits
// In such cases, the modifications from the initial commit should not be taken into account
// If the changes were entirely removed, there should be no entry for the file in the pullRequestStats
const filePRDiff = pullRequestDiff[file]
if (!filePRDiff) {
continue
}

// Consider scenario where changes made in the commit were partially removed or modified by subsequent commits
// In such cases, include only those commit changes that are part of the pullRequestStats object
// This ensures that only the changes that are reflected in the pull request are considered
const changes = await this.#filterCommitDiff(commitDiff[file], filePRDiff)

if (changes.length !== 0) {
// Check if nonScannedCommitsDiff[file] exists, if not assign an empty array to it
nonScannedCommitsDiff[file] = nonScannedCommitsDiff[file] || []
// Combine the existing nonScannedCommitsDiff[file] array with the commit changes
// Remove any duplicate elements using the Set data structure
nonScannedCommitsDiff[file] = [
...new Set([...nonScannedCommitsDiff[file], ...changes]),
]
}
}
}
}

const prDiffFiles = Object.keys(pullRequestDiff)
const pullRequest = {
hasChanges: prDiffFiles.length > 0,
files: prDiffFiles.join(" "),
diff: pullRequestDiff,
}
const uncheckedCommits = { diff: nonScannedCommitsDiff }
return JSON.stringify({ pullRequest, uncheckedCommits })
}
}

class semgrepHelper {
constructor(input) {
this.owner = input.context.repo.owner
this.repo = input.context.repo.repo
this.github = input.github

this.pullRequestNumber = input.context.payload.pull_request.number
this.pullRequestEvent = input.event

this.pullRequestDiff = input.diff.pullRequest.diff
this.newCommitsDiff = input.diff.uncheckedCommits.diff

this.semgrepErrors = []
this.semgrepWarnings = []
input.semgrepResult.forEach((res) => {
res.severity === "High" ? this.semgrepErrors.push(res) : this.semgrepWarnings.push(res)
})

this.headSha = input.headSha
}

/*
Retrieves the matching line number from the provided diff for a given file and range of lines
@param {Object} range - object containing the file, start line, and end line to find a match
@param {Object} diff - object containing file changes and corresponding line numbers
@returns {number|null} - line number that matches the range within the diff, or null if no match is found
*/
async #getMatchingLineFromDiff({ file, start, end }, diff) {
const fileDiff = diff[file]
if (!fileDiff) {
return null
}
if (fileDiff.includes(start)) {
return start
}
if (fileDiff.includes(end)) {
return end
}
return null
}

/*
Splits the semgrep results into different categories based on the scan
@param {Array} semgrepResults - array of results reported by semgrep
@returns {Object} - object containing the categorized semgrep results i.e results reported in previous scans and new results found in the current scan
*/
async #splitSemgrepResultsByScan(semgrepResults = []) {
const result = {
nonDiff: [], // Errors or warnings found in files updated in pull request, but not part of sections that were modified in the pull request
previous: [], // Errors or warnings found in previous semgrep scans
current: [], // Errors or warnings found in current semgrep scan
}

for (const se of semgrepResults) {
const prDiffLine = await this.#getMatchingLineFromDiff(se, this.pullRequestDiff)
if (!prDiffLine) {
result.nonDiff.push({ ...se })
continue
}

switch (this.pullRequestEvent) {
case openedEvent:
// "Opened" event implies that this is the first check
// Therefore, the error should be appended to the result.current
result.current.push({ ...se, line: prDiffLine })
case synchronizeEvent:
const commitDiffLine = await this.#getMatchingLineFromDiff(se, this.newCommitsDiff)
// Check if error or warning is part of current commit diff
// If not then error or warning was reported in previous scans
commitDiffLine != null
? result.current.push({ ...se, line: commitDiffLine })
: result.previous.push({
...se,
line: prDiffLine,
})
}
}
return result
}

/*
Adds review comments based on the semgrep results to the current pull request
@returns {Object} - object containing the count of unaddressed comments from the previous scan and the count of new comments from the current scan
*/
async addReviewComments() {
let result = {
previousScan: { unAddressedComments: 0 },
currentScan: { newComments: 0 },
}

if (this.semgrepErrors.length == 0 && this.semgrepWarnings.length == 0) {
return result
}

const errors = await this.#splitSemgrepResultsByScan(this.semgrepErrors)
if (errors.previous.length == 0 && errors.current.length == 0) {
console.log("Semgrep did not find any errors in the current pull request changes")
} else {
for (const { message, file, line } of errors.current) {
await this.github.rest.pulls.createReviewComment({
owner: this.owner,
repo: this.repo,
pull_number: this.pullRequestNumber,
commit_id: this.headSha,
body: message,
path: file,
line: line,
})
}
result.currentScan.newComments = errors.current.length
if (this.pullRequestEvent == synchronizeEvent) {
result.previousScan.unAddressedComments = errors.previous.length
}
}

const warnings = await this.#splitSemgrepResultsByScan(this.semgrepWarnings)
for (const { message, file, line } of warnings.current) {
await this.github.rest.pulls.createReviewComment({
owner: this.owner,
repo: this.repo,
pull_number: this.pullRequestNumber,
commit_id: this.headSha,
body: "Consider this as a suggestion. " + message,
path: file,
line: line,
})
}
return result
}
}

module.exports = {
diffHelper: (input) => new diffHelper(input),
semgrepHelper: (input) => new semgrepHelper(input),
}
Loading

0 comments on commit d78c3d2

Please sign in to comment.