Skip to content

Commit

Permalink
script to run semgrep tests against adapter PRs (prebid#2907)
Browse files Browse the repository at this point in the history
authored by @onkarvhanumante
  • Loading branch information
onkarvhanumante authored and Peiling-Ding committed Jul 14, 2023
1 parent 02f40e3 commit 9df856d
Show file tree
Hide file tree
Showing 3 changed files with 412 additions and 1 deletion.
337 changes: 337 additions & 0 deletions .github/workflows/helpers/pull-request-utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,337 @@
const synchronizeEvent = "synchronize",
openedEvent = "opened",
completedStatus = "completed",
resultSize = 100

class diffHelper {
constructor(input) {
this.owner = input.context.repo.owner
this.repo = input.context.repo.repo
this.github = input.github
this.pullRequestNumber = input.context.payload.pull_request.number
this.pullRequestEvent = input.event
this.testName = input.testName
this.fileNameFilter = !input.fileNameFilter ? () => true : input.fileNameFilter
this.fileLineFilter = !input.fileLineFilter ? () => true : input.fileLineFilter
}

/*
Checks whether the test defined by this.testName has been executed on the given commit
@param {string} commit - commit SHA to check for test execution
@returns {boolean} - returns true if the test has been executed on the commit, otherwise false
*/
async #isTestExecutedOnCommit(commit) {
const response = await this.github.rest.checks.listForRef({
owner: this.owner,
repo: this.repo,
ref: commit,
})

return response.data.check_runs.some(
({ status, name }) => status === completedStatus && name === this.testName
)
}

/*
Retrieves the line numbers of added or updated lines in the provided files
@param {Array} files - array of files containing their filename and patch
@returns {Object} - object mapping filenames to arrays of line numbers indicating the added or updated lines
*/
async #getDiffForFiles(files = []) {
let diff = {}
for (const { filename, patch } of files) {
if (this.fileNameFilter(filename)) {
const lines = patch.split("\n")
if (lines.length === 1) {
continue
}

let lineNumber
for (const line of lines) {
// Check if line is diff header
// example:
// @@ -1,3 +1,3 @@
// 1 var a
// 2
// 3 - //test
// 3 +var b
// Here @@ -1,3 +1,3 @@ is diff header
if (line.match(/@@\s.*?@@/) != null) {
lineNumber = parseInt(line.match(/\+(\d+)/)[0])
continue
}

// "-" prefix indicates line was deleted. So do not consider deleted line
if (line.startsWith("-")) {
continue
}

// "+"" prefix indicates line was added or updated. Include line number in diff details
if (line.startsWith("+") && this.fileLineFilter(line)) {
diff[filename] = diff[filename] || []
diff[filename].push(lineNumber)
}
lineNumber++
}
}
}
return diff
}

/*
Retrieves a list of commits that have not been checked by the test defined by this.testName
@returns {Array} - array of commit SHAs that have not been checked by the test
*/
async #getNonScannedCommits() {
const { data } = await this.github.rest.pulls.listCommits({
owner: this.owner,
repo: this.repo,
pull_number: this.pullRequestNumber,
per_page: resultSize,
})
let nonScannedCommits = []

// API returns commits in ascending order. Loop in reverse to quickly retrieve unchecked commits
for (let i = data.length - 1; i >= 0; i--) {
const { sha, parents } = data[i]

// Commit can be merged master commit. Such commit have multiple parents
// Do not consider such commit for building file diff
if (parents.length > 1) {
continue
}

const isTestExecuted = await this.#isTestExecutedOnCommit(sha)
if (isTestExecuted) {
// Remaining commits have been tested in previous scans. Therefore, do not need to be considered again
break
} else {
nonScannedCommits.push(sha)
}
}

// Reverse to return commits in ascending order. This is needed to build diff for commits in chronological order
return nonScannedCommits.reverse()
}

/*
Filters the commit diff to include only the files that are part of the PR diff
@param {Array} commitDiff - array of line numbers representing lines added or updated in the commit
@param {Array} prDiff - array of line numbers representing lines added or updated in the pull request
@returns {Array} - filtered commit diff, including only the files that are part of the PR diff
*/
async #filterCommitDiff(commitDiff = [], prDiff = []) {
return commitDiff.filter((file) => prDiff.includes(file))
}

/*
Builds the diff for the pull request, including both the changes in the pull request and the changes in non-scanned commits
@returns {string} - json string representation of the pull request diff and the diff for non-scanned commits
*/
async buildDiff() {
const { data } = await this.github.rest.pulls.listFiles({
owner: this.owner,
repo: this.repo,
pull_number: this.pullRequestNumber,
per_page: resultSize,
})

const pullRequestDiff = await this.#getDiffForFiles(data)

const nonScannedCommitsDiff =
Object.keys(pullRequestDiff).length != 0 && this.pullRequestEvent === synchronizeEvent // The "synchronize" event implies that new commit are pushed after the pull request was opened
? await this.getNonScannedCommitDiff(pullRequestDiff)
: {}

const prDiffFiles = Object.keys(pullRequestDiff)
const pullRequest = {
hasChanges: prDiffFiles.length > 0,
files: prDiffFiles.join(" "),
diff: pullRequestDiff,
}
const uncheckedCommits = { diff: nonScannedCommitsDiff }
return JSON.stringify({ pullRequest, uncheckedCommits })
}

/*
Retrieves the diff for non-scanned commits by comparing their changes with the pull request diff
@param {Object} pullRequestDiff - The diff of files in the pull request
@returns {Object} - The diff of files in the non-scanned commits that are part of the pull request diff
*/
async getNonScannedCommitDiff(pullRequestDiff) {
let nonScannedCommitsDiff = {}
// Retrieves list of commits that have not been scanned by the PR check
const nonScannedCommits = await this.#getNonScannedCommits()
for (const commit of nonScannedCommits) {
const { data } = await this.github.rest.repos.getCommit({
owner: this.owner,
repo: this.repo,
ref: commit,
})

const commitDiff = await this.#getDiffForFiles(data.files)
const files = Object.keys(commitDiff)
for (const file of files) {
// Consider scenario where the changes made to a file in the initial commit are completely undone by subsequent commits
// In such cases, the modifications from the initial commit should not be taken into account
// If the changes were entirely removed, there should be no entry for the file in the pullRequestStats
const filePRDiff = pullRequestDiff[file]
if (!filePRDiff) {
continue
}

// Consider scenario where changes made in the commit were partially removed or modified by subsequent commits
// In such cases, include only those commit changes that are part of the pullRequestStats object
// This ensures that only the changes that are reflected in the pull request are considered
const changes = await this.#filterCommitDiff(commitDiff[file], filePRDiff)

if (changes.length !== 0) {
// Check if nonScannedCommitsDiff[file] exists, if not assign an empty array to it
nonScannedCommitsDiff[file] = nonScannedCommitsDiff[file] || []
// Combine the existing nonScannedCommitsDiff[file] array with the commit changes
// Remove any duplicate elements using the Set data structure
nonScannedCommitsDiff[file] = [
...new Set([...nonScannedCommitsDiff[file], ...changes]),
]
}
}
}
return nonScannedCommitsDiff
}
}

class semgrepHelper {
constructor(input) {
this.owner = input.context.repo.owner
this.repo = input.context.repo.repo
this.github = input.github

this.pullRequestNumber = input.context.payload.pull_request.number
this.pullRequestEvent = input.event

this.pullRequestDiff = input.diff.pullRequest.diff
this.newCommitsDiff = input.diff.uncheckedCommits.diff

this.semgrepErrors = []
this.semgrepWarnings = []
input.semgrepResult.forEach((res) => {
res.severity === "High" ? this.semgrepErrors.push(res) : this.semgrepWarnings.push(res)
})

this.headSha = input.headSha
}

/*
Retrieves the matching line number from the provided diff for a given file and range of lines
@param {Object} range - object containing the file, start line, and end line to find a match
@param {Object} diff - object containing file changes and corresponding line numbers
@returns {number|null} - line number that matches the range within the diff, or null if no match is found
*/
async #getMatchingLineFromDiff({ file, start, end }, diff) {
const fileDiff = diff[file]
if (!fileDiff) {
return null
}
if (fileDiff.includes(start)) {
return start
}
if (fileDiff.includes(end)) {
return end
}
return null
}

/*
Splits the semgrep results into different categories based on the scan
@param {Array} semgrepResults - array of results reported by semgrep
@returns {Object} - object containing the categorized semgrep results i.e results reported in previous scans and new results found in the current scan
*/
async #splitSemgrepResultsByScan(semgrepResults = []) {
const result = {
nonDiff: [], // Errors or warnings found in files updated in pull request, but not part of sections that were modified in the pull request
previous: [], // Errors or warnings found in previous semgrep scans
current: [], // Errors or warnings found in current semgrep scan
}

for (const se of semgrepResults) {
const prDiffLine = await this.#getMatchingLineFromDiff(se, this.pullRequestDiff)
if (!prDiffLine) {
result.nonDiff.push({ ...se })
continue
}

switch (this.pullRequestEvent) {
case openedEvent:
// "Opened" event implies that this is the first check
// Therefore, the error should be appended to the result.current
result.current.push({ ...se, line: prDiffLine })
case synchronizeEvent:
const commitDiffLine = await this.#getMatchingLineFromDiff(se, this.newCommitsDiff)
// Check if error or warning is part of current commit diff
// If not then error or warning was reported in previous scans
commitDiffLine != null
? result.current.push({ ...se, line: commitDiffLine })
: result.previous.push({
...se,
line: prDiffLine,
})
}
}
return result
}

/*
Adds review comments based on the semgrep results to the current pull request
@returns {Object} - object containing the count of unaddressed comments from the previous scan and the count of new comments from the current scan
*/
async addReviewComments() {
let result = {
previousScan: { unAddressedComments: 0 },
currentScan: { newComments: 0 },
}

if (this.semgrepErrors.length == 0 && this.semgrepWarnings.length == 0) {
return result
}

const errors = await this.#splitSemgrepResultsByScan(this.semgrepErrors)
if (errors.previous.length == 0 && errors.current.length == 0) {
console.log("Semgrep did not find any errors in the current pull request changes")
} else {
for (const { message, file, line } of errors.current) {
await this.github.rest.pulls.createReviewComment({
owner: this.owner,
repo: this.repo,
pull_number: this.pullRequestNumber,
commit_id: this.headSha,
body: message,
path: file,
line: line,
})
}
result.currentScan.newComments = errors.current.length
if (this.pullRequestEvent == synchronizeEvent) {
result.previousScan.unAddressedComments = errors.previous.length
}
}

const warnings = await this.#splitSemgrepResultsByScan(this.semgrepWarnings)
for (const { message, file, line } of warnings.current) {
await this.github.rest.pulls.createReviewComment({
owner: this.owner,
repo: this.repo,
pull_number: this.pullRequestNumber,
commit_id: this.headSha,
body: "Consider this as a suggestion. " + message,
path: file,
line: line,
})
}
return result
}
}

module.exports = {
diffHelper: (input) => new diffHelper(input),
semgrepHelper: (input) => new semgrepHelper(input),
}
Loading

0 comments on commit 9df856d

Please sign in to comment.