Skip to content

Commit

Permalink
build(tools): software bill of materials generation for npm packages
Browse files Browse the repository at this point in the history
Added a script to generate a .csv SBoM for npm package dependencies.
The shorthand for invoking the script is
$ yarn tools:generate-sbom
which then saves a .csv file with the combined output under
`./dist/sbom/`

Fixes #2081

Signed-off-by: Peter Somogyvari <peter.somogyvari@accenture.com>
  • Loading branch information
petermetz committed Jul 22, 2023
1 parent 4bb0493 commit 8f1d1f3
Show file tree
Hide file tree
Showing 5 changed files with 558 additions and 2 deletions.
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"tools:validate-bundle-names": "TS_NODE_PROJECT=./tools/tsconfig.json node --trace-deprecation --experimental-modules --abort-on-uncaught-exception --loader ts-node/esm --experimental-specifier-resolution=node ./tools/validate-bundle-names.js",
"tools:bump-openapi-spec-dep-versions": "TS_NODE_PROJECT=./tools/tsconfig.json node --trace-deprecation --experimental-modules --abort-on-uncaught-exception --loader ts-node/esm --experimental-specifier-resolution=node ./tools/bump-openapi-spec-dep-versions.ts",
"tools:get-latest-sem-ver-git-tag": "TS_NODE_PROJECT=./tools/tsconfig.json node --abort-on-uncaught-exception --loader ts-node/esm --experimental-specifier-resolution=node --no-warnings ./tools/get-latest-sem-ver-git-tag.ts",
"tools:generate-sbom": "TS_NODE_PROJECT=tools/tsconfig.json node --experimental-json-modules --trace-deprecation --experimental-modules --abort-on-uncaught-exception --loader ts-node/esm --experimental-specifier-resolution=node ./tools/generate-sbom.ts",
"generate-api-server-config": "node ./tools/generate-api-server-config.js",
"sync-ts-config": "TS_NODE_PROJECT=tools/tsconfig.json node --experimental-json-modules --loader ts-node/esm ./tools/sync-npm-deps-to-tsc-projects.ts",
"start:api-server": "node ./packages/cactus-cmd-api-server/dist/lib/main/typescript/cmd/cactus-api.js --config-file=.config.json",
Expand Down Expand Up @@ -142,6 +143,7 @@
"karma-electron": "7.0.0",
"karma-tap": "4.2.0",
"karma-webpack": "5.0.0",
"license-report": "6.4.0",
"lint-staged": "11.1.2",
"make-dir-cli": "3.0.0",
"node-polyfill-webpack-plugin": "1.1.4",
Expand Down
264 changes: 264 additions & 0 deletions tools/generate-sbom.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
import { fileURLToPath } from "url";
import { dirname } from "path";
import path from "path";
import { promisify } from "util";
import { exec, ExecOptions } from "child_process";
import fs from "fs-extra";
import { globby, Options as GlobbyOptions } from "globby";
import { RuntimeError } from "run-time-error";
import fastSafeStringify from "fast-safe-stringify";
import { INpmListDependencyV1, npmList } from "./npm-list";

const execAsync = promisify(exec);

/**
 * Globs the project directory for dependency manifest files.
 *
 * Everything underneath any `node_modules` directory is ignored and the
 * matches are returned as absolute paths.
 *
 * FIXME: the patterns cover several ecosystems (Go, Rust, Gradle, yarn, npm)
 * but the SBoM generation in this script only supports npm `package.json`
 * manifests for now. Make the rest of the script compatible with the other
 * manifest kinds for a complete picture of the dependencies involved.
 *
 * @param req.PROJECT_DIR Directory that the glob patterns are resolved against.
 * @returns The absolute paths of all manifest files that were matched.
 */
async function getManifestFiles(req: {
  PROJECT_DIR: string;
}): Promise<{ readonly manifestFilePaths: string[] }> {
  const includePatterns = [
    "**/go.mod",
    "**/Cargo.toml",
    "**/build.gradle*",
    "yarn.lock",
    "**/package.json",
  ];

  const searchOptions: GlobbyOptions = {
    cwd: req.PROJECT_DIR,
    absolute: true,
    ignore: ["**/node_modules/**"],
  };

  const manifestFilePaths = await globby(includePatterns, searchOptions);
  return { manifestFilePaths };
}

/**
 * # Software Bill of Materials Generator Script
 *
 * How it works:
 * 1. The project directory is globbed for manifest files (build.gradle,
 *    yarn.lock, package.json, ...). The glob result is only used to log a
 *    count; for now only npm package.json files are actually processed.
 * 2. The dependency tree reported by `npmList()` is walked and the
 *    `package.json` path of every entry is collected into a set.
 * 3. The SBoM generator tool is executed once per collected manifest, one
 *    after the other.
 * 4. The stdout of each execution is appended verbatim to a single .csv file
 *    under `<PROJECT_DIR>/dist/sbom/`. The "relatedTo" field of the rows holds
 *    the manifest file's path relative to the project directory.
 *
 * A manifest that fails with a `ManifestParseError` is logged as a warning and
 * skipped; any other failure aborts the run with a `RuntimeError`.
 *
 * @param argv The process arguments (only checked for being truthy).
 * @param env The process environment (only checked for being truthy).
 */
const main = async (argv: string[], env: NodeJS.ProcessEnv) => {
  if (!argv) {
    throw new RuntimeError(`Process argv cannot be falsy.`);
  }
  if (!env) {
    throw new RuntimeError(`Process env cannot be falsy.`);
  }

  const TAG = "[tools/generate-sbom.ts] ";
  const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url));
  const PROJECT_DIR = path.join(SCRIPT_DIR, "../");
  console.log(`SCRIPT_DIR=${SCRIPT_DIR}`);
  console.log(`PROJECT_DIR=${PROJECT_DIR}`);

  // NOTE(review): the glob also matches non-npm manifests, yet the message
  // below calls all of them package.json files.
  const { manifestFilePaths: globbedPaths } = await getManifestFiles({
    PROJECT_DIR,
  });
  console.log(`Found ${globbedPaths.length} package.json files via glob.`);

  // The manifests that get processed come from the npm dependency tree,
  // not from the glob above.
  const npmTree = await npmList({ PROJECT_DIR });
  const manifestFilePaths = new Set<string>();
  for (const dependency of Object.values(npmTree.dependencies)) {
    traverseDeps(dependency, manifestFilePaths);
  }

  const sbomCacheDir = path.join(PROJECT_DIR, ".cacti-tools", "cache", "sbom");
  await fs.mkdirp(sbomCacheDir);
  console.log("Created SBoM cache dir at: ", sbomCacheDir);

  const sbomDir = path.join(PROJECT_DIR, "dist", "sbom");
  await fs.mkdirp(sbomDir);
  console.log("Created SBoM dir at: ", sbomDir);

  // Colons are swapped for dashes to keep the timestamp file name friendly.
  const isoTimestamp = new Date().toJSON().slice(0, 24);
  const dateAndTime = isoTimestamp.replaceAll(":", "-");
  const specFileReportPathAbs = path.join(
    sbomDir,
    `cacti_sbom_nodejs_${dateAndTime}.csv`,
  );
  console.log("Streaming data to SBoM csv file at: ", specFileReportPathAbs);

  const manifestCount = manifestFilePaths.size;
  let runtimeMsSum = 0;
  let idx = 0;
  // Only the first successful execution should emit the CSV header row.
  let csvHeadersEnabled = true;

  for (const manifestFilePath of manifestFilePaths) {
    idx += 1;
    const startedAtMs = Date.now();
    const dirPath = path.relative(PROJECT_DIR, path.dirname(manifestFilePath));
    const manifestRelPath = path.relative(PROJECT_DIR, manifestFilePath);

    try {
      const { stdout } = await generateSBoM({
        dirPath,
        TAG,
        csvHeadersEnabled,
        manifestFilePath,
        PROJECT_DIR,
      });
      csvHeadersEnabled = false;
      await fs.appendFile(specFileReportPathAbs, stdout);

      const runtimeMs = Date.now() - startedAtMs;
      runtimeMsSum += runtimeMs;

      const { logMessage } = createDiagnosticsMessage({
        idx,
        manifestCount,
        manifestRelPath,
        runtimeMsSum,
        runtimeMs,
      });
      console.log(logMessage);
    } catch (ex: unknown) {
      // A syntax error in the package.json file is merely logged as a
      // warning, after which we move on to the next manifest.
      if (ex instanceof ManifestParseError) {
        console.warn(ex);
        continue;
      }
      const msg = `Failed to generate SBoM for ${manifestFilePath}`;
      const throwable = ex instanceof Error ? ex : fastSafeStringify(ex);
      throw new RuntimeError(msg, throwable);
    }
  }
};

/**
 * Walks a dependency tree depth-first (parents before their children,
 * siblings in their original order) and records the `package.json` location
 * of every entry that has a `path`.
 *
 * Entries without a `path` are reported through `console.warn` (the JSON dump
 * is capped at 4000 characters) and their children are still visited.
 *
 * @param root The dependency entry to start the walk from.
 * @param paths Output set that receives `<path>/package.json` for each entry.
 */
function traverseDeps(root: INpmListDependencyV1, paths: Set<string>): void {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // that they get popped (and therefore visited) in their original order.
  const pending: INpmListDependencyV1[] = [root];

  while (pending.length > 0) {
    const current = pending.pop() as INpmListDependencyV1;

    if (current.path) {
      paths.add(current.path.concat("/package.json"));
    } else {
      const dump = JSON.stringify(current).substring(0, 4000);
      console.warn(`MISSING PATH => ${dump}`);
    }

    if (current.dependencies) {
      const children = Object.values(current.dependencies);
      for (let i = children.length - 1; i >= 0; i--) {
        pending.push(children[i]);
      }
    }
  }
}

/**
 * Builds the tab separated progress line that gets logged after a manifest
 * has been processed.
 *
 * The line consists of the completion percentage (two decimals), the
 * `idx/manifestCount` counter, the estimated runtime in minutes (the average
 * runtime so far multiplied by the manifest count, rounded up), the manifest's
 * relative path and the runtime of the latest execution in milliseconds.
 *
 * @param req.idx 1-based position of the manifest that was just processed.
 * @param req.runtimeMs Runtime of the latest execution in milliseconds.
 * @param req.runtimeMsSum Sum of the runtimes measured so far in milliseconds.
 * @param req.manifestCount Total number of manifests to process.
 * @param req.manifestRelPath Path of the manifest relative to the project.
 * @returns The formatted message wrapped in an object.
 */
function createDiagnosticsMessage(req: {
  readonly idx: number;
  readonly runtimeMs: number;
  readonly runtimeMsSum: number;
  readonly manifestCount: number;
  readonly manifestRelPath: string;
}): { logMessage: string } {
  const percentDone = ((req.idx / req.manifestCount) * 100).toFixed(2);
  const meanRuntimeMs = req.runtimeMsSum / req.idx;
  const estRuntimeMin = Math.ceil((meanRuntimeMs * req.manifestCount) / 60000);

  const segments = [
    `${percentDone}%\t${req.idx}/\t\t${req.manifestCount}`,
    `\t\testRuntimeMin=${estRuntimeMin}`,
    `\t${req.manifestRelPath}`,
    `\t\t\t\t\truntimeMs=${req.runtimeMs}ms`,
  ];

  return { logMessage: segments.join("") };
}

/**
 * Runs `lerna ls` through the shell in the project directory and extracts the
 * `name` property of every element of the JSON array that it prints.
 *
 * Anything written to stderr by the command is logged but is not treated as
 * a failure on its own.
 *
 * @param req.PROJECT_DIR Working directory for the lerna command.
 * @returns The package names in the order lerna reported them.
 * @throws RuntimeError if the command fails or its output cannot be processed.
 */
export async function lernaPkgList(req: {
  readonly PROJECT_DIR: string;
}): Promise<{ readonly pkgNames: string[] }> {
  const TAG = "[tools/generate-sbom.ts/lernaPkgList()]";
  const shellCmd = `./node_modules/.bin/lerna ls --json --all --no-progress --loglevel=silent`;

  // 32 MB of stdout will be allowed
  const maxBuffer = 32 * 1024 * 1024;
  const execOpts: ExecOptions = { cwd: req.PROJECT_DIR, maxBuffer };

  try {
    const cmdResult = await execAsync(shellCmd, execOpts);
    if (cmdResult.stderr) {
      console.error(`${TAG} shell CMD: ${shellCmd}`);
      console.error(`${TAG} stderr of the above command: ${cmdResult.stderr}`);
    }
    const lernaPackages = JSON.parse(cmdResult.stdout);
    return {
      pkgNames: lernaPackages.map((pkg: { name: string }) => pkg.name),
    };
  } catch (ex: unknown) {
    const cause = ex instanceof Error ? ex : fastSafeStringify(ex);
    throw new RuntimeError(
      `${TAG} Failed to execute shell CMD: ${shellCmd}`,
      cause,
    );
  }
}

/**
 * Wraps a value in single quotes so that a POSIX shell passes it on verbatim
 * as one argument, even if it contains spaces or shell metacharacters.
 */
function quoteShellArg(value: string): string {
  return `'${value.replace(/'/g, `'\\''`)}'`;
}

/**
 * In-flight or settled `lernaPkgList()` lookups keyed by project directory.
 * The list of workspace packages does not change while the script runs, so
 * there is no point in spawning lerna again for every single manifest file.
 */
const lernaPkgListCache = new Map<
  string,
  Promise<{ readonly pkgNames: string[] }>
>();

/** Memoized `lernaPkgList()`; failed lookups are evicted so they can be retried. */
function lernaPkgListCached(
  PROJECT_DIR: string,
): Promise<{ readonly pkgNames: string[] }> {
  let lookup = lernaPkgListCache.get(PROJECT_DIR);
  if (!lookup) {
    lookup = lernaPkgList({ PROJECT_DIR });
    lernaPkgListCache.set(PROJECT_DIR, lookup);
    lookup.catch(() => lernaPkgListCache.delete(PROJECT_DIR));
  }
  return lookup;
}

/**
 * Executes the `license-report` CLI for a single manifest file and returns
 * what it printed.
 *
 * The report is requested in CSV format with a fixed list of fields. The
 * packages returned by `lernaPkgList()` are excluded from it, the "relatedTo"
 * field is set to the manifest's path relative to the project directory and
 * the "department" field is set to "Hyperledger Cacti".
 *
 * File paths and package names are shell-quoted (POSIX single quotes) before
 * being spliced into the command line so that paths containing spaces or
 * shell metacharacters do not break (or alter) the command.
 *
 * @param req.TAG Prefix for the log and error messages.
 * @param req.csvHeadersEnabled Whether the CSV header row should be printed.
 * @param req.manifestFilePath Path of the manifest file to report on.
 * @param req.dirPath Accepted for the sake of the callers but currently unused.
 * @param req.PROJECT_DIR Working directory of the command.
 * @returns The manifest path along with the stdout/stderr of the command.
 * @throws ManifestParseError if the command failed and the error message
 * contains "SyntaxError: ".
 * @throws RuntimeError for any other failure.
 */
export async function generateSBoM(req: {
  readonly TAG: string;
  readonly csvHeadersEnabled: boolean;
  readonly manifestFilePath: string;
  readonly dirPath: string;
  readonly PROJECT_DIR: string;
}): Promise<{
  readonly manifestFilePath: string;
  readonly stderr: string;
  readonly stdout: string;
}> {
  const { csvHeadersEnabled, TAG, PROJECT_DIR, manifestFilePath } = req;

  const manifestRelPath = path.relative(PROJECT_DIR, manifestFilePath);
  const executable = `./node_modules/.bin/license-report`;

  const { pkgNames } = await lernaPkgListCached(PROJECT_DIR);

  const csvFields = [
    "department",
    "relatedTo",
    "name",
    "licenseType",
    "link",
    "remoteVersion",
    "installedVersion",
    "definedVersion",
    "author",
  ];

  // One array element per CLI argument so that optional ones can be left out
  // entirely instead of leaving blanks (or an empty `--exclude=`) behind.
  const cmdArgs = [
    "--output=csv",
    ...csvFields.map((field) => `--fields=${field}`),
    ...pkgNames.map((pkgName) => `--exclude=${quoteShellArg(pkgName)}`),
    ...(csvHeadersEnabled ? ["--csvHeaders"] : []),
    `--relatedTo.value=${quoteShellArg(manifestRelPath)}`,
    `--department.value='Hyperledger Cacti'`,
    `--package=${quoteShellArg(manifestFilePath)}`,
  ].join(" ");

  const shellCmd = `${executable} ${cmdArgs}`;

  const execOpts: ExecOptions = {
    cwd: req.PROJECT_DIR,
    maxBuffer: 2 * 1024 * 1024, // 2 MB of stdout will be allowed
  };

  try {
    const { stderr, stdout } = await execAsync(shellCmd, execOpts);
    if (stderr) {
      console.error(`${TAG} shell CMD: ${shellCmd}`);
      console.error(`${TAG} stderr of the above command: ${stderr}`);
    }
    return { manifestFilePath, stderr, stdout };
  } catch (ex: unknown) {
    const msg = `${TAG} Failed to execute shell CMD: ${shellCmd}`;
    // A "SyntaxError: " in the failure message is taken to mean that the
    // manifest could not be parsed, which callers may choose to tolerate.
    if (ex instanceof Error && ex.message.includes("SyntaxError: ")) {
      throw new ManifestParseError(msg, ex);
    } else {
      const throwable = ex instanceof Error ? ex : fastSafeStringify(ex);
      throw new RuntimeError(msg, throwable);
    }
  }
}

/**
 * Thrown by `generateSBoM()` when the report command fails with an error
 * message containing "SyntaxError: ". `main()` treats it as non-fatal: the
 * error is logged as a warning and the next manifest file is processed.
 */
export class ManifestParseError extends RuntimeError {}

// Script entry point. The promise returned by main() must not be left
// floating: a rejection is logged together with its cause and reported to the
// caller through a non-zero exit code.
main(process.argv, process.env).catch((ex: unknown) => {
  console.error(ex);
  process.exitCode = 1;
});
87 changes: 87 additions & 0 deletions tools/npm-list.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import { ExecOptions, exec } from "child_process";
import { promisify } from "util";

import fastSafeStringify from "fast-safe-stringify";
import { RuntimeError } from "run-time-error";
import { hasKey } from "./has-key";

const execAsync = promisify(exec);

/** Input parameters of `npmList()`. */
export interface INpmListRequestV1 {
/** Directory used as the working directory of the npm command. */
readonly PROJECT_DIR: string;
}

/**
 * Return value of `npmList()`: the JSON document printed by the npm command,
 * parsed with `JSON.parse` and not validated any further at runtime.
 */
export interface INpmListResponseV1 {
/** Top level dependency entries (presumably keyed by package name - TODO confirm). */
readonly dependencies: Record<string, INpmListDependencyV1>;
}

/**
 * Object form of the `repository` property of a dependency entry (the
 * property may also be a plain string, see `INpmListDependencyV1`).
 */
export interface INpmListRepositoryV1 {
// NOTE(review): presumably the version control system, e.g. "git" - confirm.
readonly type: string;
readonly url: string;
// NOTE(review): presumably the package's sub-directory within a monorepo - confirm.
readonly directory?: string;
}

/**
 * A single entry of the dependency tree returned by `npmList()`. Entries are
 * nested recursively through the optional `dependencies` property.
 *
 * NOTE(review): the property list presumably mirrors the output of
 * `npm ls --json --long`; nothing verifies at runtime that all of the
 * properties are actually present - confirm against real npm output.
 */
export interface INpmListDependencyV1 {
readonly version: string;
readonly resolved: string;
readonly overridden: boolean;
readonly name: string;
readonly description: string;
// Either a plain string or an object holding the name and the email.
readonly author: string | { readonly name: string; readonly email: string };
readonly homepage: string;
readonly license: string;
// Either a plain string or the structured form declared by INpmListRepositoryV1.
readonly repository: INpmListRepositoryV1 | string;

readonly _id: string;
readonly extraneous: boolean;
// NOTE(review): presumably the directory of the installed package on disk - confirm.
readonly path: string;

// Nested dependency entries; omitted when the entry has none to list.
readonly dependencies?: Record<string, INpmListDependencyV1>;
}

/**
 * Runs `npm ls` (all dependencies, long JSON output, workspace root included)
 * through the shell in the project directory and returns the parsed JSON.
 *
 * Anything written to stderr by a successful command is logged. When the
 * command fails with exit code 1, no signal, an empty stderr and a non-empty
 * stdout, the stdout is parsed and returned regardless (see the comment in
 * the catch block for the reasoning).
 *
 * @param req The request carrying the project directory.
 * @returns The parsed (but otherwise not validated) output of the command.
 * @throws RuntimeError if the command fails in any other way.
 */
export async function npmList(
  req: INpmListRequestV1,
): Promise<INpmListResponseV1> {
  const TAG = "[tools/generate-sbom.ts#npmList()]";
  const shellCmd = `npm ls --all --json --long --include-workspace-root --loglevel=silent`;

  const execOpts: ExecOptions = {
    cwd: req.PROJECT_DIR,
    maxBuffer: 256 * 1024 * 1024,
  };

  try {
    const cmdResult = await execAsync(shellCmd, execOpts);
    if (cmdResult.stderr) {
      console.error(`${TAG} shell CMD: ${shellCmd}`);
      console.error(`${TAG} stderr of the above command: ${cmdResult.stderr}`);
    }
    return JSON.parse(cmdResult.stdout);
  } catch (ex: unknown) {
    // npm may give a non-zero exit code merely because it found some
    // extraneous dependencies. In that case the dependency list it printed is
    // still a valid JSON document that is fully usable for our purposes, so
    // that situation has to be told apart from genuine failures.
    const canHandle =
      ex instanceof Error &&
      hasKey(ex, "code") &&
      hasKey(ex, "signal") &&
      hasKey(ex, "stderr") &&
      hasKey(ex, "stdout") &&
      ex.code === 1 &&
      ex.signal === null &&
      ex.stderr === "" &&
      typeof ex.stdout === "string" &&
      ex.stdout.length > 0;

    if (canHandle) {
      return JSON.parse(ex.stdout as string);
    }

    const cause = ex instanceof Error ? ex : fastSafeStringify(ex);
    throw new RuntimeError(
      `${TAG} Failed to execute shell CMD: ${shellCmd}`,
      cause,
    );
  }
}
2 changes: 1 addition & 1 deletion tools/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,6 @@

/* Advanced Options */
"skipLibCheck": true, /* Skip type checking of declaration files. */
"forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. */
"forceConsistentCasingInFileNames": true
}
}
Loading

0 comments on commit 8f1d1f3

Please sign in to comment.