Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: don't hash emails in monitor command #2352

Merged
merged 1 commit into from
Nov 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion src/cli/commands/test/iac-local-execution/file-utils.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
import * as fs from 'fs';
import * as tar from 'tar';
import * as path from 'path';
import * as crypto from 'crypto';
import {
FailedToInitLocalCacheError,
LOCAL_POLICY_ENGINE_DIR,
} from './local-cache';
import { CUSTOM_RULES_TARBALL } from './oci-pull';
import { hashData } from '../../../../lib/monitor/dev-count-analysis';

function hashData(s: string): string {
const hashedData = crypto
.createHash('sha1')
.update(s)
.digest('hex');
return hashedData;
}

export function createIacDir(): void {
// this path will be able to be customised by the user in the future
Expand Down
63 changes: 18 additions & 45 deletions src/lib/monitor/dev-count-analysis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
* "Contributing" is defined as having contributed a commit in the last 90 days.
* This is use only on the `snyk monitor` command as that is used to monitor a project's dependencies in an
* on-going manner.
* It collects only a hash of the email of a git user and the most recent commit timestamp (both per the `git log`
* It collects the email of a git user and the most recent commit timestamp (both per the `git log`
* output) and can be disabled by config (see https://snyk.io/policies/tracking-and-analytics/).
*/
import * as crypto from 'crypto';
import { exec } from 'child_process';
import { Contributor } from '../types';

Expand Down Expand Up @@ -37,16 +36,12 @@ export async function getContributors(
}

export class GitCommitInfo {
authorHashedEmail: string;
authorEmail: string;
commitTimestamp: string; // use ISO 8601 format

constructor(authorHashedEmail: string, commitTimestamp: string) {
if (isSha1Hash(authorHashedEmail)) {
this.authorHashedEmail = authorHashedEmail;
this.commitTimestamp = commitTimestamp;
} else {
throw new Error('authorHashedEmail must be a sha1 hash');
}
constructor(authorEmail: string, commitTimestamp: string) {
this.authorEmail = authorEmail;
this.commitTimestamp = commitTimestamp;
}
}

Expand All @@ -61,40 +56,38 @@ export class GitRepoCommitStats {
return new GitRepoCommitStats([]);
}

public addCommitInfo(info: GitCommitInfo) {
public addCommitInfo(info: GitCommitInfo): void {
this.commitInfos.push(info);
}

public getUniqueAuthorsCount(): number {
const uniqueAuthorHashedEmails = this.getUniqueAuthorHashedEmails();
return uniqueAuthorHashedEmails.size;
const uniqueAuthorEmails = this.getUniqueAuthorEmails();
return uniqueAuthorEmails.size;
}

public getCommitsCount(): number {
return this.commitInfos.length;
}

public getUniqueAuthorHashedEmails(): Set<string> {
public getUniqueAuthorEmails(): Set<string> {
const allCommitAuthorHashedEmails: string[] = this.commitInfos.map(
(c) => c.authorHashedEmail,
(c) => c.authorEmail,
);
const uniqueAuthorHashedEmails: Set<string> = new Set(
const uniqueAuthorEmails: Set<string> = new Set(
allCommitAuthorHashedEmails,
);
return uniqueAuthorHashedEmails;
return uniqueAuthorEmails;
}

public getRepoContributors(): Contributor[] {
const uniqueAuthorHashedEmails = this.getUniqueAuthorHashedEmails();
const uniqueAuthorEmails = this.getUniqueAuthorEmails();
const contributors: Contributor[] = [];

// for each uniqueAuthorHashedEmails, get the latest commit
for (const nextUniqueAuthorHashedEmail of uniqueAuthorHashedEmails) {
for (const nextUniqueAuthorEmail of uniqueAuthorEmails) {
const latestCommitTimestamp = this.getMostRecentCommitTimestamp(
nextUniqueAuthorHashedEmail,
nextUniqueAuthorEmail,
);
contributors.push({
userId: nextUniqueAuthorHashedEmail,
email: nextUniqueAuthorEmail,
lastCommitDate: latestCommitTimestamp,
});
}
Expand All @@ -103,7 +96,7 @@ export class GitRepoCommitStats {

public getMostRecentCommitTimestamp(authorHashedEmail: string): string {
for (const nextGI of this.commitInfos) {
if (nextGI.authorHashedEmail === authorHashedEmail) {
if (nextGI.authorEmail === authorHashedEmail) {
return nextGI.commitTimestamp;
}
}
Expand All @@ -115,8 +108,7 @@ export function parseGitLogLine(logLine: string): GitCommitInfo {
const lineComponents = logLine.split(SERIOUS_DELIMITER);
const authorEmail = lineComponents[2];
const commitTimestamp = lineComponents[3];
const hashedAuthorEmail = hashData(authorEmail);
const commitInfo = new GitCommitInfo(hashedAuthorEmail, commitTimestamp);
const commitInfo = new GitCommitInfo(authorEmail, commitTimestamp);
return commitInfo;
}

Expand All @@ -130,25 +122,6 @@ export function parseGitLog(gitLog: string): GitRepoCommitStats {
return stats;
}

export function hashData(s: string): string {
const hashedData = crypto
.createHash('sha1')
.update(s)
.digest('hex');
return hashedData;
}

export function isSha1Hash(data: string): boolean {
// sha1 hash must be exactly 40 characters of 0-9 / a-f (i.e. lowercase hex characters)
// ^ == start anchor
// [0-9a-f] == characters 0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f only
// {40} 40 of the [0-9a-f] characters
// $ == end anchor
const matchRegex = new RegExp('^[0-9a-f]{40}$');
const looksHashed = matchRegex.test(data);
return looksHashed;
}

/**
* @returns time stamp in seconds-since-epoch of 90 days ago since 90 days is the "contributing devs" timeframe
*/
Expand Down
2 changes: 1 addition & 1 deletion src/lib/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ export interface ProtectOptions {
}

export interface Contributor {
userId: string;
email: string;
lastCommitDate: string;
}

Expand Down
31 changes: 13 additions & 18 deletions test/dev-count-analysis.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,17 @@ import {
SERIOUS_DELIMITER,
MAX_COMMITS_IN_GIT_LOG,
separateLines,
hashData,
} from '../src/lib/monitor/dev-count-analysis';

const testTimeout = 60000;

const TIMESTAMP_TO_TEST = 1590174610000;

describe('cli dev count via git log analysis', () => {
let expectedContributorUserIds: string[] = [];
let expectedMergeOnlyUserIds: string[] = [];
let expectedContributoremails: string[] = [];
let expectedMergeOnlyemails: string[] = [];

// this computes the expectedContributorUserIds and expectedMergeOnlyUserIds
// this computes the expectedContributoremails and expectedMergeOnlyemails
beforeAll(async () => {
const timestampEpochSecondsEndOfPeriod = Math.floor(
TIMESTAMP_TO_TEST / 1000,
Expand Down Expand Up @@ -53,12 +52,8 @@ describe('cli dev count via git log analysis', () => {
}
}

expectedContributorUserIds = uniqueEmailsContainingAtLeastOneNonMergeCommit.map(
hashData,
);
expectedMergeOnlyUserIds = uniqueEmailsContainingOnlyMergeCommits.map(
hashData,
);
expectedContributoremails = uniqueEmailsContainingAtLeastOneNonMergeCommit;
expectedMergeOnlyemails = uniqueEmailsContainingOnlyMergeCommits;
}, testTimeout);

it(
Expand All @@ -69,9 +64,9 @@ describe('cli dev count via git log analysis', () => {
periodDays: 10,
repoPath: process.cwd(),
});
const contributorUserIds = contributors.map((c) => c.userId);
expect(contributorUserIds.sort()).toEqual(
expectedContributorUserIds.sort(),
const contributoremails = contributors.map((c) => c.email);
expect(contributoremails.sort()).toEqual(
expectedContributoremails.sort(),
);
},
testTimeout,
Expand All @@ -85,13 +80,13 @@ describe('cli dev count via git log analysis', () => {
periodDays: 10,
repoPath: process.cwd(),
});
const contributorUserIds = contributors.map((c) => c.userId);
const contributoremails = contributors.map((c) => c.email);

// make sure none of uniqueEmailsContainingOnlyMergeCommits are in contributorUserIds
const legitUserIdsWhichAreAlsoInMergeOnlyUserIds = expectedMergeOnlyUserIds.filter(
(user) => contributorUserIds.includes(user),
// make sure none of uniqueEmailsContainingOnlyMergeCommits are in contributoremails
const legitemailsWhichAreAlsoInMergeOnlyemails = expectedMergeOnlyemails.filter(
(user) => contributoremails.includes(user),
);
expect(legitUserIdsWhichAreAlsoInMergeOnlyUserIds).toHaveLength(0);
expect(legitemailsWhichAreAlsoInMergeOnlyemails).toHaveLength(0);
},
testTimeout,
);
Expand Down
101 changes: 19 additions & 82 deletions test/dev-count-analysis.test.ts
Original file line number Diff line number Diff line change
@@ -1,66 +1,22 @@
import { test } from 'tap';
const osName = require('os-name');

const isWindows =
osName()
.toLowerCase()
.indexOf('windows') === 0;

import {
GitCommitInfo,
parseGitLog,
GitRepoCommitStats,
parseGitLogLine,
isSha1Hash,
hashData,
getTimestampStartOfContributingDevTimeframe,
separateLines,
execShell,
ShellOutError,
runGitLog,
} from '../src/lib/monitor/dev-count-analysis';

const expectedEmailHashes = {
'someemail-1@somedomain.com': '069598f5bf317927731aecc6648bd521f6a12c92',
'someemail-2@somedomain.com': '15726593ee5b5182412ca858e2472477f4ce9f30',
};

test('hashData works', (t) => {
t.plan(2);

const email1 = 'someemail-1@somedomain.com';
const hashedEmail1 = hashData(email1);
t.equal(hashedEmail1, expectedEmailHashes['someemail-1@somedomain.com']);

const email2 = 'someemail-2@somedomain.com';
const hashedEmail2 = hashData(email2);
t.equal(hashedEmail2, expectedEmailHashes['someemail-2@somedomain.com']);
});

test('isSha1Hash works', (t) => {
t.plan(6);
t.ok(isSha1Hash('069598f5bf317927731aecc6648bd521f6a12c92'));
t.ok(isSha1Hash('0123456789abcdef0123456789abcdef01234567')); // all the possible hex characters
t.notOk(isSha1Hash('abcdefghijklmnopqrstuvwxyz01234567890')); // contains letters which are not hex characters
t.notOk(isSha1Hash('0123456789abcdef0123456789abcdef01234567a')); // more than 40 characters
t.notOk(isSha1Hash('0123456789abcdef0123456789abcdef0123456')); // less than 40 characters
t.notOk(isSha1Hash('someemail-1@somedomain.com')); // obviously an email
});
const osName = require('os-name');

test('you cannot create a new GitCommitInfo if the email is not hashed', (t) => {
t.plan(1);
try {
new GitCommitInfo(
'someemail-1@somedomain.com',
'2020-02-06T11:43:11+00:00',
);
t.fail(
'GitCommitInfo constructor should throw exception if you try to pass a non-hahsed email',
);
} catch (err) {
t.pass();
}
});
const isWindows =
osName()
.toLowerCase()
.indexOf('windows') === 0;

test('can calculate start of contributing developer period', (t) => {
t.plan(1);
Expand All @@ -76,10 +32,7 @@ test('can parse a git log line', (t) => {
const line =
'0bd4d3c394a54ba54f6c44705ac73d7d87b39525_SNYK_SEPARATOR_some-user_SNYK_SEPARATOR_someemail-1@somedomain.com_SNYK_SEPARATOR_2020-02-06T11:43:11+00:00';
const commitInfo: GitCommitInfo = parseGitLogLine(line);
t.equal(
commitInfo.authorHashedEmail,
expectedEmailHashes['someemail-1@somedomain.com'],
);
t.equal(commitInfo.authorEmail, 'someemail-1@somedomain.com');
});

test('can handle an empty git log', (t) => {
Expand Down Expand Up @@ -157,68 +110,52 @@ test('runGitLog returns empty string and does not throw error when git log comma
});

function validateGitParsing(gitLog: string, t) {
t.plan(17);
t.plan(13);
const stats: GitRepoCommitStats = parseGitLog(gitLog);

t.equal(stats.getCommitsCount(), 3);
t.equal(stats.getUniqueAuthorsCount(), 2);

const uniqueAuthors: Set<string> = stats.getUniqueAuthorHashedEmails();
const uniqueAuthors: Set<string> = stats.getUniqueAuthorEmails();
t.equal(uniqueAuthors.size, 2);

t.notOk(uniqueAuthors.has('someemail-1@somedomain.com'));
t.notOk(uniqueAuthors.has('someemail-2@somedomain.com'));
t.ok(uniqueAuthors.has(expectedEmailHashes['someemail-1@somedomain.com']));
t.ok(uniqueAuthors.has(expectedEmailHashes['someemail-2@somedomain.com']));
t.ok(uniqueAuthors.has('someemail-1@somedomain.com'));
t.ok(uniqueAuthors.has('someemail-2@somedomain.com'));

const mostRecentCommitTimestampSomeEmail1 = stats.getMostRecentCommitTimestamp(
expectedEmailHashes['someemail-1@somedomain.com'],
'someemail-1@somedomain.com',
);
t.equal(mostRecentCommitTimestampSomeEmail1, '2020-02-06T11:43:11+00:00');
const mostRecentCommitTimestampSomeEmail2 = stats.getMostRecentCommitTimestamp(
expectedEmailHashes['someemail-2@somedomain.com'],
'someemail-2@somedomain.com',
);
t.equal(mostRecentCommitTimestampSomeEmail2, '2020-02-02T23:31:13+02:00');
t.equal(stats.getMostRecentCommitTimestamp('missing-email'), '');

const contributors: {
userId: string;
email: string;
lastCommitDate: string;
}[] = stats.getRepoContributors();
t.equal(contributors.length, 2);

t.notOk(
contributors.map((c) => c.userId).includes('someemail-1@somedomain.com'),
);
t.notOk(
contributors.map((c) => c.userId).includes('someemail-2@somedomain.com'),
);
t.ok(
contributors
.map((c) => c.userId)
.includes(expectedEmailHashes['someemail-1@somedomain.com']),
);
t.ok(
contributors
.map((c) => c.userId)
.includes(expectedEmailHashes['someemail-2@somedomain.com']),
);
t.ok(contributors.map((c) => c.email).includes('someemail-1@somedomain.com'));
t.ok(contributors.map((c) => c.email).includes('someemail-2@somedomain.com'));

const getTimestampById = (userId: string): string => {
const getTimestampById = (email: string): string => {
for (const c of contributors) {
if (c.userId === userId) {
if (c.email === email) {
return c.lastCommitDate;
}
}
throw new Error('contributor not found');
};

t.equal(
getTimestampById(expectedEmailHashes['someemail-1@somedomain.com']),
getTimestampById('someemail-1@somedomain.com'),
'2020-02-06T11:43:11+00:00',
);
t.equal(
getTimestampById(expectedEmailHashes['someemail-2@somedomain.com']),
getTimestampById('someemail-2@somedomain.com'),
'2020-02-02T23:31:13+02:00',
);
}
Expand Down