Skip to content

Commit

Permalink
Use FNV instead of SHA512 for the hashing in A/B test (#7260)
Browse files Browse the repository at this point in the history
* Use FNV instead of SHA512

* Keep using `SHA512` algorithm for old experiments

* Include LS to keep using the old algorithm

* Use the expected list of old experiments

* Added dispersion tests and changed npm package used

* Code reviews

* Code reviews
  • Loading branch information
Kartik Raj authored Sep 17, 2019
1 parent cacf988 commit db297c4
Show file tree
Hide file tree
Showing 9 changed files with 2,135 additions and 11 deletions.
1 change: 1 addition & 0 deletions news/2 Fixes/7218.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed A/B testing sampling
10 changes: 8 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2662,6 +2662,7 @@
"@babel/preset-env": "^7.1.0",
"@babel/preset-react": "^7.0.0",
"@babel/register": "^7.4.4",
"@enonic/fnv-plus": "^1.3.0",
"@istanbuljs/nyc-config-typescript": "^0.1.3",
"@nteract/plotly": "^1.48.3",
"@nteract/transform-dataresource": "^4.3.5",
Expand Down
11 changes: 9 additions & 2 deletions src/client/common/crypto.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,15 @@ import { ICryptoUtils, IHashFormat } from './types';
*/
@injectable()
export class CryptoUtils implements ICryptoUtils {
public createHash<E extends keyof IHashFormat>(data: string, hashFormat: E): IHashFormat[E] {
const hash = createHash('sha512').update(data).digest('hex');
public createHash<E extends keyof IHashFormat>(data: string, hashFormat: E, algorithm: 'SHA512' | 'FNV' = 'FNV'): IHashFormat[E] {
let hash: string;
if (algorithm === 'FNV') {
// tslint:disable-next-line:no-require-imports
const fnv = require('@enonic/fnv-plus');
hash = fnv.fast1a32hex(data) as string;
} else {
hash = createHash('sha512').update(data).digest('hex');
}
if (hashFormat === 'number') {
const result = parseInt(hash, 16);
if (isNaN(result)) {
Expand Down
9 changes: 8 additions & 1 deletion src/client/common/experiments.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ export const downloadedExperimentStorageKey = 'DOWNLOADED_EXPERIMENTS_STORAGE_KE
const configFile = path.join(EXTENSION_ROOT_DIR, 'experiments.json');
export const configUri = 'https://raw.githubusercontent.com/microsoft/vscode-python/master/experiments.json';
export const EXPERIMENTS_EFFORT_TIMEOUT_MS = 2000;
// The old experiments which are working fine using the `SHA512` algorithm
export const oldExperimentSalts = ['ShowExtensionSurveyPrompt', 'ShowPlayIcon', 'AlwaysDisplayTestExplorer', 'LS'];

/**
* Manages and stores experiments, implements the AB testing functionality
Expand Down Expand Up @@ -160,7 +162,12 @@ export class ExperimentsManager implements IExperimentsManager {
if (typeof (this.appEnvironment.machineId) !== 'string') {
throw new Error('Machine ID should be a string');
}
const hash = this.crypto.createHash(`${this.appEnvironment.machineId}+${salt}`, 'number');
let hash: number;
if (oldExperimentSalts.find(oldSalt => oldSalt === salt)) {
hash = this.crypto.createHash(`${this.appEnvironment.machineId}+${salt}`, 'number', 'SHA512');
} else {
hash = this.crypto.createHash(`${this.appEnvironment.machineId}+${salt}`, 'number', 'FNV');
}
return hash % 100 >= min && hash % 100 < max;
}

Expand Down
2 changes: 1 addition & 1 deletion src/client/common/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ export interface ICryptoUtils {
* @param data The string to hash
* @param hashFormat Return format of the hash, number or string
*/
createHash<E extends keyof IHashFormat>(data: string, hashFormat: E): IHashFormat[E];
createHash<E extends keyof IHashFormat>(data: string, hashFormat: E, algorithm?: 'SHA512' | 'FNV'): IHashFormat[E];
}

export const IAsyncDisposableRegistry = Symbol('IAsyncDisposableRegistry');
Expand Down
83 changes: 82 additions & 1 deletion src/test/common/crypto.unit.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,18 @@

'use strict';

import { assert } from 'chai';
import { assert, expect } from 'chai';
import * as path from 'path';
import { CryptoUtils } from '../../client/common/crypto';
import { FileSystem } from '../../client/common/platform/fileSystem';
import { PlatformService } from '../../client/common/platform/platformService';
import { EXTENSION_ROOT_DIR_FOR_TESTS } from '../constants';

// tslint:disable-next-line: max-func-body-length
suite('Crypto Utils', async () => {
let crypto: CryptoUtils;
const fs = new FileSystem(new PlatformService());
const file = path.join(EXTENSION_ROOT_DIR_FOR_TESTS, 'src', 'test', 'common', 'randomWords.txt');
setup(() => {
crypto = new CryptoUtils();
});
Expand Down Expand Up @@ -45,4 +52,78 @@ suite('Crypto Utils', async () => {
const hash2 = crypto.createHash('hash2', 'string');
assert.notEqual(hash1, hash2, 'Hashes should be different strings');
});
test('If hashFormat equals `number`, ensure numbers are uniformly distributed on scale from 0 to 100', async () => {
const words = await fs.readFile(file);
const wordList = words.split('\n');
const buckets: number[] = Array(100).fill(0);
const hashes = Array(10).fill(0);
for (const w of wordList) {
for (let i = 0; i < 10; i += 1) {
const word = `${w}${i}`;
const hash = crypto.createHash(word, 'number');
buckets[hash % 100] += 1;
hashes[i] = hash % 100;
}
}
// Total number of words = wordList.length * 10, because we added ten variants of each word above.
const expectedHitsPerBucket = wordList.length * 10 / 100;
for (const hit of buckets) {
expect(hit).to.be.lessThan(1.25 * expectedHitsPerBucket);
expect(hit).to.be.greaterThan(0.75 * expectedHitsPerBucket);
}
});
test('If hashFormat equals `number`, on a scale of 0 to 100, small difference in the input on average produce large differences (about 33) in the output ', async () => {
const words = await fs.readFile(file);
const wordList = words.split('\n');
const buckets: number[] = Array(100).fill(0);
let hashes: number[] = [];
let totalDifference = 0;
// We are only iterating over the first 10 words for purposes of this test
for (const w of wordList.slice(0, 10)) {
hashes = [];
totalDifference = 0;
if (w.length === 0) {
continue;
}
for (let i = 0; i < 10; i += 1) {
const word = `${w}${i}`;
const hash = crypto.createHash(word, 'number');
buckets[hash % 100] += 1;
hashes.push(hash % 100);
}
for (let i = 0; i < 10; i += 1) {
const word = `${i}${w}`;
const hash = crypto.createHash(word, 'number');
buckets[hash % 100] += 1;
hashes.push(hash % 100);
}
// Iterating over ASCII alphabets 'a' to 'z' and appending to the word
for (let i = 0; i < 26; i += 1) {
const word = `${String.fromCharCode(97 + i)}${w}`;
const hash = crypto.createHash(word, 'number');
buckets[hash % 100] += 1;
hashes.push(hash % 100);
}
// Iterating over ASCII alphabets 'a' to 'z' and prepending to the word
for (let i = 0; i < 26; i += 1) {
const word = `${w}${String.fromCharCode(97 + i)}`;
const hash = crypto.createHash(word, 'number');
buckets[hash % 100] += 1;
hashes.push(hash % 100);
}
// tslint:disable: prefer-for-of
for (let i = 0; i < hashes.length; i += 1) {
for (let j = 0; j < hashes.length; j += 1) {
if (hashes[i] > hashes[j]) {
totalDifference += hashes[i] - hashes[j];
} else {
totalDifference += hashes[j] - hashes[i];
}
}
}
const averageDifference = totalDifference / hashes.length / hashes.length;
expect(averageDifference).to.be.lessThan(1.25 * 33);
expect(averageDifference).to.be.greaterThan(0.75 * 33);
}
});
});
29 changes: 25 additions & 4 deletions src/test/common/experiments.unit.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import { ApplicationEnvironment } from '../../client/common/application/applicat
import { IApplicationEnvironment, IWorkspaceService } from '../../client/common/application/types';
import { WorkspaceService } from '../../client/common/application/workspace';
import { CryptoUtils } from '../../client/common/crypto';
import { configUri, downloadedExperimentStorageKey, ExperimentsManager, experimentStorageKey, isDownloadedStorageValidKey } from '../../client/common/experiments';
import { configUri, downloadedExperimentStorageKey, ExperimentsManager, experimentStorageKey, isDownloadedStorageValidKey, oldExperimentSalts } from '../../client/common/experiments';
import { HttpClient } from '../../client/common/net/httpClient';
import { PersistentStateFactory } from '../../client/common/persistentState';
import { FileSystem } from '../../client/common/platform/fileSystem';
Expand Down Expand Up @@ -596,14 +596,35 @@ suite('xA/B experiments', () => {
expect(() => expManager.isUserInRange(79, 94, 'salt')).to.throw();
} else if (testParams.error) {
const error = new Error('Kaboom');
when(crypto.createHash(anything(), 'number')).thenThrow(error);
when(crypto.createHash(anything(), 'number', anything())).thenThrow(error);
expect(() => expManager.isUserInRange(79, 94, 'salt')).to.throw(error);
} else {
when(crypto.createHash(anything(), 'number')).thenReturn(testParams.hash);
when(crypto.createHash(anything(), 'number', anything())).thenReturn(testParams.hash);
expect(expManager.isUserInRange(79, 94, 'salt')).to.equal(testParams.expectedResult, 'Incorrectly identified');
}
});
});
test('If experiment salt belongs to an old experiment, keep using `SHA512` algorithm', async () => {
when(appEnvironment.machineId).thenReturn('101');
when(crypto.createHash(anything(), 'number', 'SHA512')).thenReturn(644);
when(crypto.createHash(anything(), anything(), 'FNV')).thenReturn(1293);
// 'ShowPlayIcon' is one of the old experiments
expManager.isUserInRange(79, 94, 'ShowPlayIcon');
verify(crypto.createHash(anything(), 'number', 'SHA512')).once();
verify(crypto.createHash(anything(), anything(), 'FNV')).never();
});
test('If experiment salt does not belong to an old experiment, use `FNV` algorithm', async () => {
when(appEnvironment.machineId).thenReturn('101');
when(crypto.createHash(anything(), anything(), 'SHA512')).thenReturn(644);
when(crypto.createHash(anything(), 'number', 'FNV')).thenReturn(1293);
expManager.isUserInRange(79, 94, 'NewExperimentSalt');
verify(crypto.createHash(anything(), anything(), 'SHA512')).never();
verify(crypto.createHash(anything(), 'number', 'FNV')).once();
});
test('Use the expected list of old experiments', async () => {
const expectedOldExperimentSalts = ['ShowExtensionSurveyPrompt', 'ShowPlayIcon', 'AlwaysDisplayTestExplorer', 'LS'];
assert.deepEqual(expectedOldExperimentSalts, oldExperimentSalts);
});
});

const testsForPopulateUserExperiments =
Expand Down Expand Up @@ -633,7 +654,7 @@ suite('xA/B experiments', () => {
.returns(() => testParams.experimentStorageValue);
when(appEnvironment.machineId).thenReturn('101');
if (testParams.hash) {
when(crypto.createHash(anything(), 'number')).thenReturn(testParams.hash);
when(crypto.createHash(anything(), 'number', anything())).thenReturn(testParams.hash);
}
expManager.populateUserExperiments();
assert.deepEqual(expManager.userExperiments, testParams.expectedResult);
Expand Down
Loading

0 comments on commit db297c4

Please sign in to comment.