From 93dd407219f1807311c546567a54b2bfe4a04acb Mon Sep 17 00:00:00 2001 From: ST-DDT Date: Fri, 9 Feb 2024 21:47:38 +0100 Subject: [PATCH] feat: introduce faker.clone and derive --- docs/guide/randomizer.md | 22 ++-- docs/guide/usage.md | 185 +++++++++++++++++++++++++++++++++ src/faker.ts | 15 +++ src/internal/mersenne.ts | 51 +++++++-- src/randomizer.ts | 34 ++++-- src/simple-faker.ts | 66 +++++++++++- test/faker.spec.ts | 64 +++++++++++- test/internal/mersenne.spec.ts | 22 ++++ test/simple-faker.spec.ts | 71 +++++++++++++ 9 files changed, 498 insertions(+), 32 deletions(-) diff --git a/docs/guide/randomizer.md b/docs/guide/randomizer.md index e03152a74ad..9e2e17923c6 100644 --- a/docs/guide/randomizer.md +++ b/docs/guide/randomizer.md @@ -104,13 +104,20 @@ export function generatePureRandRandomizer( seed: number | number[] = Date.now() ^ (Math.random() * 0x100000000), factory: (seed: number) => RandomGenerator = xoroshiro128plus ): Randomizer { - const self = { - next: () => (self.generator.unsafeNext() >>> 0) / 0x100000000, - seed: (seed: number | number[]) => { - self.generator = factory(typeof seed === 'number' ? seed : seed[0]); - }, - } as Randomizer & { generator: RandomGenerator }; - self.seed(seed); - return self; + function wrapperFactory(generator?: RandomGenerator): Randomizer { + const self = { + generator, + next: () => (self.generator.unsafeNext() >>> 0) / 0x100000000, + seed: (seed: number | number[]) => { + self.generator = factory(typeof seed === 'number' ? seed : seed[0]); + }, + clone: () => wrapperFactory(self.generator.clone()), + } as Randomizer & { generator: RandomGenerator }; + return self; + } + + const randomizer = wrapperFactory(); + randomizer.seed(seed); + return randomizer; } ``` diff --git a/docs/guide/usage.md b/docs/guide/usage.md index ed6bf0afd13..552d20d783c 100644 --- a/docs/guide/usage.md +++ b/docs/guide/usage.md @@ -275,3 +275,188 @@ We will update these docs once a replacement is available. 
::: Congratulations, you should now be able to create any complex object you desire. Happy faking 🥳. + +## Create multiple complex objects + +Sometimes having a single one of your complex objects isn't enough. +Imagine having a list view/database of some kind you want to populate: + +| ID | First Name | Last Name | +| --------- | ---------- | --------- | +| 6fbe024f… | Tatyana | Koch | +| 862f3ccb… | Hans | Donnelly | +| b452acd6… | Judy | Boehm | + +The values are directly created using this method: + +```ts +import { faker } from '@faker-js/faker'; + +function createRandomUser(): User { + return { + _id: faker.string.uuid(), + firstName: faker.person.firstName(), + lastName: faker.person.lastName(), + }; +} + +const users = Array.from({ length: 3 }, () => createRandomUser()); +``` + +After some time you notice that you need a new column `createdDate`. + +You modify the method to also create that: + +```ts +function createRandomUser(): User { + return { + _id: faker.string.uuid(), + firstName: faker.person.firstName(), + lastName: faker.person.lastName(), + createdDate: faker.date.past(), + }; +} +``` + +Now let's have a look at our table again: + +| ID | First Name | Last Name | Created Date | +| --------- | ---------- | --------- | ------------ | +| 6fbe024f… | Tatyana | Koch | 2022-12-28 | +| 62f3ccbf… | Kacie | Pouros | 2023-04-06 | +| 52acd600… | Aron | Von | 2023-05-04 | + +Suddenly, every row from the second one onwards looks different. + +Why? Because calling `faker.date.past()` consumes a value from the seed, changing all subsequent values. + +There are two solutions to that: + +1. Set the seed explicitly before creating the data for that row: + +```ts +const users = Array.from({ length: 3 }, (_, i) => { + faker.seed(i); + return createRandomUser(); +}); +``` + +This is very straightforward, but has the disadvantage that you change the seed of your faker instance. +This might cause issues if you have lists of groups that contain lists of users. 
Each group would then contain the same users because the seed is reset. + +2. Derive a new faker instance for each user you create. + +```ts +function createRandomUser(faker: Faker): User { + const derivedFaker = faker.derive(); + return { + _id: derivedFaker.string.uuid(), + firstName: derivedFaker.person.firstName(), + lastName: derivedFaker.person.lastName(), + createdDate: derivedFaker.date.past(), + }; +} + +const users = Array.from({ length: 3 }, () => createRandomUser(faker)); +``` + +The `faker.derive()` call clones the instance and re-initializes the seed of the clone with a generated value from the original. +This decouples the generation of the list from generating a user. +No matter how many properties you add to or remove from the `User`, the following rows will not change. + +::: tip Note +The following is only relevant if you want to avoid changing your generated objects as much as possible: + +When adding one or more new properties, we recommend generating them last, because if you create a new property in the middle of your object, then the properties after that will still change (due to the extra seed consumption). +When removing properties, you can continue calling the old method (or a dummy method) to consume the same number of seed values. 
+::: + +This also works for deeply nested complex objects: + +```ts +function createLegalAgreement(faker: Faker) { + const derivedFaker = faker.derive(); + return { + _id: derivedFaker.string.uuid(), + partyA: createRandomUser(derivedFaker), + partyB: createRandomUser(derivedFaker), + }; +} + +function createRandomUser(faker: Faker): User { + const derivedFaker = faker.derive(); + return { + _id: derivedFaker.string.uuid(), + firstName: derivedFaker.person.firstName(), + lastName: derivedFaker.person.lastName(), + createdDate: derivedFaker.date.past(), + address: createRandomAddress(derivedFaker), + }; +} + +function createRandomAddress(faker: Faker): Address { + const derivedFaker = faker.derive(); + return { + _id: derivedFaker.string.uuid(), + streetName: derivedFaker.location.street(), + }; +} +``` + +::: warning Warning +Migrating your existing data generators will still change all data once, but after that they are independent. +So we recommend writing your methods like this from the start. +::: + +::: info Note +Depending on your preferences and requirements you can design the methods either like this: + +```ts +function createRandomXyz(faker: Faker): Xyz { + return { + _id: faker.string.uuid(), + }; +} + +createRandomXyz(faker.derive()); +createRandomXyz(faker.derive()); +createRandomXyz(faker.derive()); +``` + +or this + +```ts +function createRandomXyz(faker: Faker): Xyz { + const derivedFaker = faker.derive(); + return { + _id: derivedFaker.string.uuid(), + }; +} + +createRandomXyz(faker); +createRandomXyz(faker); +createRandomXyz(faker); +``` + +The sole difference being more or less explicit about deriving a faker instance (writing more or less code). +::: + +## Create identical complex objects + +If you want to create two identical objects, e.g. one to mutate and one to compare it to, +then you can use `faker.clone()` to create a faker instance with the same settings and seed as the original. 
+ +```ts +const clonedFaker = faker.clone(); +const user1 = createRandomUser(faker); +const user2 = createRandomUser(clonedFaker); +expect(user1).toEqual(user2); ✅ + +subscribeToNewsletter(user1); +// Check that the user hasn't been modified +expect(user1).toEqual(user2); ✅ +``` + +::: info Note +Calling `faker.clone()` is idempotent. So you can call it as often as you want, it doesn't have an impact on the original faker instance. +::: diff --git a/src/faker.ts b/src/faker.ts index e63b89b3e59..03c032d75cd 100644 --- a/src/faker.ts +++ b/src/faker.ts @@ -430,6 +430,21 @@ export class Faker extends SimpleFaker { 'This method has been removed. Please use the constructor instead.' ); } + + clone(): Faker { + const instance = new Faker({ + locale: this.rawDefinitions, + randomizer: this._randomizer.clone(), + }); + instance.setDefaultRefDate(this._defaultRefDate); + return instance; + } + + derive(): Faker { + const instance = this.clone(); + instance.seed(this.number.int()); + return instance; + } } export type FakerOptions = ConstructorParameters[0]; diff --git a/src/internal/mersenne.ts b/src/internal/mersenne.ts index 2372e364d78..66143e2b5bf 100644 --- a/src/internal/mersenne.ts +++ b/src/internal/mersenne.ts @@ -82,6 +82,22 @@ export class MersenneTwister19937 { private mt: number[] = Array.from({ length: this.N }); // the array for the state vector private mti = this.N + 1; // mti==N+1 means mt[N] is not initialized + /** + * Creates a new instance of MersenneTwister19937. + * + * @param options The required options to initialize the instance. + * @param options.mt The state vector to use. The array will be copied. + * @param options.mti The state vector index to use. 
+ */ + constructor(options?: { mt: number[]; mti: number }) { + if (options != null && 'mt' in options) { + this.mt = [...options.mt]; + this.mti = options.mti; + } else { + this.initGenrand(Date.now() ^ (Math.random() * 0x100000000)); + } + } + /** * Returns a 32-bits unsigned integer from an operand to which applied a bit * operator. @@ -166,11 +182,11 @@ export class MersenneTwister19937 { /** * Initialize by an array with array-length. * - * @param initKey is the array for initializing keys - * @param keyLength is its length + * @param initKey Is the array for initializing keys. */ - initByArray(initKey: number[], keyLength: number): void { + initByArray(initKey: number[]): void { this.initGenrand(19650218); + const keyLength = initKey.length; let i = 1; let j = 0; let k = this.N > keyLength ? this.N : keyLength; @@ -240,11 +256,6 @@ export class MersenneTwister19937 { // generate N words at one time let kk: number; - // if initGenrand() has not been called a default initial seed is used - if (this.mti === this.N + 1) { - this.initGenrand(5489); - } - for (kk = 0; kk < this.N - this.M; kk++) { y = this.unsigned32( (this.mt[kk] & this.UPPER_MASK) | (this.mt[kk + 1] & this.LOWER_MASK) @@ -324,6 +335,10 @@ export class MersenneTwister19937 { return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0); } // These real versions are due to Isaku Wada, 2002/01/09 + + clone(): MersenneTwister19937 { + return new MersenneTwister19937({ mt: this.mt, mti: this.mti }); + } } /** @@ -333,10 +348,21 @@ export class MersenneTwister19937 { * @internal */ export function generateMersenne32Randomizer(): Randomizer { + // This method does not expose any internal parameters to users. const twister = new MersenneTwister19937(); + return _generateMersenne32Randomizer(twister); +} - twister.initGenrand(Math.ceil(Math.random() * Number.MAX_SAFE_INTEGER)); - +/** + * Generates a MersenneTwister19937 randomizer with 32 bits of precision. 
+ * + * @internal + * + * @param twister The twister to use. + */ +function _generateMersenne32Randomizer( + twister: MersenneTwister19937 +): Randomizer { return { next(): number { return twister.genrandReal2(); @@ -345,8 +371,11 @@ export function generateMersenne32Randomizer(): Randomizer { if (typeof seed === 'number') { twister.initGenrand(seed); } else if (Array.isArray(seed)) { - twister.initByArray(seed, seed.length); + twister.initByArray(seed); } }, + clone(): Randomizer { + return _generateMersenne32Randomizer(twister.clone()); + }, }; } diff --git a/src/randomizer.ts b/src/randomizer.ts index d2c6892b616..297c3e80fe5 100644 --- a/src/randomizer.ts +++ b/src/randomizer.ts @@ -19,14 +19,21 @@ * seed: number | number[] = Date.now() ^ (Math.random() * 0x100000000), * factory: (seed: number) => RandomGenerator = xoroshiro128plus * ): Randomizer { - * const self = { - * next: () => (self.generator.unsafeNext() >>> 0) / 0x100000000, - * seed: (seed: number | number[]) => { - * self.generator = factory(typeof seed === 'number' ? seed : seed[0]); - * }, - * } as Randomizer & { generator: RandomGenerator }; - * self.seed(seed); - * return self; + * function wrapperFactory(generator?: RandomGenerator): Randomizer { + * const self = { + * generator, + * next: () => (self.generator.unsafeNext() >>> 0) / 0x100000000, + * seed: (seed: number | number[]) => { + * self.generator = factory(typeof seed === 'number' ? seed : seed[0]); + * }, + * clone: () => wrapperFactory(self.generator.clone()), + * } as Randomizer & { generator: RandomGenerator }; + * return self; + * } + * + * const randomizer = wrapperFactory(); + * randomizer.seed(seed); + * return randomizer; * } * * const randomizer = generatePureRandRandomizer(); @@ -68,4 +75,16 @@ export interface Randomizer { * @since 8.2.0 */ seed(seed: number | number[]): void; + + /** + * Creates an exact copy of this Randomizer, including the current seed state. 
+ * + * @example + * const clone = randomizer.clone(); + * randomizer.next() // 0.3404027920160495 + * clone.next() // 0.3404027920160495 (same as above) + * randomizer.next() // 0.929890375900335 + * clone.next() // 0.929890375900335 (same as above) + */ + clone(): Randomizer; } diff --git a/src/simple-faker.ts b/src/simple-faker.ts index 3993392deab..c9ea0b2fb79 100644 --- a/src/simple-faker.ts +++ b/src/simple-faker.ts @@ -79,7 +79,7 @@ export class SimpleFaker { } /** @internal */ - private readonly _randomizer: Randomizer; + protected readonly _randomizer: Randomizer; readonly datatype: DatatypeModule = new DatatypeModule(this); readonly date: SimpleDateModule = new SimpleDateModule(this); @@ -254,6 +254,70 @@ export class SimpleFaker { return seed; } + + /** + * Clones this instance, preserving the current state. + * This method is idempotent and does not consume any seed values. + * The cloned instance will produce the same values as the original, given that the methods are called in the same order. + * This is useful for creating identical complex objects: + * - One to be mutated by the method under test + * - and the other one serves as a comparison. + * + * @see faker.derive If you want to generate deterministic but different values. 
+ * + * @example + * faker.seed(42); + * faker.number.int(10); // 4 (1st call) + * faker.number.int(10); // 8 (2nd call) + * + * faker.seed(42); + * // Creates a new instance with the same state as the current instance + * const clonedFaker = faker.clone(); + * // The cloned instance will produce the same values as the original + * clonedFaker.number.int(10); // 4 (cloned 1st call) + * clonedFaker.number.int(10); // 8 (cloned 2nd call) + * + * // The original instance is not affected + * faker.number.int(10); // 4 (1st call) + * faker.number.int(10); // 8 (2nd call) + */ + clone(): SimpleFaker { + const instance = new SimpleFaker({ + randomizer: this._randomizer.clone(), + }); + instance.setDefaultRefDate(this._defaultRefDate); + return instance; + } + + /** + * Derives a new instance from this one. + * This consumes a single value from the original instance to initialize the seed of the derived instance, and thus has a one-time effect on subsequent calls. + * The derived instance can be used to generate deterministic values based on the current seed without consuming a dynamic amount of seed values. + * This is useful, if you wish to generate a complex object (e.g. a Person) and might want to add a property to it later. + * If the Person is created from a derived instance, then adding or removing properties from the Person doesn't have any impact on the following data, generated using the original instance (except from the derive call itself). + * + * @see simpleFaker.clone If you want to create an exact clone of this SimpleFaker instance without consuming a seed value. 
+ * + * @example + * simpleFaker.seed(42); + * simpleFaker.number.int(10); // 4 (1st call) + * simpleFaker.number.int(10); // 8 (2nd call) + * + * simpleFaker.seed(42); + * // Creates a new instance with a seed generated from the current instance + * const derivedFaker = simpleFaker.derive(); // (1st call) + * // The derived instance will produce values dependent on the state of the original instance at the time of the derive call + * derivedFaker.number.int(10); // 7 (derived 1st call) + * derivedFaker.number.int(10); // 0 (derived 2nd call) + * + * // It doesn't matter how many calls to derived are executed + * simpleFaker.number.int(10); // 8 (2nd call) <- This is same as before + */ + derive(): SimpleFaker { + const instance = this.clone(); + instance.seed(this.number.int()); + return instance; + } } export const simpleFaker = new SimpleFaker(); diff --git a/test/faker.spec.ts b/test/faker.spec.ts index a026577bdf7..6fa6b684298 100644 --- a/test/faker.spec.ts +++ b/test/faker.spec.ts @@ -71,12 +71,14 @@ describe('faker', () => { describe('randomizer', () => { it('should be possible to provide a custom Randomizer', () => { + const randomizer = { + next: () => 0, + seed: () => void 0, + clone: () => randomizer, + }; const customFaker = new Faker({ locale: {}, - randomizer: { - next: () => 0, - seed: () => void 0, - }, + randomizer, }); expect(customFaker.number.int()).toBe(0); @@ -125,6 +127,60 @@ describe('faker', () => { }); }); + describe('clone()', () => { + it('should create a clone that returns the same values as the original', () => { + const clone1 = faker.clone(); + const clone2 = faker.clone(); + const clone3 = clone1.clone(); + + expect(clone1).not.toBe(faker); + expect(clone2).not.toBe(faker); + expect(clone3).not.toBe(faker); + expect(clone1).not.toBe(clone2); + expect(clone1).not.toBe(clone3); + expect(clone2).not.toBe(clone3); + + const value0 = faker.number.int(); + expect(clone1.number.int()).toBe(value0); + 
expect(clone2.number.int()).toBe(value0); + expect(clone3.number.int()).toBe(value0); + + const value1 = clone1.number.int(); + expect(faker.number.int()).toBe(value1); + expect(clone2.number.int()).toBe(value1); + expect(clone3.number.int()).toBe(value1); + + const value2 = clone2.number.int(); + expect(clone1.number.int()).toBe(value2); + expect(faker.number.int()).toBe(value2); + expect(clone3.number.int()).toBe(value2); + + const value3 = clone3.number.int(); + expect(clone1.number.int()).toBe(value3); + expect(clone2.number.int()).toBe(value3); + expect(faker.number.int()).toBe(value3); + }); + }); + + describe('derive()', () => { + it("should create a derived faker, that doesn't affect the original", () => { + const seed = faker.seed(); + faker.number.int(); + const value = faker.number.int(); + + faker.seed(seed); + const derived = faker.derive(); + + expect(derived).not.toBe(faker); + + for (let i = 0; i < derived.number.int(100); i++) { + derived.number.int(); + } + + expect(faker.number.int()).toBe(value); + }); + }); + describe('defaultRefDate', () => { it('should be a defined', () => { expect(faker.defaultRefDate).toBeDefined(); diff --git a/test/internal/mersenne.spec.ts b/test/internal/mersenne.spec.ts index d72a9c3f9b6..5d9622764b7 100644 --- a/test/internal/mersenne.spec.ts +++ b/test/internal/mersenne.spec.ts @@ -133,4 +133,26 @@ describe('generateMersenne32Randomizer()', () => { }); }); }); + + describe('clone()', () => { + it('should return a new instance', () => { + const clone = randomizer.clone(); + + expect(clone).not.toBe(randomizer); + }); + + it('should return a new instance with the same state', () => { + const clone = randomizer.clone(); + + // Test that the clone is independent from the original at intervals + const originalValues = Array.from({ length: 1000 }, randomizer.next); + const clonedValues = Array.from({ length: 1000 }, clone.next); + expect(clonedValues).toEqual(originalValues); + + // Test that the clone is independent from 
the original at each call + for (let i = 0; i < 1000; i++) { + expect(clone.next()).toBe(randomizer.next()); + } + }); + }); }); diff --git a/test/simple-faker.spec.ts b/test/simple-faker.spec.ts index 36536b212b0..6c0a3af0696 100644 --- a/test/simple-faker.spec.ts +++ b/test/simple-faker.spec.ts @@ -20,6 +20,23 @@ describe('simpleFaker', () => { } }); + describe('randomizer', () => { + it('should be possible to provide a custom Randomizer', () => { + const randomizer = { + next: () => 0, + seed: () => void 0, + clone: () => randomizer, + }; + const customFaker = new SimpleFaker({ + randomizer, + }); + + expect(customFaker.number.int()).toBe(0); + expect(customFaker.number.int()).toBe(0); + expect(customFaker.number.int()).toBe(0); + }); + }); + // This is only here for coverage // The actual test is in mersenne.spec.ts describe('seed()', () => { @@ -60,6 +77,60 @@ describe('simpleFaker', () => { }); }); + describe('clone()', () => { + it('should create a clone that returns the same values as the original', () => { + const clone1 = simpleFaker.clone(); + const clone2 = simpleFaker.clone(); + const clone3 = clone1.clone(); + + expect(clone1).not.toBe(simpleFaker); + expect(clone2).not.toBe(simpleFaker); + expect(clone3).not.toBe(simpleFaker); + expect(clone1).not.toBe(clone2); + expect(clone1).not.toBe(clone3); + expect(clone2).not.toBe(clone3); + + const value0 = simpleFaker.number.int(); + expect(clone1.number.int()).toBe(value0); + expect(clone2.number.int()).toBe(value0); + expect(clone3.number.int()).toBe(value0); + + const value1 = clone1.number.int(); + expect(simpleFaker.number.int()).toBe(value1); + expect(clone2.number.int()).toBe(value1); + expect(clone3.number.int()).toBe(value1); + + const value2 = clone2.number.int(); + expect(clone1.number.int()).toBe(value2); + expect(simpleFaker.number.int()).toBe(value2); + expect(clone3.number.int()).toBe(value2); + + const value3 = clone3.number.int(); + expect(clone1.number.int()).toBe(value3); + 
expect(clone2.number.int()).toBe(value3); + expect(simpleFaker.number.int()).toBe(value3); + }); + }); + + describe('derive()', () => { + it("should create a derived faker, that doesn't affect the original", () => { + const seed = simpleFaker.seed(); + simpleFaker.number.int(); + const value = simpleFaker.number.int(); + + simpleFaker.seed(seed); + const derived = simpleFaker.derive(); + + expect(derived).not.toBe(simpleFaker); + + for (let i = 0; i < derived.number.int(100); i++) { + derived.number.int(); + } + + expect(simpleFaker.number.int()).toBe(value); + }); + }); + describe('defaultRefDate', () => { it('should be a defined', () => { expect(simpleFaker.defaultRefDate).toBeDefined();