Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(lambda-python): asset hash is non-deterministic #12984

Merged
merged 14 commits into from
Feb 24, 2021
Merged
84 changes: 67 additions & 17 deletions packages/@aws-cdk/aws-lambda-python/lib/bundling.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,56 @@ export interface BundlingOptions {
* Output path suffix ('python' for a layer, '.' otherwise)
*/
readonly outputPathSuffix: string;

/**
* Determines how asset hash is calculated. Assets will get rebuild and
* uploaded only if their hash has changed.
*
* If asset hash is set to `SOURCE` (default), then only changes to the source
* directory will cause the asset to rebuild. This means, for example, that in
* order to pick up a new dependency version, a change must be made to the
* source tree. Ideally, this can be implemented by including a dependency
* lockfile in your source tree or using fixed dependencies.
*
* If the asset hash is set to `OUTPUT`, the hash is calculated after
* bundling. This means that any change in the output will cause the asset to
* be invalidated and uploaded. Bear in mind that `pip` adds timestamps to
* dependencies it installs, which implies that in this mode Python bundles
* will _always_ get rebuild and uploaded. Normally this is an anti-pattern
* since build
*
* @default AssetHashType.SOURCE By default, hash is calculated based on the
* contents of the source directory. If `assetHash` is also specified, the
* default is `CUSTOM`. This means that only updates to the source will cause
* the asset to rebuild.
*/
readonly assetHashType?: cdk.AssetHashType;

/**
* Specify a custom hash for this asset. If `assetHashType` is set it must
* be set to `AssetHashType.CUSTOM`. For consistency, this custom hash will
* be SHA256 hashed and encoded as hex. The resulting hash will be the asset
* hash.
*
* NOTE: the hash is used in order to identify a specific revision of the asset, and
* used for optimizing and caching deployment activities related to this asset such as
* packaging, uploading to Amazon S3, etc. If you chose to customize the hash, you will
* need to make sure it is updated every time the asset changes, or otherwise it is
* possible that some deployments will not be invalidated.
*
* @default - based on `assetHashType`
*/
readonly assetHash?: string;
}

/**
* Produce bundled Lambda asset code
*/
export function bundle(options: BundlingOptions): lambda.AssetCode {
export function bundle(options: BundlingOptions): lambda.Code {
const { entry, runtime, outputPathSuffix } = options;

const hasDeps = hasDependencies(entry);
const stagedir = cdk.FileSystem.mkdtemp('python-bundling-');
const hasDeps = stageDependencies(entry, stagedir);

const depsCommand = chain([
hasDeps ? `rsync -r ${BUNDLER_DEPENDENCIES_CACHE}/. ${cdk.AssetStaging.BUNDLING_OUTPUT_DIR}/${outputPathSuffix}` : '',
Expand All @@ -54,15 +95,19 @@ export function bundle(options: BundlingOptions): lambda.AssetCode {
? 'Dockerfile.dependencies'
: 'Dockerfile';

const image = cdk.BundlingDockerImage.fromAsset(entry, {
// copy Dockerfile to workdir
fs.copyFileSync(path.join(__dirname, dockerfile), path.join(stagedir, dockerfile));

const image = cdk.BundlingDockerImage.fromAsset(stagedir, {
buildArgs: {
IMAGE: runtime.bundlingDockerImage.image,
},
file: path.join(__dirname, dockerfile),
file: dockerfile,
});

return lambda.Code.fromAsset(entry, {
assetHashType: cdk.AssetHashType.BUNDLE,
assetHashType: options.assetHashType,
assetHash: options.assetHash,
exclude: DEPENDENCY_EXCLUDES,
bundling: {
image,
Expand All @@ -75,20 +120,25 @@ export function bundle(options: BundlingOptions): lambda.AssetCode {
* Checks to see if the `entry` directory contains a type of dependency that
* we know how to install.
*/
export function hasDependencies(entry: string): boolean {
if (fs.existsSync(path.join(entry, 'Pipfile'))) {
return true;
}

if (fs.existsSync(path.join(entry, 'poetry.lock'))) {
return true;
}

if (fs.existsSync(path.join(entry, 'requirements.txt'))) {
return true;
export function stageDependencies(entry: string, stagedir: string): boolean {
const prefixes = [
'Pipfile',
'pyproject',
'poetry',
'requirements.txt',
];

let found = false;
for (const file of fs.readdirSync(entry)) {
for (const prefix of prefixes) {
if (file.startsWith(prefix)) {
fs.copyFileSync(path.join(entry, file), path.join(stagedir, file));
found = true;
}
}
}

return false;
return found;
}

function chain(commands: string[]): string {
Expand Down
42 changes: 42 additions & 0 deletions packages/@aws-cdk/aws-lambda-python/lib/function.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as fs from 'fs';
import * as path from 'path';
import * as lambda from '@aws-cdk/aws-lambda';
import { AssetHashType } from '@aws-cdk/core';
import { bundle } from './bundling';

// keep this import separate from other imports to reduce chance for merge conflicts with v2-main
Expand Down Expand Up @@ -37,6 +38,45 @@ export interface PythonFunctionProps extends lambda.FunctionOptions {
* @default lambda.Runtime.PYTHON_3_7
*/
readonly runtime?: lambda.Runtime;

/**
* Determines how asset hash is calculated. Assets will get rebuild and
* uploaded only if their hash has changed.
*
* If asset hash is set to `SOURCE` (default), then only changes to the source
* directory will cause the asset to rebuild. This means, for example, that in
* order to pick up a new dependency version, a change must be made to the
* source tree. Ideally, this can be implemented by including a dependency
* lockfile in your source tree or using fixed dependencies.
*
* If the asset hash is set to `OUTPUT`, the hash is calculated after
* bundling. This means that any change in the output will cause the asset to
* be invalidated and uploaded. Bear in mind that `pip` adds timestamps to
* dependencies it installs, which implies that in this mode Python bundles
* will _always_ get rebuild and uploaded. Normally this is an anti-pattern
* since build
*
* @default AssetHashType.SOURCE By default, hash is calculated based on the
* contents of the source directory. This means that only updates to the
* source will cause the asset to rebuild.
*/
readonly assetHashType?: AssetHashType;

/**
* Specify a custom hash for this asset. If `assetHashType` is set it must
* be set to `AssetHashType.CUSTOM`. For consistency, this custom hash will
* be SHA256 hashed and encoded as hex. The resulting hash will be the asset
* hash.
*
* NOTE: the hash is used in order to identify a specific revision of the asset, and
* used for optimizing and caching deployment activities related to this asset such as
* packaging, uploading to Amazon S3, etc. If you chose to customize the hash, you will
* need to make sure it is updated every time the asset changes, or otherwise it is
* possible that some deployments will not be invalidated.
*
* @default - based on `assetHashType`
*/
readonly assetHash?: string;
}

/**
Expand Down Expand Up @@ -70,6 +110,8 @@ export class PythonFunction extends lambda.Function {
runtime,
entry,
outputPathSuffix: '.',
assetHashType: props.assetHashType,
assetHash: props.assetHash,
}),
handler: `${index.slice(0, -3)}.${handler}`,
});
Expand Down
62 changes: 17 additions & 45 deletions packages/@aws-cdk/aws-lambda-python/test/bundling.test.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import * as fs from 'fs';
import * as path from 'path';
import { Code, Runtime } from '@aws-cdk/aws-lambda';
import { hasDependencies, bundle } from '../lib/bundling';
import { FileSystem } from '@aws-cdk/core';
import { stageDependencies, bundle } from '../lib/bundling';

jest.mock('@aws-cdk/aws-lambda');
const existsSyncOriginal = fs.existsSync;
const existsSyncMock = jest.spyOn(fs, 'existsSync');

jest.mock('child_process', () => ({
spawnSync: jest.fn(() => {
Expand Down Expand Up @@ -41,9 +40,6 @@ test('Bundling a function without dependencies', () => {
],
}),
}));

// Searches for requirements.txt in entry
expect(existsSyncMock).toHaveBeenCalledWith(path.join(entry, 'requirements.txt'));
});

test('Bundling a function with requirements.txt installed', () => {
Expand All @@ -63,9 +59,6 @@ test('Bundling a function with requirements.txt installed', () => {
],
}),
}));

// Searches for requirements.txt in entry
expect(existsSyncMock).toHaveBeenCalledWith(path.join(entry, 'requirements.txt'));
});

test('Bundling Python 2.7 with requirements.txt installed', () => {
Expand All @@ -85,9 +78,6 @@ test('Bundling Python 2.7 with requirements.txt installed', () => {
],
}),
}));

// Searches for requirements.txt in entry
expect(existsSyncMock).toHaveBeenCalledWith(path.join(entry, 'requirements.txt'));
});

test('Bundling a layer with dependencies', () => {
Expand Down Expand Up @@ -128,42 +118,24 @@ test('Bundling a python code layer', () => {
}));
});

describe('Dependency detection', () => {
test('Detects pipenv', () => {
existsSyncMock.mockImplementation((p: fs.PathLike) => {
if (/Pipfile/.test(p.toString())) {
return true;
}
return existsSyncOriginal(p);
});

expect(hasDependencies('/asset-input')).toEqual(true);
});

test('Detects poetry', () => {
existsSyncMock.mockImplementation((p: fs.PathLike) => {
if (/poetry.lock/.test(p.toString())) {
return true;
}
return existsSyncOriginal(p);
});

expect(hasDependencies('/asset-input')).toEqual(true);
});

test('Detects requirements.txt', () => {
existsSyncMock.mockImplementation((p: fs.PathLike) => {
if (/requirements.txt/.test(p.toString())) {
return true;
}
return existsSyncOriginal(p);
});

expect(hasDependencies('/asset-input')).toEqual(true);
describe('Dependency detection', () => {
test.each(['Pipfile', 'poetry.lock', 'requirements.txt'])('detect dependency %p', filename => {
// GIVEN
const sourcedir = FileSystem.mkdtemp('source-');
const stagedir = FileSystem.mkdtemp('stage-');
fs.writeFileSync(path.join(sourcedir, filename), 'dummy!');

// WHEN
const found = stageDependencies(sourcedir, stagedir);

// THEN
expect(found).toBeTruthy();
expect(fs.existsSync(path.join(stagedir, filename))).toBeTruthy();
});

test('No known dependencies', () => {
existsSyncMock.mockImplementation(() => false);
expect(hasDependencies('/asset-input')).toEqual(false);
const sourcedir = FileSystem.mkdtemp('source-');
expect(stageDependencies(sourcedir, '/dummy')).toEqual(false);
});
});
84 changes: 77 additions & 7 deletions packages/@aws-cdk/aws-lambda-python/test/function.test.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,36 @@
import '@aws-cdk/assert/jest';
import { Runtime } from '@aws-cdk/aws-lambda';
import { Stack } from '@aws-cdk/core';
import { Code, Runtime } from '@aws-cdk/aws-lambda';
import { AssetHashType, AssetOptions, Stack } from '@aws-cdk/core';
import { PythonFunction } from '../lib';
import { bundle } from '../lib/bundling';

jest.mock('../lib/bundling', () => {
return {
bundle: jest.fn().mockReturnValue({
bind: () => {
return { inlineCode: 'code' };
},
bindToResource: () => { return; },
bundle: jest.fn().mockImplementation((options: AssetOptions): Code => {
const mockObjectKey = (() => {
const hashType = options.assetHashType ?? (options.assetHash ? 'custom' : 'source');
switch (hashType) {
case 'source': return 'SOURCE_MOCK';
case 'output': return 'OUTPUT_MOCK';
case 'custom': {
if (!options.assetHash) { throw new Error('no custom hash'); }
return options.assetHash;
}
}

throw new Error('unexpected asset hash type');
})();

return {
isInline: false,
bind: () => ({
s3Location: {
bucketName: 'mock-bucket-name',
objectKey: mockObjectKey,
},
}),
bindToResource: () => { return; },
};
}),
hasDependencies: jest.fn().mockReturnValue(false),
};
Expand Down Expand Up @@ -73,3 +93,53 @@ test('throws with the wrong runtime family', () => {
runtime: Runtime.NODEJS_12_X,
})).toThrow(/Only `PYTHON` runtimes are supported/);
});

test('allows specifying hash type', () => {
new PythonFunction(stack, 'source1', {
entry: 'test/lambda-handler-nodeps',
index: 'index.py',
handler: 'handler',
});

new PythonFunction(stack, 'source2', {
entry: 'test/lambda-handler-nodeps',
index: 'index.py',
handler: 'handler',
assetHashType: AssetHashType.SOURCE,
});

new PythonFunction(stack, 'output', {
entry: 'test/lambda-handler-nodeps',
index: 'index.py',
handler: 'handler',
assetHashType: AssetHashType.OUTPUT,
});

new PythonFunction(stack, 'custom', {
entry: 'test/lambda-handler-nodeps',
index: 'index.py',
handler: 'handler',
assetHash: 'MY_CUSTOM_HASH',
});

expect(stack).toHaveResource('AWS::Lambda::Function', {
Code: {
S3Bucket: 'mock-bucket-name',
S3Key: 'SOURCE_MOCK',
},
});

expect(stack).toHaveResource('AWS::Lambda::Function', {
Code: {
S3Bucket: 'mock-bucket-name',
S3Key: 'OUTPUT_MOCK',
},
});

expect(stack).toHaveResource('AWS::Lambda::Function', {
Code: {
S3Bucket: 'mock-bucket-name',
S3Key: 'MY_CUSTOM_HASH',
},
});
});
Loading