From c05d113535e5d1f48f58292a150fd7be70858803 Mon Sep 17 00:00:00 2001 From: Sam Liu Date: Thu, 13 May 2021 11:01:02 -0700 Subject: [PATCH 1/2] feat: Allow dir_checksum() to accept an ignore_list --- samcli/lib/utils/hash.py | 14 ++++++++++--- tests/unit/lib/utils/test_hash.py | 33 +++++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/samcli/lib/utils/hash.py b/samcli/lib/utils/hash.py index ee080faf51..a9cbae1885 100644 --- a/samcli/lib/utils/hash.py +++ b/samcli/lib/utils/hash.py @@ -3,6 +3,7 @@ """ import os import hashlib +from typing import List, Optional BLOCK_SIZE = 4096 @@ -37,25 +38,32 @@ def file_checksum(file_name: str) -> str: return md5.hexdigest() -def dir_checksum(directory: str, followlinks: bool = True) -> str: +def dir_checksum(directory: str, followlinks: bool = True, ignore_list: Optional[List[str]] = None) -> str: """ Parameters ---------- directory : A directory with an absolute path followlinks: Follow symbolic links through the given directory + ignore_list: The list of file/directory names to ignore in checksum Returns ------- md5 checksum of the directory. """ + ignore_set = set(ignore_list or []) md5_dir = hashlib.md5() files = list() # Walk through given directory and find all directories and files. - for dirpath, _, filenames in os.walk(directory, followlinks=followlinks): + for dirpath, dirnames, filenames in os.walk(directory, followlinks=followlinks): + # > When topdown is True, the caller can modify the dirnames list in-place + # > (perhaps using del or slice assignment) and walk() will only recurse + # > into the subdirectories whose names remain in dirnames + # > https://docs.python.org/library/os.html#os.walk + dirnames[:] = [dirname for dirname in dirnames if dirname not in ignore_set] # Go through every file in the directory and sub-directory. 
- for filepath in [os.path.join(dirpath, filename) for filename in filenames]: + for filepath in [os.path.join(dirpath, filename) for filename in filenames if filename not in ignore_set]: # Look at filename and contents. # Encode file's checksum to be utf-8 and bytes. files.append(filepath) diff --git a/tests/unit/lib/utils/test_hash.py b/tests/unit/lib/utils/test_hash.py index 1f16bb393e..388b3c96da 100644 --- a/tests/unit/lib/utils/test_hash.py +++ b/tests/unit/lib/utils/test_hash.py @@ -40,11 +40,11 @@ def test_dir_hash_independent_of_file_order(self): mockwalk.return_value = [ ( self.temp_dir, - (), - ( + [], + [ file1.name, file2.name, - ), + ], ), ] dir_checksums["first"] = dir_checksum(self.temp_dir) @@ -53,11 +53,11 @@ def test_dir_hash_independent_of_file_order(self): mockwalk.return_value = [ ( self.temp_dir, - (), - ( + [], + [ file2.name, file1.name, - ), + ], ), ] dir_checksums["second"] = dir_checksum(self.temp_dir) @@ -73,6 +73,27 @@ def test_dir_hash_same_contents_diff_file_per_directory(self): checksum_after = dir_checksum(os.path.dirname(_file.name)) self.assertNotEqual(checksum_before, checksum_after) + def test_dir_hash_with_ignore_list(self): + _file = tempfile.NamedTemporaryFile(delete=False, dir=self.temp_dir) + _file.write(b"Testfile") + _file.close() + + dir_path = os.path.dirname(_file.name) + checksum_before = dir_checksum(dir_path) + + # add a file to .aws-sam/ + aws_sam_dir_path = os.path.join(dir_path, ".aws-sam") + os.mkdir(aws_sam_dir_path) + _new_file = tempfile.NamedTemporaryFile(delete=False, dir=aws_sam_dir_path) + _new_file.write(b"dummy") + _new_file.close() + + checksum_after = dir_checksum(os.path.dirname(_file.name)) + self.assertNotEqual(checksum_before, checksum_after) + + checksum_after_with_ignore_list = dir_checksum(os.path.dirname(_file.name), ignore_list=[".aws-sam"]) + self.assertEqual(checksum_before, checksum_after_with_ignore_list) + def test_dir_cyclic_links(self): _file = 
tempfile.NamedTemporaryFile(delete=False, dir=self.temp_dir) _file.write(b"Testfile") From 954c5904e73790129f49669c9a8c164e472f763f Mon Sep 17 00:00:00 2001 From: Sam Liu Date: Thu, 13 May 2021 11:02:08 -0700 Subject: [PATCH 2/2] feat: Ignore .aws-sam when calculating cache md5 --- samcli/lib/build/build_strategy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samcli/lib/build/build_strategy.py b/samcli/lib/build/build_strategy.py index 829946b6bf..ecded3a743 100644 --- a/samcli/lib/build/build_strategy.py +++ b/samcli/lib/build/build_strategy.py @@ -214,7 +214,7 @@ def build_single_function_definition(self, build_definition: FunctionBuildDefini return self._delegate_build_strategy.build_single_function_definition(build_definition) code_dir = str(pathlib.Path(self._base_dir, cast(str, build_definition.codeuri)).resolve()) - source_md5 = dir_checksum(code_dir) + source_md5 = dir_checksum(code_dir, ignore_list=[".aws-sam"]) cache_function_dir = pathlib.Path(self._cache_dir, build_definition.uuid) function_build_results = {} @@ -253,7 +253,7 @@ def build_single_layer_definition(self, layer_definition: LayerBuildDefinition) Builds single layer definition with caching """ code_dir = str(pathlib.Path(self._base_dir, cast(str, layer_definition.codeuri)).resolve()) - source_md5 = dir_checksum(code_dir) + source_md5 = dir_checksum(code_dir, ignore_list=[".aws-sam"]) cache_function_dir = pathlib.Path(self._cache_dir, layer_definition.uuid) layer_build_result = {}