Skip to content

Commit

Permalink
Fix tree hashing with nested empty directories
Browse files Browse the repository at this point in the history
When testing for directories to exclude from hashing, we must exclude
not only empty directories, but also directories that themselves contain
nothing but empty directories; in essence, we suppress adding
directories that have no files contained within their entire subtree.

While fixing this, it seemed prudent to eliminate the `names` argument
to `tree_hash()`, especially as it was not actually used to iterate
over.

Fixes JuliaLang/julia#33979 (comment)

(cherry picked from commit 648d66c, PR #1522)
  • Loading branch information
staticfloat authored and fredrikekre committed Dec 3, 2019
1 parent 68a4cde commit 14f1ff4
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 11 deletions.
31 changes: 23 additions & 8 deletions src/GitTools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -227,17 +227,30 @@ function blob_hash(path::AbstractString, HashType = SHA.SHA1_CTX)
return SHA.digest!(ctx)
end

"""
contains_no_files(root::AbstractString)
Helper function to determine whether a directory contains no files; e.g. it is
either empty or contains only other directories that contain nothing but other
directories. This is used to exclude directories from tree hashing.
"""
function contains_no_files(root::AbstractString)
for (root, dirs, files) in walkdir(root)
if !isempty(files)
return false
end
end
return true
end


"""
tree_hash(root::AbstractString)
Calculate the git tree hash of a given path. Note that attempting to take the
tree hash of an empty directory will throw an error.
"""
function tree_hash(
root::AbstractString,
names::Vector{String} = readdir(root);
HashType = SHA.SHA1_CTX,
)
function tree_hash(root::AbstractString; HashType = SHA.SHA1_CTX)
entries = Tuple{String, Vector{UInt8}, GitMode}[]
for f in readdir(root)
# Skip `.git` directories
Expand All @@ -248,9 +261,11 @@ function tree_hash(
filepath = abspath(root, f)
mode = gitmode(filepath)
if mode == mode_dir
names = readdir(filepath)
isempty(names) && continue
hash = tree_hash(filepath, names)
# If this directory contains no files, then skip it
contains_no_files(filepath) && continue

# Otherwise, hash it up!
hash = tree_hash(filepath)
else
hash = blob_hash(filepath)
end
Expand Down
9 changes: 6 additions & 3 deletions test/artifacts.jl
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,11 @@ end
end

# Empty directories do nothing to effect the hash, so we create one with a
# random name to prove that it does not get hashed into the rest.
mkpath(joinpath(path, Random.randstring(8)))
# random name to prove that it does not get hashed into the rest. Also, it
# turns out that life is cxomplex enough that we need to test the nested
# empty directories case as well.
rand_dir = joinpath(path, Random.randstring(8), "inner")
mkpath(rand_dir)

# Symlinks are not followed, even if they point to directories
symlink("foo3", joinpath(path, "foo3_link"))
Expand All @@ -147,7 +150,7 @@ end
cd(path) do
read(`git init .`)
read(`git add . `)
read(`git commit -m "foo"`)
read(`git commit --allow-empty -m "foo"`)
hash = chomp(String(read(`git log -1 --pretty='%T' HEAD`)))
println(hash)
end
Expand Down

0 comments on commit 14f1ff4

Please sign in to comment.