From 11f766797d31a90adf835d17eae8db2dc3fe6949 Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Tue, 16 Jan 2024 21:35:31 -0800 Subject: [PATCH 01/15] WIP: Defining new function for computing the package verification code, while excluding symbolic links, returning an SpdxVerificationCodeV2 object. --- src/checksums.jl | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/src/checksums.jl b/src/checksums.jl index 06fec59..3758595 100644 --- a/src/checksums.jl +++ b/src/checksums.jl @@ -21,13 +21,14 @@ function spdxchecksum(algorithm::AbstractString, rootdir::AbstractString, exclud (algorithm == "SHA3-384") ? (sha3_384, SHA3_384_CTX) : (sha3_512, SHA3_512_CTX) - package_hash::Vector{UInt8}= spdxchecksum_sha(HashFunction, HashContext, rootdir, excluded_flist, excluded_dirlist, excluded_patterns) + package_hash, ignored_files= spdxchecksum_sha(HashFunction, HashContext, rootdir, excluded_flist, excluded_dirlist, excluded_patterns) - return package_hash + return (package_hash, ignored_files) end function spdxchecksum_sha(HashFunction::Function, HashContext::DataType, rootdir::AbstractString, excluded_flist::Vector{<:AbstractString}, excluded_dirlist::Vector{<:AbstractString}, excluded_patterns::Vector{Regex}) - flist_hash::Vector{Vector{UInt8}}= [file_hash(file, HashFunction) for file in getpackagefiles(rootdir, excluded_flist, excluded_dirlist, excluded_patterns)] + ignored_files= String[] + flist_hash::Vector{Vector{UInt8}}= [file_hash(file, HashFunction) for file in getpackagefiles(rootdir, excluded_flist, excluded_dirlist, excluded_patterns, ignored_files)] flist_hash= sort(flist_hash) ctx= HashContext() @@ -35,7 +36,7 @@ function spdxchecksum_sha(HashFunction::Function, HashContext::DataType, rootdir SHA.update!(ctx, hash) end - return SHA.digest!(ctx) + return (SHA.digest!(ctx), ignored_files) end file_hash(fpath::AbstractString, HashFunction::Function)= open(fpath) do f @@ -43,11 +44,11 @@ 
file_hash(fpath::AbstractString, HashFunction::Function)= open(fpath) do f end ############################### -function getpackagefiles(rootdir::AbstractString, excluded_flist::Vector{<:AbstractString}, excluded_dirlist::Vector{<:AbstractString}, excluded_patterns::Vector{Regex}) - return Channel{String}(chnl -> _getpackagefiles(chnl, rootdir, excluded_flist, excluded_dirlist, excluded_patterns)) +function getpackagefiles(rootdir, excluded_flist, excluded_dirlist, excluded_patterns, ignored_files) + return Channel{String}(chnl -> _getpackagefiles(chnl, rootdir, excluded_flist, excluded_dirlist, excluded_patterns, ignored_files)) end -function _getpackagefiles(chnl, root::AbstractString, excluded_flist::Vector{<:AbstractString}, excluded_dirlist::Vector{<:AbstractString}, excluded_patterns::Vector{Regex}) +function _getpackagefiles(chnl, root::AbstractString, excluded_flist::Vector{<:AbstractString}, excluded_dirlist::Vector{<:AbstractString}, excluded_patterns::Vector{Regex}, ignored_files::Vector{String}) # On first call of this function put an absolute path on root and exclusion lists isabspath(root) || (root= abspath(root)) all(isabspath.(excluded_flist)) || (excluded_flist= normpath.(joinpath.(root, excluded_flist))) @@ -62,16 +63,36 @@ function _getpackagefiles(chnl, root::AbstractString, excluded_flist::Vector{<:A if isdir(path) if any(excluded_dirlist .== path) continue # Skip over exluded directories + elseif islink(path) + push!(ignored_files, path) + continue # Skip over exluded directories else - _getpackagefiles(chnl, path, excluded_flist, excluded_dirlist, excluded_patterns) # Descend into the directory and get the files there + _getpackagefiles(chnl, path, excluded_flist, excluded_dirlist, excluded_patterns, ignored_files) # Descend into the directory and get the files there end elseif any(excluded_flist .== path) + push!(ignored_files, path) continue # Skip over excluded files elseif any(occursin.(excluded_patterns, path)) continue # Skip files that 
match one of the excluded patterns + elseif islink(path) + push!(ignored_files, path) # Any link that passes the previous checks is a part of the deployed code and it's exclusion from the computation needs to be noted + continue else push!(chnl, path) # Put the file path in the channel end end return nothing -end \ No newline at end of file +end + + +############################### +function ComputePackageVerificationCode(rootdir::AbstractString, excluded_flist::Vector{<:AbstractString}= String[], excluded_dirlist::Vector{<:AbstractString}= String[], excluded_patterns::Vector{Regex}=Regex[]) + package_hash, ignored_files= spdxchecksum_sha(sha1, SHA1_CTX, rootdir, excluded_flist, excluded_dirlist, excluded_patterns) + return SpdxPkgVerificationCodeV2(bytes2hex(package_hash), ignored_files) +end + + +############################### +#function ComputeFileChecksum() +# +#end \ No newline at end of file From b53237a2378f7ec5356e333daf8ac43dfed1bd41 Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Wed, 24 Jan 2024 21:36:04 -0800 Subject: [PATCH 02/15] Add logging to the computation of the verification code --- Project.toml | 1 + src/SPDX.jl | 1 + src/checksums.jl | 6 +++++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 48e1e6e..285d55e 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" [compat] DataStructures = "0.18" diff --git a/src/SPDX.jl b/src/SPDX.jl index 0a9ba41..2b5460d 100644 --- a/src/SPDX.jl +++ b/src/SPDX.jl @@ -8,6 +8,7 @@ using UUIDs using TimeZones using SHA using Base.Filesystem +using Logging ####################### Base.Bool(x::AbstractString)= parse(Bool, lowercase(x)) diff --git a/src/checksums.jl b/src/checksums.jl index 3758595..3cc84aa 100644 --- 
a/src/checksums.jl +++ b/src/checksums.jl @@ -40,6 +40,7 @@ function spdxchecksum_sha(HashFunction::Function, HashContext::DataType, rootdir end file_hash(fpath::AbstractString, HashFunction::Function)= open(fpath) do f + @logmsg Logging.LogLevel(-100) "File hashed: $fpath" return HashFunction(f) end @@ -87,8 +88,11 @@ end ############################### function ComputePackageVerificationCode(rootdir::AbstractString, excluded_flist::Vector{<:AbstractString}= String[], excluded_dirlist::Vector{<:AbstractString}= String[], excluded_patterns::Vector{Regex}=Regex[]) + @logmsg Logging.LogLevel(-50) "Computing Verification Code at: $rootdir" excluded_flist= excluded_flist excluded_dirlist= excluded_dirlist excluded_patterns= excluded_patterns package_hash, ignored_files= spdxchecksum_sha(sha1, SHA1_CTX, rootdir, excluded_flist, excluded_dirlist, excluded_patterns) - return SpdxPkgVerificationCodeV2(bytes2hex(package_hash), ignored_files) + verif_code= SpdxPkgVerificationCodeV2(bytes2hex(package_hash), ignored_files) + @logmsg Logging.LogLevel(-50) string(verif_code) + return verif_code end From 7ebdcc33fa7d5e1633de87ed704ed82f9adc3f4d Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Wed, 24 Jan 2024 22:47:11 -0800 Subject: [PATCH 03/15] In ComputePackageVerificationCode, set the path of excluded files relative to the root directory --- src/checksums.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/checksums.jl b/src/checksums.jl index 3cc84aa..d642db7 100644 --- a/src/checksums.jl +++ b/src/checksums.jl @@ -90,6 +90,7 @@ end function ComputePackageVerificationCode(rootdir::AbstractString, excluded_flist::Vector{<:AbstractString}= String[], excluded_dirlist::Vector{<:AbstractString}= String[], excluded_patterns::Vector{Regex}=Regex[]) @logmsg Logging.LogLevel(-50) "Computing Verification Code at: $rootdir" excluded_flist= excluded_flist excluded_dirlist= excluded_dirlist excluded_patterns= excluded_patterns package_hash, ignored_files= spdxchecksum_sha(sha1, 
SHA1_CTX, rootdir, excluded_flist, excluded_dirlist, excluded_patterns) + ignored_files= relpath.(ignored_files, rootdir) verif_code= SpdxPkgVerificationCodeV2(bytes2hex(package_hash), ignored_files) @logmsg Logging.LogLevel(-50) string(verif_code) return verif_code From e74301f20e56111f1de651380cd014dc0aad7a54 Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Fri, 26 Jan 2024 20:39:08 -0800 Subject: [PATCH 04/15] Improve the file hashing logging statement --- src/checksums.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/checksums.jl b/src/checksums.jl index d642db7..11dbbca 100644 --- a/src/checksums.jl +++ b/src/checksums.jl @@ -40,8 +40,9 @@ function spdxchecksum_sha(HashFunction::Function, HashContext::DataType, rootdir end file_hash(fpath::AbstractString, HashFunction::Function)= open(fpath) do f - @logmsg Logging.LogLevel(-100) "File hashed: $fpath" - return HashFunction(f) + hash= HashFunction(f) + @logmsg Logging.LogLevel(-100) "$(string(HashFunction))($fpath)= $(bytes2hex(hash))" + return hash end ############################### From 8b7679ecd37e17ad4f06385ef2e0de9b1dcc57b2 Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Sat, 27 Jan 2024 18:58:29 -0800 Subject: [PATCH 05/15] New function for computing the checksum of a single file. The function spdxchecksum is removed. 
--- src/checksums.jl | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/checksums.jl b/src/checksums.jl index 11dbbca..892df14 100644 --- a/src/checksums.jl +++ b/src/checksums.jl @@ -1,8 +1,8 @@ # SPDX-License-Identifier: MIT -export spdxchecksum +export ComputePackageVerificationCode, ComputeFileChecksum -function spdxchecksum(algorithm::AbstractString, rootdir::AbstractString, excluded_flist::Vector{<:AbstractString}= String[], excluded_dirlist::Vector{<:AbstractString}= String[], excluded_patterns::Vector{Regex}=Regex[]) +function determine_checksum_algorithm(algorithm::AbstractString) # Check to see if algorithm is in the list of support algorithms, unsupported algorithms, or not recognized # TODO: substitute "_" for "-" and other things to account for user typos supported_algorithms= Set(["SHA1", "SHA224", "SHA256", "SHA384", "SHA512", "SHA3-256", "SHA3-384", "SHA3-512"]) @@ -21,9 +21,7 @@ function spdxchecksum(algorithm::AbstractString, rootdir::AbstractString, exclud (algorithm == "SHA3-384") ? 
(sha3_384, SHA3_384_CTX) : (sha3_512, SHA3_512_CTX) - package_hash, ignored_files= spdxchecksum_sha(HashFunction, HashContext, rootdir, excluded_flist, excluded_dirlist, excluded_patterns) - - return (package_hash, ignored_files) + return (HashFunction, HashContext) end function spdxchecksum_sha(HashFunction::Function, HashContext::DataType, rootdir::AbstractString, excluded_flist::Vector{<:AbstractString}, excluded_dirlist::Vector{<:AbstractString}, excluded_patterns::Vector{Regex}) @@ -99,6 +97,9 @@ end ############################### -#function ComputeFileChecksum() -# -#end \ No newline at end of file +function ComputeFileChecksum(algorithm::AbstractString, filepath::AbstractString) + @logmsg Logging.LogLevel(-50) "Computing File Checksum on $filepath" + HashFunction, HashContext= determine_checksum_algorithm(algorithm) + fhash= file_hash(filepath, HashFunction) + return SpdxChecksumV2(algorithm, bytes2hex(fhash)) +end \ No newline at end of file From 3d4b98e6a6ca6c50eac561392cdec95479524906 Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Sat, 27 Jan 2024 19:02:43 -0800 Subject: [PATCH 06/15] Logging message tweak --- src/checksums.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/checksums.jl b/src/checksums.jl index 892df14..faf1b97 100644 --- a/src/checksums.jl +++ b/src/checksums.jl @@ -101,5 +101,7 @@ function ComputeFileChecksum(algorithm::AbstractString, filepath::AbstractString @logmsg Logging.LogLevel(-50) "Computing File Checksum on $filepath" HashFunction, HashContext= determine_checksum_algorithm(algorithm) fhash= file_hash(filepath, HashFunction) - return SpdxChecksumV2(algorithm, bytes2hex(fhash)) + checksum_obj= SpdxChecksumV2(algorithm, bytes2hex(fhash)) + @logmsg Logging.LogLevel(-50) string(checksum_obj) + return checksum_obj end \ No newline at end of file From 8730c5f3b4a4714c4e01cc87d9a8ba6ff809249f Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Sat, 27 Jan 2024 20:51:41 -0800 Subject: [PATCH 07/15] On further 
review of the SPDX specification, updating the algorithm for computing a package verification code. --- src/checksums.jl | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/src/checksums.jl b/src/checksums.jl index faf1b97..08f64f3 100644 --- a/src/checksums.jl +++ b/src/checksums.jl @@ -12,35 +12,30 @@ function determine_checksum_algorithm(algorithm::AbstractString) issubset(Set([algorithm]), unsupported_algorithms) && error("checksum(): The hash algorithm $(algorithm) is not supported by SPDX.jl") issubset(Set([algorithm]), supported_algorithms) || error("checksum(): algorithm $(algorithm) is not recognized") - HashFunction, HashContext= (algorithm == "SHA1") ? (sha1, SHA1_CTX) : - (algorithm == "SHA224") ? (sha224, SHA224_CTX) : - (algorithm == "SHA256") ? (sha256, SHA256_CTX) : - (algorithm == "SHA384") ? (sha384, SHA384_CTX) : - (algorithm == "SHA512") ? (sha512, SHA256_CTX) : - (algorithm == "SHA3-256") ? (sha3_256, SHA3_256_CTX) : - (algorithm == "SHA3-384") ? (sha3_384, SHA3_384_CTX) : - (sha3_512, SHA3_512_CTX) - - return (HashFunction, HashContext) + HashFunction= (algorithm == "SHA1") ? sha1 : + (algorithm == "SHA224") ? sha224 : + (algorithm == "SHA256") ? sha256 : + (algorithm == "SHA384") ? sha384 : + (algorithm == "SHA512") ? sha512 : + (algorithm == "SHA3-256") ? sha3_256 : + (algorithm == "SHA3-384") ? 
sha3_384 : + sha3_512 + + return HashFunction end -function spdxchecksum_sha(HashFunction::Function, HashContext::DataType, rootdir::AbstractString, excluded_flist::Vector{<:AbstractString}, excluded_dirlist::Vector{<:AbstractString}, excluded_patterns::Vector{Regex}) +function spdxchecksum(HashFunction::Function, rootdir::AbstractString, excluded_flist::Vector{<:AbstractString}, excluded_dirlist::Vector{<:AbstractString}, excluded_patterns::Vector{Regex}) ignored_files= String[] - flist_hash::Vector{Vector{UInt8}}= [file_hash(file, HashFunction) for file in getpackagefiles(rootdir, excluded_flist, excluded_dirlist, excluded_patterns, ignored_files)] + flist_hash::Vector{String}= [file_hash(file, HashFunction) for file in getpackagefiles(rootdir, excluded_flist, excluded_dirlist, excluded_patterns, ignored_files)] flist_hash= sort(flist_hash) - - ctx= HashContext() - for hash in flist_hash - SHA.update!(ctx, hash) - end - - return (SHA.digest!(ctx), ignored_files) + combined_hashes= join(flist_hash) + return (HashFunction(combined_hashes), ignored_files) end file_hash(fpath::AbstractString, HashFunction::Function)= open(fpath) do f hash= HashFunction(f) @logmsg Logging.LogLevel(-100) "$(string(HashFunction))($fpath)= $(bytes2hex(hash))" - return hash + return bytes2hex(hash) end ############################### @@ -88,7 +83,7 @@ end ############################### function ComputePackageVerificationCode(rootdir::AbstractString, excluded_flist::Vector{<:AbstractString}= String[], excluded_dirlist::Vector{<:AbstractString}= String[], excluded_patterns::Vector{Regex}=Regex[]) @logmsg Logging.LogLevel(-50) "Computing Verification Code at: $rootdir" excluded_flist= excluded_flist excluded_dirlist= excluded_dirlist excluded_patterns= excluded_patterns - package_hash, ignored_files= spdxchecksum_sha(sha1, SHA1_CTX, rootdir, excluded_flist, excluded_dirlist, excluded_patterns) + package_hash, ignored_files= spdxchecksum(sha1, rootdir, excluded_flist, excluded_dirlist, 
excluded_patterns) ignored_files= relpath.(ignored_files, rootdir) verif_code= SpdxPkgVerificationCodeV2(bytes2hex(package_hash), ignored_files) @logmsg Logging.LogLevel(-50) string(verif_code) @@ -99,9 +94,9 @@ end ############################### function ComputeFileChecksum(algorithm::AbstractString, filepath::AbstractString) @logmsg Logging.LogLevel(-50) "Computing File Checksum on $filepath" - HashFunction, HashContext= determine_checksum_algorithm(algorithm) + HashFunction= determine_checksum_algorithm(algorithm) fhash= file_hash(filepath, HashFunction) - checksum_obj= SpdxChecksumV2(algorithm, bytes2hex(fhash)) + checksum_obj= SpdxChecksumV2(algorithm, fhash) @logmsg Logging.LogLevel(-50) string(checksum_obj) return checksum_obj end \ No newline at end of file From 5ff20acb9c49d6bed37e4ba7b19c44dc4bd75baf Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Sat, 27 Jan 2024 21:07:48 -0800 Subject: [PATCH 08/15] Checksum function cleanup --- src/checksums.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/checksums.jl b/src/checksums.jl index 08f64f3..16a1826 100644 --- a/src/checksums.jl +++ b/src/checksums.jl @@ -24,14 +24,15 @@ function determine_checksum_algorithm(algorithm::AbstractString) return HashFunction end -function spdxchecksum(HashFunction::Function, rootdir::AbstractString, excluded_flist::Vector{<:AbstractString}, excluded_dirlist::Vector{<:AbstractString}, excluded_patterns::Vector{Regex}) +function spdxverifcode(rootdir::AbstractString, excluded_flist::Vector{<:AbstractString}, excluded_dirlist::Vector{<:AbstractString}, excluded_patterns::Vector{Regex}) ignored_files= String[] - flist_hash::Vector{String}= [file_hash(file, HashFunction) for file in getpackagefiles(rootdir, excluded_flist, excluded_dirlist, excluded_patterns, ignored_files)] + flist_hash::Vector{String}= [file_hash(file, sha1) for file in getpackagefiles(rootdir, excluded_flist, excluded_dirlist, excluded_patterns, ignored_files)] flist_hash= 
sort(flist_hash) combined_hashes= join(flist_hash) - return (HashFunction(combined_hashes), ignored_files) + return (sha1(combined_hashes), ignored_files) end +############################### file_hash(fpath::AbstractString, HashFunction::Function)= open(fpath) do f hash= HashFunction(f) @logmsg Logging.LogLevel(-100) "$(string(HashFunction))($fpath)= $(bytes2hex(hash))" @@ -83,7 +84,7 @@ end ############################### function ComputePackageVerificationCode(rootdir::AbstractString, excluded_flist::Vector{<:AbstractString}= String[], excluded_dirlist::Vector{<:AbstractString}= String[], excluded_patterns::Vector{Regex}=Regex[]) @logmsg Logging.LogLevel(-50) "Computing Verification Code at: $rootdir" excluded_flist= excluded_flist excluded_dirlist= excluded_dirlist excluded_patterns= excluded_patterns - package_hash, ignored_files= spdxchecksum(sha1, rootdir, excluded_flist, excluded_dirlist, excluded_patterns) + package_hash, ignored_files= spdxverifcode(rootdir, excluded_flist, excluded_dirlist, excluded_patterns) ignored_files= relpath.(ignored_files, rootdir) verif_code= SpdxPkgVerificationCodeV2(bytes2hex(package_hash), ignored_files) @logmsg Logging.LogLevel(-50) string(verif_code) From 37890e348a4c01991a705fbe22123c38f8a099fb Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Sat, 27 Jan 2024 21:08:44 -0800 Subject: [PATCH 09/15] Whitespace --- src/checksums.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/checksums.jl b/src/checksums.jl index 16a1826..857b8cb 100644 --- a/src/checksums.jl +++ b/src/checksums.jl @@ -24,6 +24,8 @@ function determine_checksum_algorithm(algorithm::AbstractString) return HashFunction end + +############################### function spdxverifcode(rootdir::AbstractString, excluded_flist::Vector{<:AbstractString}, excluded_dirlist::Vector{<:AbstractString}, excluded_patterns::Vector{Regex}) ignored_files= String[] flist_hash::Vector{String}= [file_hash(file, sha1) for file in getpackagefiles(rootdir, excluded_flist, 
excluded_dirlist, excluded_patterns, ignored_files)] @@ -32,6 +34,7 @@ function spdxverifcode(rootdir::AbstractString, excluded_flist::Vector{<:Abstrac return (sha1(combined_hashes), ignored_files) end + ############################### file_hash(fpath::AbstractString, HashFunction::Function)= open(fpath) do f hash= HashFunction(f) @@ -39,6 +42,7 @@ file_hash(fpath::AbstractString, HashFunction::Function)= open(fpath) do f return bytes2hex(hash) end + ############################### function getpackagefiles(rootdir, excluded_flist, excluded_dirlist, excluded_patterns, ignored_files) return Channel{String}(chnl -> _getpackagefiles(chnl, rootdir, excluded_flist, excluded_dirlist, excluded_patterns, ignored_files)) From bfb8d575a3a23aea96bd58d530134a676f4370cf Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Sat, 27 Jan 2024 21:22:38 -0800 Subject: [PATCH 10/15] Update CHANGELOG --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cf827b..f4545eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # CHANGELOG +## New Version +* On further review of the SPDX specification, updated the algorithm for computing a package verification code +* Replaced the function spdxchecksum() with ComputePackageVerificationCode() and ComputeFileChecksum() +* Resolved [#40](https://github.com/SamuraiAku/SPDX.jl/issues/40): Handling of symbolic links when computing the package verification code +* Resolved [#29](https://github.com/SamuraiAku/SPDX.jl/issues/29): Support checksum calculation on a single file +* Resolved [#28](https://github.com/SamuraiAku/SPDX.jl/issues/28): Use the Logging standard library to record all the files processed and their checksums + ## v0.3.2 * Add lots of tests to improve Code Coverage From 71399f4900ed04376578c5020fc610e6d083ddee Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Sat, 27 Jan 2024 21:30:26 -0800 Subject: [PATCH 11/15] Update tests --- test/test_checksums.jl | 7 +++++-- 1 file
changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_checksums.jl b/test/test_checksums.jl index 9a29b19..46593f6 100644 --- a/test/test_checksums.jl +++ b/test/test_checksums.jl @@ -1,4 +1,7 @@ @testset "checksums" begin - checksum= spdxchecksum("SHA1", pkgdir(SPDX), String["SPDX.spdx.json"], String[".git"]) - @test checksum isa Vector{UInt8} # No good way to indepently verify that the calculation is correct. + verifcode= ComputePackageVerificationCode(pkgdir(SPDX), String["SPDX.spdx.json"], String[".git"]) + @test verifcode isa SpdxPkgVerificationCodeV2 # No good way to indepently verify that the calculation is correct. + + checksum= ComputeFileChecksum("SHA256", joinpath(pkgdir(SPDX), "Project.toml")) + @test checksum isa SpdxChecksumV2 # No good way to indepently verify that the calculation is correct. end \ No newline at end of file From a4ba1ab07f73576936d539feb835491ee7d9ddcd Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Sun, 28 Jan 2024 22:57:14 -0800 Subject: [PATCH 12/15] Improves testing of ComputePackageVerificationCode() and ComputeFileChecksum() --- test/runtests.jl | 1 + test/test_checksums.jl | 9 ++++++++- test/test_package/dir_link | 1 + test/test_package/file1.txt | 1 + test/test_package/file2.txt | 1 + test/test_package/file_link | 1 + test/test_package/src/bad_link | 1 + test/test_package/src/file3.txt | 1 + 8 files changed, 15 insertions(+), 1 deletion(-) create mode 120000 test/test_package/dir_link create mode 100644 test/test_package/file1.txt create mode 100644 test/test_package/file2.txt create mode 120000 test/test_package/file_link create mode 120000 test/test_package/src/bad_link create mode 100644 test/test_package/src/file3.txt diff --git a/test/runtests.jl b/test/runtests.jl index f251fed..785db18 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,6 +5,7 @@ using Test using JSON using Dates using TimeZones +using SHA @testset "Bool check" begin @test Bool(" True ") diff --git a/test/test_checksums.jl 
b/test/test_checksums.jl index 46593f6..927aa3e 100644 --- a/test/test_checksums.jl +++ b/test/test_checksums.jl @@ -1,7 +1,14 @@ @testset "checksums" begin verifcode= ComputePackageVerificationCode(pkgdir(SPDX), String["SPDX.spdx.json"], String[".git"]) @test verifcode isa SpdxPkgVerificationCodeV2 # No good way to indepently verify that the calculation is correct. + @test issubset(["SPDX.spdx.json"], verifcode.ExcludedFiles) checksum= ComputeFileChecksum("SHA256", joinpath(pkgdir(SPDX), "Project.toml")) - @test checksum isa SpdxChecksumV2 # No good way to indepently verify that the calculation is correct. + @test checksum isa SpdxChecksumV2 + @test checksum.Hash == open(joinpath(pkgdir(SPDX), "Project.toml")) do f + return bytes2hex(sha256(f)) + end + + linktest_code= ComputePackageVerificationCode(joinpath(pkgdir(SPDX), "test", "test_package")) + @test issetequal(linktest_code.ExcludedFiles, ["dir_link", "file_link", "src/bad_link"]) end \ No newline at end of file diff --git a/test/test_package/dir_link b/test/test_package/dir_link new file mode 120000 index 0000000..e831038 --- /dev/null +++ b/test/test_package/dir_link @@ -0,0 +1 @@ +src \ No newline at end of file diff --git a/test/test_package/file1.txt b/test/test_package/file1.txt new file mode 100644 index 0000000..4dd1ef7 --- /dev/null +++ b/test/test_package/file1.txt @@ -0,0 +1 @@ +This is a file. 
diff --git a/test/test_package/file2.txt b/test/test_package/file2.txt new file mode 100644 index 0000000..35eaca1 --- /dev/null +++ b/test/test_package/file2.txt @@ -0,0 +1 @@ +This is also a file diff --git a/test/test_package/file_link b/test/test_package/file_link new file mode 120000 index 0000000..c3ee11c --- /dev/null +++ b/test/test_package/file_link @@ -0,0 +1 @@ +file2.txt \ No newline at end of file diff --git a/test/test_package/src/bad_link b/test/test_package/src/bad_link new file mode 120000 index 0000000..b93736b --- /dev/null +++ b/test/test_package/src/bad_link @@ -0,0 +1 @@ +../temp.txt \ No newline at end of file diff --git a/test/test_package/src/file3.txt b/test/test_package/src/file3.txt new file mode 100644 index 0000000..050a3a6 --- /dev/null +++ b/test/test_package/src/file3.txt @@ -0,0 +1 @@ +This is a file as well. From 94cef9c9a930d392982194cc15c165735c5dd676 Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Mon, 29 Jan 2024 22:56:02 -0800 Subject: [PATCH 13/15] Update the README --- README.md | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8215296..b027718 100644 --- a/README.md +++ b/README.md @@ -62,11 +62,28 @@ updatenamespace!(myDoc) # Updates only the UUID portion of the namespace setcreationtime!(myDoc) # Sets document creation time to the local time, taking the local timezone into account -# Compute a verification code or checksum of a directory [Clauses 7.9, 7.10] +# Compute a verification code of a package directory [Clause 7.9] +# Returns object of type SpdxPkgVerificationCodeV2 +# NOTES: +# Files that are excluded by name are included in the ExcludedFiles property +# Symbolic links are automatically excluded from the computation and included in the ExcludedFiles property, unless the links are inside an excluded directory or pattern +# Directories and excluded patterns (not shown in example below) are NOT included in the ExcludedFiles property. 
The reasoning being that these are temporary/version control locations that are not part of the released package. +# +# Example Call: Compute a verification code that ignores a specific file and a .git directory at the root level. A common usage pattern. +verif_code= ComputePackageVerificationCode("/path/to/pkgdir", ["IgnoreThisFile.spdx.json"], [".git"]) # +# Example Return: +# e0b4c73534bc495ebf43effa633b424d52899183 (excludes: IgnoreThisFile.spdx.json link_to_file) +# Logging: +# If LoggingLevel is set to -100 or lower, then a full file listing will be logged along with the hash of each file for user review. See the documentation of Julia standard logging facilities for details. + + +# Compute the checksum of a package tarball [Clause 7.10] +# Returns object of type SpdxChecksumV2 +# Supported checksum algorithms are: +# ["SHA1", "SHA224", "SHA256", "SHA384", "SHA512", "SHA3-256", "SHA3-384", "SHA3-512"] -spdxchecksum("SHA1", "/path/to/dir", ["IgnoreThisFile.spdx.json"], [".git"]) # Compute a checksum that ignores a specific file and a .git directory at the root level. A common usage pattern.
- +file_cksum= ComputeFileChecksum("SHA256", "/path/to/package.tar.gz") +# Example Return: +# SHA256: 4b1dfe7b8886825527a362ee37244a665a32f68d9e7ca53c521dfec9ae8cd41a ``` ## SPDX Document Structure From c05ea4c96de61de30dd0cbb221429cbaa9b0723f Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Mon, 29 Jan 2024 23:10:22 -0800 Subject: [PATCH 14/15] Add more logging --- src/checksums.jl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/checksums.jl b/src/checksums.jl index 857b8cb..d54cd9f 100644 --- a/src/checksums.jl +++ b/src/checksums.jl @@ -62,23 +62,28 @@ function _getpackagefiles(chnl, root::AbstractString, excluded_flist::Vector{<:A for path in content if isdir(path) if any(excluded_dirlist .== path) - continue # Skip over exluded directories + @logmsg Logging.LogLevel(-100) "Skipping Directory $path" + continue # Skip over excluded directories elseif islink(path) push!(ignored_files, path) - continue # Skip over exluded directories + @logmsg Logging.LogLevel(-100) "Excluding symbolic link $path" + continue # Skip over excluded directories else _getpackagefiles(chnl, path, excluded_flist, excluded_dirlist, excluded_patterns, ignored_files) # Descend into the directory and get the files there end elseif any(excluded_flist .== path) + @logmsg Logging.LogLevel(-100) "Excluding File $path" push!(ignored_files, path) continue # Skip over excluded files elseif any(occursin.(excluded_patterns, path)) + @logmsg Logging.LogLevel(-100) "Ignoring $path which matches an excluded pattern" pattern_regexes= excluded_patterns continue # Skip files that match one of the excluded patterns elseif islink(path) + @logmsg Logging.LogLevel(-100) "Excluding symbolic link $path" push!(ignored_files, path) # Any link that passes the previous checks is a part of the deployed code and it's exclusion from the computation needs to be noted continue else - push!(chnl, path) # Put the file path in the channel + push!(chnl, path) # Put the file path in the
channel. Then block until it is taken end end return nothing From d4132c96bab5c480f086b86007623d9a1154108f Mon Sep 17 00:00:00 2001 From: Simon Avery Date: Mon, 29 Jan 2024 23:21:26 -0800 Subject: [PATCH 15/15] Cleanuo --- src/checksums.jl | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/checksums.jl b/src/checksums.jl index d54cd9f..988576c 100644 --- a/src/checksums.jl +++ b/src/checksums.jl @@ -63,25 +63,20 @@ function _getpackagefiles(chnl, root::AbstractString, excluded_flist::Vector{<:A if isdir(path) if any(excluded_dirlist .== path) @logmsg Logging.LogLevel(-100) "Skipping Directory $path" - continue # Skip over excluded directories elseif islink(path) push!(ignored_files, path) @logmsg Logging.LogLevel(-100) "Excluding symbolic link $path" - continue # Skip over excluded directories else _getpackagefiles(chnl, path, excluded_flist, excluded_dirlist, excluded_patterns, ignored_files) # Descend into the directory and get the files there end elseif any(excluded_flist .== path) @logmsg Logging.LogLevel(-100) "Excluding File $path" push!(ignored_files, path) - continue # Skip over excluded files elseif any(occursin.(excluded_patterns, path)) @logmsg Logging.LogLevel(-100) "Ignoring $path which matches an excluded pattern" pattern_regexes= excluded_patterns - continue # Skip files that match one of the excluded patterns elseif islink(path) @logmsg Logging.LogLevel(-100) "Excluding symbolic link $path" - push!(ignored_files, path) # Any link that passes the previous checks is a part of the deployed code and it's exclusion from the computation needs to be noted - continue + push!(ignored_files, path) # Any link that passes the previous checks is a part of the deployed code and it's exclusion from the computation needs to be noted else push!(chnl, path) # Put the file path in the channel. 
Then block until it is taken end @@ -96,7 +91,7 @@ function ComputePackageVerificationCode(rootdir::AbstractString, excluded_flist: package_hash, ignored_files= spdxverifcode(rootdir, excluded_flist, excluded_dirlist, excluded_patterns) ignored_files= relpath.(ignored_files, rootdir) verif_code= SpdxPkgVerificationCodeV2(bytes2hex(package_hash), ignored_files) - @logmsg Logging.LogLevel(-50) string(verif_code) + @logmsg Logging.LogLevel(-50) "Verification Code= $(string(verif_code))" return verif_code end