diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ccae327..017975d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ with `HEAD` being symbolic reference to an annotated tag. This usually happens via `git symbolic-ref HEAD`. ([#347](https://github.com/crashappsec/chalk/pull/347)) +- Chalk misreported annotated git tags as not annotated. + To ensure a tag is up to date with origin, chalk refetches + lightweight tags (not annotated) from origin. To customize + this behavior, use the `git.refetch_lightweight_tags` config. + ([#349](https://github.com/crashappsec/chalk/pull/349)) ## 0.4.5 diff --git a/src/configs/base_init.c4m b/src/configs/base_init.c4m index a3181f82..9d4ac22e 100644 --- a/src/configs/base_init.c4m +++ b/src/configs/base_init.c4m @@ -25,6 +25,8 @@ docker { } load { } +git {} + cloud_provider { cloud_instance_hw_identifiers { } } diff --git a/src/configs/chalk.c42spec b/src/configs/chalk.c42spec index ae2a82e1..58480698 100644 --- a/src/configs/chalk.c42spec +++ b/src/configs/chalk.c42spec @@ -85,7 +85,6 @@ object key { gen_typename: "KeyConfig" gen_setters: false doc: """ - These objects are used in reporting templates and chalking templates to help determine what to produce. The two fields in this object are: @@ -124,7 +123,6 @@ object mark_template { gen_typename: "MarkTemplate" gen_setters: false doc: """ - # Chalk Mark Templates Chalk decides what metadata keys should be added to a chalk mark based @@ -266,7 +264,6 @@ object report_template { gen_typename: "ReportTemplate" gen_setters: false doc: """ - # Report Templates Report templates specify what metadata gets added into reports. 
You @@ -326,7 +323,6 @@ object tool { gen_typename: "ToolInfo" gen_setters: false doc: """ - Tool sections allow you to automatically run external tools for collecting metadata, for tool types that are known to chalk (This doesn't preclude chalk from providing its own collection for these @@ -409,7 +405,6 @@ be used directly, or can be called if you want to do something fancier. type: func (string) -> bool require: true doc: """ - A callback used when implementing tools. You must implement this for any new tool you add, even if you have no intention of ever attempting an actual installation (in which case, it can simply return false). @@ -982,7 +977,6 @@ object sink { user_def_ok: true validator: func sink_object_check doc: """ - This object type is needed to add new data sinks to chalk. If you're not a Chalk developer, this probably isn't going to be particularly useful; Instead, use `sink_config` to configure a sink, and then @@ -1410,7 +1404,6 @@ object outconf { gen_setters: false user_def_ok: false doc: """ - ## Changing reports for operations Each chalk operation that reports metadata will have one or more @@ -1665,7 +1658,6 @@ singleton extract { gen_setters: false user_def_ok: false doc: """ - These are configuration options specific to how container extraction works for containers (plenty of the global options apply to extraction). Currently, the only options involve how we handle looking @@ -1977,6 +1969,39 @@ ENV ARTIFACT_IDENTIFIER="X6VRPZ-C828-KDNS-QDXRT0" } } +singleton git { + gen_fieldname: "gitConfig" + gen_typename: "GitConfig" + gen_setters: false + user_def_ok: false + doc: """ +Options for how chalk interacts with git. +""" + + field refetch_lightweight_tags { + type: bool + default: true + shortdoc: "Refetch latest tag from origin" + doc: """ +During chalk insertion, when chalk encounters a git tag, +there is a possibility the tag might not be up to date. 
+For example, if the repo is fetched via: + +``` +git fetch origin --force +<commit>:refs/tags/<tag> +``` + +Git will explicitly create the tag locally, pointing directly to the commit. +This might not be accurate as the tag might be annotated in origin. +As such, chalk will not be able to accurately report metadata about the tag +such as date tagged, tagger, etc. + +When this config is true, chalk will refetch lightweight tags (not annotated) +from the origin to ensure their local definitions are up to date. +""" + } +} + singleton load { gen_fieldname: "loadConfig" gen_typename: "LoadConfig" @@ -1993,7 +2018,6 @@ configuration being loaded. default: false shortdoc: "Replace on load" doc: """ - When this value is true, the entire stored configuration file will be REPLACED with the specified configuration, as long as that configuration loads successfully. @@ -2255,7 +2279,6 @@ singleton env_config { gen_setters: false user_def_ok: false doc: """ - This section is for internal configuration information gathering runtime environment information when running with the 'env' command, which is similar to the exec command, but where the exec command @@ -2343,7 +2366,6 @@ unless you provide a custom callback. default: false shortdoc: "Marking requires +x" doc: """ - When this is true, Chalk will not attempt to mark source code *unless* the executable bit is set. However, the execute bit can get added later; it's a trade-off! @@ -2669,6 +2691,7 @@ root { allow cloud_provider allow tech_stack_rule allow linguist_language + allow git shortdoc: "Chalk Configuration Options" @@ -2750,7 +2773,6 @@ then chalk will produce a help message. write_lock: false shortdoc: "The currently running command" doc: """ - Once the command line is fully parsed, this will get the value of the selected command. If the command is ambiguous, fill it in with the value 'default_commmand'. 
@@ -2936,7 +2958,6 @@ Set the default path to search for artifacts, unless overridden by command-line require: false shortdoc: "Specify a default place for /tmp files if needed" doc: """ - Generally, systems use `/tmp` for temporary files, and most modern API interfaces to using `/tmp` take mitigation against file-based race conditions, for instance, by leveraging per-app directories and diff --git a/src/docker/git.nim b/src/docker/git.nim index 12a387a1..da5f95cc 100644 --- a/src/docker/git.nim +++ b/src/docker/git.nim @@ -5,7 +5,7 @@ ## (see https://crashoverride.com/docs/chalk) ## import std/[base64, strutils, uri] -import ".."/[config, util] +import ".."/[config, git, util] import "."/[base] const @@ -15,13 +15,6 @@ const GIT_USER = "x-access-token" DEFAULT_BRANCH = "main" -proc setGitExeLocation() = - once: - gitExeLocation = util.findExePath("git").get("") - if gitExeLocation == "": - error("No git command found in PATH") - raise newException(ValueError, "No git") - proc setSshKeyscanExeLocation() = once: sshKeyscanExeLocation = util.findExePath("ssh-keyscan").get("") @@ -152,7 +145,7 @@ proc run(git: DockerGitContext, # therefore the cd here is required so that git operations # are isolated in their own directory withWorkingDir(git.tmpGitDir): - result = runCmdGetEverything(gitExeLocation, allArgs.raw()) + result = runCmdGetEverything(getGitExeLocation(), allArgs.raw()) if strict and result.exitCode != 0: error("Failed to run git " & allArgs.redacted().join(" ")) error(strip(result.stdOut & result.stdErr)) diff --git a/src/git.nim b/src/git.nim new file mode 100644 index 00000000..665a2237 --- /dev/null +++ b/src/git.nim @@ -0,0 +1,23 @@ +## +## Copyright (c) 2024, Crash Override, Inc. 
+## +## This file is part of Chalk +## (see https://crashoverride.com/docs/chalk) +## + +import "."/[config, util] + +proc setGitExeLocation*() = + once: + gitExeLocation = util.findExePath("git").get("") + if gitExeLocation == "": + error("No git command found in PATH") + raise newException(ValueError, "No git") + +proc getGitExeLocation*(): string = + once: + try: + setGitExeLocation() + except: + discard + return gitExeLocation diff --git a/src/plugins/vctlGit.nim b/src/plugins/vctlGit.nim index b27fc61c..2cfbcc8f 100644 --- a/src/plugins/vctlGit.nim +++ b/src/plugins/vctlGit.nim @@ -10,7 +10,7 @@ import std/[algorithm, nativesockets, sequtils, times] import pkg/[zippy, zippy/inflate] -import ".."/[config, plugin_api, util] +import ".."/[config, git, plugin_api, util] const eBadGitConf = "Git configuration file is invalid" @@ -272,6 +272,9 @@ type origin: Option[string] vcsDirs: OrderedTable[string, RepoInfo] +proc isAnnotated(self: GitTag): bool = + return self.tagCommitId != "" + proc getUint32BE(data: string, whence: SomeInteger=0): uint32 = result = ntohl(cast[ptr [uint32]](addr data[whence])[]) @@ -509,8 +512,8 @@ proc loadRef(info: RepoInfo, gitRef: string): string = trace("git object for " & gitRef & ": " & result & " which points to commit: " & commitId) result = commitId -proc loadAuthor(info: RepoInfo, commitId: string) = - let fields = info.loadObject(commitId) +proc loadAuthor(info: RepoInfo) = + let fields = info.loadObject(info.commitId) # this makes only the same assumptions as git itself: # https://github.com/git/git/blob/master/commit.c#L97 ddcb8fd info.author = fields.getOrDefault(gitAuthor, "") @@ -522,70 +525,112 @@ proc loadAuthor(info: RepoInfo, commitId: string) = info.message = fields.getOrDefault(gitMessage, "") info.signed = gitSign in fields -proc loadTags(info: RepoInfo, commitId: string) = - # need commit to compare with - if commitId == "": - return - - let tagPath = info.vcsDir.joinPath("refs", "tags") - var allTags = 
initTable[string, string]() +proc loadTag(info: RepoInfo, tag: string, tagCommit: string) = + # lightweight tag which points directly to the current commit ID + if tagCommit == info.commitId: + trace("tag: " & tag) + info.tags[tag] = GitTag(name: tag, + commitId: tagCommit) + # otherwise we need to check where tag points to as the tag can be either: + # * pointing to another commit + # * annotated commit object pointing to another commit + # * annotated commit object pointing to the current commit + else: + try: + let fields = info.loadObject(tagCommit) + # not an annotated tag + if not fields.isTag(): + return + # we found annotated commit pointing to current commit + if fields[gitTag] == tag and fields[gitObject] == info.commitId: + let + tagger = fields[gitTagger] + unixTime = parseTime(tagger) + date = formatCommitObjectTime(tagger) + signed = gitSign in fields + message = fields[gitMessage] + info.tags[tag] = GitTag(name: tag, + commitId: fields[gitObject], + tagCommitId: tagCommit, + tagger: tagger, + unixTime: unixTime, + date: date, + signed: signed, + message: message) + trace("annotated tag: " & tag) + except: + warn(tag & ": Git tag couldn't be loaded") +proc loadAllTags(info: RepoInfo): Table[string, string] = + result = initTable[string, string]() for gitRef, objId in info.getAllPackedRefs(): if not gitRef.startsWith("refs/tags/"): continue let parts = gitRef.split("/", maxsplit = 2) - allTags[parts[2]] = objId + result[parts[2]] = objId + let tagPath = info.vcsDir.joinPath("refs", "tags") for tag in tagPath.walkDirRec(relative = true): let tagCommit = tryToLoadFile(tagPath.joinPath(tag)).strip() if tagCommit != "": - allTags[tag] = tagCommit - - for tag, tagCommit in allTags: - # regular tag which points directly to the current commit ID - if tagCommit == commitId: - trace("tag: " & tag) - info.tags[tag] = GitTag(name: tag, - commitId: tagCommit) - # otherwise we need to check where tag points to as the tag can be either: - # * pointing to another 
commit - # * annotated commit object pointing to another commit - # * annotated commit object pointing to the current commit - else: - try: - let fields = info.loadObject(tagCommit) - # not an annotated tag - if not fields.isTag(): - continue - # we found annotated commit pointing to current commit - if fields[gitTag] == tag and fields[gitObject] == info.commitId: - let - tagger = fields[gitTagger] - unixTime = parseTime(tagger) - date = formatCommitObjectTime(tagger) - signed = gitSign in fields - message = fields[gitMessage] - info.tags[tag] = GitTag(name: tag, - commitId: fields[gitObject], - tagCommitId: tagCommit, - tagger: tagger, - unixTime: unixTime, - date: date, - signed: signed, - message: message) - trace("annotated tag: " & tag) - except: - warn(tag & ": Git tag couldn't be loaded") + result[tag] = tagCommit + +proc loadTags(info: RepoInfo) = + # need commit to compare with + if info.commitId == "": + return + + info.tags = initTable[string, GitTag]() + for tag, tagCommit in info.loadAllTags(): + info.loadTag(tag = tag, + tagCommit = tagCommit) if len(info.tags) > 0: let sortedTags = info.tags.values().toSeq().sortedByIt((it.unixTime, it.name)) info.latestTag = sortedTags[^1] trace("latest tag: " & info.latestTag.name) +proc refetchTags(info: RepoInfo) = + if info.origin == "": + return + if not get[bool](getChalkScope(), "git.refetch_lightweight_tags"): + return + var toRefetch: seq[GitTag] = @[] + for _, tag in info.tags: + if not tag.isAnnotated(): + toRefetch.add(tag) + if len(toRefetch) == 0: + return + let exe = getGitExeLocation() + if exe == "": + return + var args = @[ + "fetch", + info.origin, + "--force", # allow to update the tag + "--no-tags", # dont fetch everything + "--no-recurse-submodules", # ignore submodules + "--depth=1", # faster fetch + info.commitId, + ] + for tag in toRefetch: + args.add(tag.name & ":refs/tags/" & tag.name) + trace("git " & args.join(" ")) + let output = runCmdGetEverything(getGitExeLocation(), args) + if 
output.getExit() != 0: + trace("git: could not fetch latest tag from origin: " & output.getStdErr()) + return + let oldLatestTag = info.latestTag + info.loadTags() + if oldLatestTag.tagCommitId != info.latestTag.tagCommitId: + info("git: origin fetch updated tag (" & info.latestTag.name & ") from " & + "lightweight tag to annotated tag. Its object id changed from commit " & + oldLatestTag.commitId & " to tag commit " & info.latestTag.tagCommitId) + proc loadCommit(info: RepoInfo, commitId: string) = info.commitId = commitId trace("commit ID: " & info.commitID) - info.loadAuthor(commitId) - info.loadTags(commitId) + info.loadAuthor() + info.loadTags() proc loadSymref(info: RepoInfo, gitRef: string) = let @@ -697,6 +742,7 @@ proc findAndLoad(plugin: GitInfo, path: string) = if stream != nil: let config = stream.parseGitConfig() info.origin = info.calcOrigin(config) + info.refetchTags() except: error(confFileName & ": Git configuration file not parsed.") dumpExOnDebug() diff --git a/tests/functional/test_git.py b/tests/functional/test_git.py index 0ec83725..d953c2d1 100644 --- a/tests/functional/test_git.py +++ b/tests/functional/test_git.py @@ -111,3 +111,26 @@ def test_empty_repo( ORIGIN_URI=MISSING, VCS_DIR_WHEN_CHALKED=MISSING, ) + + +@pytest.mark.parametrize("copy_files", [[LS_PATH]], indirect=True) +def test_refetch_tag( + tmp_data_dir: Path, + chalk_copy: Chalk, + copy_files: list[Path], +): + repo = Git(tmp_data_dir).clone( + "https://github.com/crashappsec/chalk-docker-git-context.git" + ) + # replicate what github checkout action does + # https://github.com/crashappsec/chalk/issues/345 + repo.fetch( + ref="1-signed", + refs={repo.latest_commit: "refs/tags/1-signed"}, + ) + artifact = copy_files[0] + result = chalk_copy.insert(artifact) + assert result.mark.has( + TAG="1-signed", + TAG_SIGNED=True, + ) diff --git a/tests/functional/utils/git.py b/tests/functional/utils/git.py index b13fb1e1..f7b25a1f 100644 --- a/tests/functional/utils/git.py +++ 
b/tests/functional/utils/git.py @@ -31,7 +31,6 @@ def init( self, *, first_commit: bool = True, - add: bool = True, remote: Optional[str] = None, branch: str = "main", ): @@ -51,6 +50,12 @@ def init( self.run(["git", "remote", "add", "origin", remote]) return self + def clone(self, origin: str, ref: str = "main"): + self.init(first_commit=False, remote=origin) + self.fetch() + self.checkout(ref) + return self + def config(self, key: str, value: str): self.run(["git", "config", key, value]) @@ -73,6 +78,21 @@ def checkout(self, spec: str): self.run(["git", "checkout", spec]) return self + def fetch( + self, + remote: str = "origin", + *, + ref: Optional[str] = None, + refs: Optional[dict[str, str]] = None, + ): + args = ["git", "fetch", "--force", remote] + if refs: + assert ref + args.append(ref) + args += [f"{k}:{v}" for k, v in (refs or {}).items()] + self.run(args) + return self + def symbolic_ref(self, ref: str): self.run(["git", "symbolic-ref", "HEAD", ref]) return self