diff --git a/.github/actions/bootstrap/action.yaml b/.github/actions/bootstrap/action.yaml
new file mode 100644
index 00000000..d4ed5970
--- /dev/null
+++ b/.github/actions/bootstrap/action.yaml
@@ -0,0 +1,80 @@
+name: "Bootstrap"
+description: "Bootstrap all tools and dependencies"
+inputs:
+ go-version:
+ description: "Go version to install"
+ required: true
+ default: "1.19.x"
+ use-go-cache:
+ description: "Restore go cache"
+ required: true
+ default: "true"
+ cache-key-prefix:
+ description: "Prefix all cache keys with this value"
+ required: true
+ default: "831180ac25"
+ build-cache-key-prefix:
+ description: "Prefix build cache key with this value"
+ required: true
+ default: "f8b6d31dea"
+ bootstrap-apt-packages:
+ description: "Space delimited list of tools to install via apt"
+ default: ""
+
+runs:
+ using: "composite"
+ steps:
+ - uses: actions/setup-go@v3
+ with:
+ go-version: ${{ inputs.go-version }}
+
+ - name: Restore tool cache
+ id: tool-cache
+ uses: actions/cache@v3
+ with:
+ path: ${{ github.workspace }}/.tmp
+ key: ${{ inputs.cache-key-prefix }}-${{ runner.os }}-tool-${{ hashFiles('Makefile') }}
+
+ # note: we need to keep restoring the go mod cache before bootstrapping tools since `go install` is used in
+ # some installations of project tools.
+ - name: Restore go module cache
+ id: go-mod-cache
+ if: inputs.use-go-cache == 'true'
+ uses: actions/cache@v3
+ with:
+ path: |
+ ~/go/pkg/mod
+ key: ${{ inputs.cache-key-prefix }}-${{ runner.os }}-go-${{ inputs.go-version }}-${{ hashFiles('**/go.sum') }}
+ restore-keys: |
+ ${{ inputs.cache-key-prefix }}-${{ runner.os }}-go-${{ inputs.go-version }}-
+
+ - name: (cache-miss) Bootstrap project tools
+ shell: bash
+ if: steps.tool-cache.outputs.cache-hit != 'true'
+ run: make bootstrap-tools
+
+ - name: Restore go build cache
+ id: go-cache
+ if: inputs.use-go-cache == 'true'
+ uses: actions/cache@v3
+ with:
+ path: |
+ ~/.cache/go-build
+ key: ${{ inputs.cache-key-prefix }}-${{ inputs.build-cache-key-prefix }}-${{ runner.os }}-go-${{ inputs.go-version }}-${{ hashFiles('**/go.sum') }}
+ restore-keys: |
+ ${{ inputs.cache-key-prefix }}-${{ inputs.build-cache-key-prefix }}-${{ runner.os }}-go-${{ inputs.go-version }}-
+
+ - name: (cache-miss) Bootstrap go dependencies
+ shell: bash
+ if: steps.go-mod-cache.outputs.cache-hit != 'true' && inputs.use-go-cache == 'true'
+ run: make bootstrap-go
+
+ - name: Bootstrap CI dependencies
+ shell: bash
+ run: make ci-bootstrap
+
+ - name: Install apt packages
+ if: inputs.bootstrap-apt-packages != ''
+ shell: bash
+ run: |
+ DEBIAN_FRONTEND=noninteractive sudo apt update && sudo -E apt install -y ${{ inputs.bootstrap-apt-packages }}
diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh
new file mode 100755
index 00000000..50beb016
--- /dev/null
+++ b/.github/scripts/build.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+set -uo pipefail
+
+SNAPSHOT_DIR=$1
+
+# Based on https://gist.github.com/eduncan911/68775dba9d3c028181e4 and https://gist.github.com/makeworld-the-better-one/e1bb127979ae4195f43aaa3ad46b1097
+# but improved to use the `go` command so it never goes out of date.
+
+type setopt >/dev/null 2>&1
+
+contains() {
+ # Source: https://stackoverflow.com/a/8063398/7361270
+ [[ $1 =~ (^|[[:space:]])$2($|[[:space:]]) ]]
+}
+
+mkdir -p "${SNAPSHOT_DIR}"
+
+BUILD_TARGET=./examples
+OUTPUT=${SNAPSHOT_DIR}/stereoscope-example
+FAILURES=""
+
+# You can set your own flags on the command line
+FLAGS=${FLAGS:-"-ldflags=\"-s -w\""}
+
+# A list of OSes and architectures to not build for, space-separated
+# It can be set from the command line when the script is called.
+NOT_ALLOWED_OS=${NOT_ALLOWED_OS:-"js android ios solaris illumos aix dragonfly plan9 freebsd openbsd netbsd"}
+NOT_ALLOWED_ARCH=${NOT_ALLOWED_ARCH:-"riscv64 mips mips64 mips64le ppc64 ppc64le s390x wasm"}
+
+
+# Get all targets
+while IFS= read -r target; do
+ GOOS=${target%/*}
+ GOARCH=${target#*/}
+ BIN_FILENAME="${OUTPUT}-${GOOS}-${GOARCH}"
+
+ if contains "$NOT_ALLOWED_OS" "$GOOS" ; then
+ continue
+ fi
+
+ if contains "$NOT_ALLOWED_ARCH" "$GOARCH" ; then
+ continue
+ fi
+
+ # Check for arm and set arm version
+ if [[ $GOARCH == "arm" ]]; then
+ # Set what arm versions each platform supports
+ if [[ $GOOS == "darwin" ]]; then
+ arms="7"
+ elif [[ $GOOS == "windows" ]]; then
+ # This is a guess, it's not clear what Windows supports from the docs
+ # But I was able to build all these on my machine
+ arms="5 6 7"
+ elif [[ $GOOS == *"bsd" ]]; then
+ arms="6 7"
+ else
+ # Linux goes here
+ arms="5 6 7"
+ fi
+
+ # Now do the arm build
+ for GOARM in $arms; do
+ BIN_FILENAME="${OUTPUT}-${GOOS}-${GOARCH}${GOARM}"
+ if [[ "${GOOS}" == "windows" ]]; then BIN_FILENAME="${BIN_FILENAME}.exe"; fi
+ CMD="GOARM=${GOARM} GOOS=${GOOS} GOARCH=${GOARCH} go build $FLAGS -o ${BIN_FILENAME} ${BUILD_TARGET}"
+ echo "${CMD}"
+ eval "${CMD}" || FAILURES="${FAILURES} ${GOOS}/${GOARCH}${GOARM}"
+ done
+ else
+ # Build non-arm here
+ if [[ "${GOOS}" == "windows" ]]; then BIN_FILENAME="${BIN_FILENAME}.exe"; fi
+ CMD="GOOS=${GOOS} GOARCH=${GOARCH} go build $FLAGS -o ${BIN_FILENAME} ${BUILD_TARGET}"
+ echo "${CMD}"
+ eval "${CMD}" || FAILURES="${FAILURES} ${GOOS}/${GOARCH}"
+ fi
+done <<< "$(go tool dist list)"
+
+if [[ "${FAILURES}" != "" ]]; then
+ echo ""
+ echo "build failed for: ${FAILURES}"
+ exit 1
+fi
\ No newline at end of file
diff --git a/.github/scripts/coverage.py b/.github/scripts/coverage.py
new file mode 100755
index 00000000..db14135c
--- /dev/null
+++ b/.github/scripts/coverage.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+import subprocess
+import sys
+import shlex
+
+
+class bcolors:
+ HEADER = '\033[95m'
+ OKBLUE = '\033[94m'
+ OKCYAN = '\033[96m'
+ OKGREEN = '\033[92m'
+ WARNING = '\033[93m'
+ FAIL = '\033[91m'
+ ENDC = '\033[0m'
+ BOLD = '\033[1m'
+ UNDERLINE = '\033[4m'
+
+
+if len(sys.argv) < 3:
+ print("Usage: coverage.py [threshold] [go-coverage-report]")
+ sys.exit(1)
+
+
+threshold = float(sys.argv[1])
+report = sys.argv[2]
+
+
+args = shlex.split(f"go tool cover -func {report}")
+p = subprocess.run(args, capture_output=True, text=True)
+
+percent_coverage = float(p.stdout.splitlines()[-1].split()[-1].replace("%", ""))
+print(f"{bcolors.BOLD}Coverage: {percent_coverage}%{bcolors.ENDC}")
+
+if percent_coverage < threshold:
+ print(f"{bcolors.BOLD}{bcolors.FAIL}Coverage below threshold of {threshold}%{bcolors.ENDC}")
+ sys.exit(1)
diff --git a/.github/scripts/go-mod-tidy-check.sh b/.github/scripts/go-mod-tidy-check.sh
new file mode 100755
index 00000000..28f22fcd
--- /dev/null
+++ b/.github/scripts/go-mod-tidy-check.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+set -eu
+
+ORIGINAL_STATE_DIR=$(mktemp -d "TEMP-original-state-XXXXXXXXX")
+TIDY_STATE_DIR=$(mktemp -d "TEMP-tidy-state-XXXXXXXXX")
+
+trap "cp -p ${ORIGINAL_STATE_DIR}/* ./ && git update-index -q --refresh && rm -fR ${ORIGINAL_STATE_DIR} ${TIDY_STATE_DIR}" EXIT
+
+# capturing original state of files...
+cp go.mod go.sum "${ORIGINAL_STATE_DIR}"
+
+# capturing state of go.mod and go.sum after running go mod tidy...
+go mod tidy
+cp go.mod go.sum "${TIDY_STATE_DIR}"
+
+set +e
+
+# detect difference between the git HEAD state and the go mod tidy state
+DIFF_MOD=$(diff -u "${ORIGINAL_STATE_DIR}/go.mod" "${TIDY_STATE_DIR}/go.mod")
+DIFF_SUM=$(diff -u "${ORIGINAL_STATE_DIR}/go.sum" "${TIDY_STATE_DIR}/go.sum")
+
+if [[ -n "${DIFF_MOD}" || -n "${DIFF_SUM}" ]]; then
+ echo "go.mod diff:"
+ echo "${DIFF_MOD}"
+ echo "go.sum diff:"
+ echo "${DIFF_SUM}"
+ echo ""
+ printf "FAILED! go.mod and/or go.sum are NOT tidy; please run 'go mod tidy'.\n\n"
+ exit 1
+fi
diff --git a/.github/workflows/benchmark-testing.yaml b/.github/workflows/benchmark-testing.yaml
new file mode 100644
index 00000000..4cd87594
--- /dev/null
+++ b/.github/workflows/benchmark-testing.yaml
@@ -0,0 +1,58 @@
+name: "Benchmark testing"
+
+on:
+ workflow_dispatch:
+ pull_request:
+
+jobs:
+
+ Benchmark-Test:
+ name: "Benchmark tests"
+ runs-on: ubuntu-20.04
+ # note: we want benchmarks to run on pull_request events in order to publish results to a sticky comment, and
+ # we also want to run on push such that merges to main are recorded to the cache. For this reason we don't filter
+ # the job by event.
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Bootstrap environment
+ uses: ./.github/actions/bootstrap
+
+ - name: Restore base benchmark result
+ uses: actions/cache@v3
+ with:
+ path: test/results/benchmark-main.txt
+ # use base sha for PR or new commit hash for main push in benchmark result key
+ key: ${{ runner.os }}-bench-${{ (github.event.pull_request.base.sha != github.event.after) && github.event.pull_request.base.sha || github.event.after }}
+
+ - name: Run benchmark tests
+ id: benchmark
+ run: |
+ REF_NAME=${GITHUB_REF##*/} make benchmark
+ OUTPUT=$(make show-benchstat)
+ OUTPUT="${OUTPUT//'%'/'%25'}" # URL encode all '%' characters
+ OUTPUT="${OUTPUT//$'\n'/'%0A'}" # URL encode all '\n' characters
+ OUTPUT="${OUTPUT//$'\r'/'%0D'}" # URL encode all '\r' characters
+ echo "::set-output name=result::$OUTPUT"
+
+ - uses: actions/upload-artifact@v3
+ with:
+ name: benchmark-test-results
+ path: test/results/**/*
+
+ - name: Update PR benchmark results comment
+ uses: marocchino/sticky-pull-request-comment@v2
+ continue-on-error: true
+ with:
+ header: benchmark
+ message: |
+ ### Benchmark Test Results
+
+
+ Benchmark results from the latest changes vs base branch
+
+ ```
+ ${{ steps.benchmark.outputs.result }}
+ ```
+
+
diff --git a/.github/workflows/validations.yaml b/.github/workflows/validations.yaml
index 4ed98466..f0314a6b 100644
--- a/.github/workflows/validations.yaml
+++ b/.github/workflows/validations.yaml
@@ -16,79 +16,28 @@ on:
- main
pull_request:
-env:
- GO_VERSION: "1.19.x"
-
jobs:
Static-Analysis:
name: "Static analysis"
runs-on: ubuntu-20.04
steps:
- - uses: actions/setup-go@v2
- with:
- go-version: ${{ env.GO_VERSION }}
-
- - uses: actions/checkout@v2
-
- - name: Restore tool cache
- id: tool-cache
- uses: actions/cache@v2.1.3
- with:
- path: ${{ github.workspace }}/.tmp
- key: ${{ runner.os }}-tool-${{ hashFiles('Makefile') }}
+ - uses: actions/checkout@v3
- - name: Restore go cache
- id: go-cache
- uses: actions/cache@v2.1.3
- with:
- path: ~/go/pkg/mod
- key: ${{ runner.os }}-go-${{ env.GO_VERSION }}-${{ hashFiles('**/go.sum') }}
- restore-keys: |
- ${{ runner.os }}-go-${{ env.GO_VERSION }}-
-
- - name: (cache-miss) Bootstrap all project dependencies
- if: steps.tool-cache.outputs.cache-hit != 'true' || steps.go-cache.outputs.cache-hit != 'true'
- run: make bootstrap
+ - name: Bootstrap environment
+ uses: ./.github/actions/bootstrap
- - name: Bootstrap CI environment dependencies
- run: make ci-bootstrap
-
- - name: Run static analysis
- run: make static-analysis
+ - name: Run static analysis
+ run: make static-analysis
Unit-Test:
name: "Unit tests"
runs-on: ubuntu-20.04
steps:
- - uses: actions/setup-go@v2
- with:
- go-version: ${{ env.GO_VERSION }}
-
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- - name: Restore tool cache
- id: tool-cache
- uses: actions/cache@v2.1.3
- with:
- path: ${{ github.workspace }}/.tmp
- key: ${{ runner.os }}-tool-${{ hashFiles('Makefile') }}
-
- - name: Restore go cache
- id: go-cache
- uses: actions/cache@v2.1.3
- with:
- path: ~/go/pkg/mod
- key: ${{ runner.os }}-go-${{ env.GO_VERSION }}-${{ hashFiles('**/go.sum') }}
- restore-keys: |
- ${{ runner.os }}-go-${{ env.GO_VERSION }}-
-
- - name: (cache-miss) Bootstrap all project dependencies
- if: steps.tool-cache.outputs.cache-hit != 'true' || steps.go-cache.outputs.cache-hit != 'true'
- run: make bootstrap
-
- - name: Bootstrap CI environment dependencies
- run: make ci-bootstrap
+ - name: Bootstrap environment
+ uses: ./.github/actions/bootstrap
- name: Run unit tests
run: make unit
@@ -102,11 +51,10 @@ jobs:
name: "Integration tests"
runs-on: ubuntu-20.04
steps:
- - uses: actions/setup-go@v2
- with:
- go-version: ${{ env.GO_VERSION }}
+ - uses: actions/checkout@v3
- - uses: actions/checkout@v2
+ - name: Bootstrap environment
+ uses: ./.github/actions/bootstrap
- name: Enable systemd for podman socket activation
run: |
@@ -128,29 +76,6 @@ jobs:
with:
limit-access-to-actor: true
- - name: Restore tool cache
- id: tool-cache
- uses: actions/cache@v2.1.3
- with:
- path: ${{ github.workspace }}/.tmp
- key: ${{ runner.os }}-tool-${{ hashFiles('Makefile') }}
-
- - name: Restore go cache
- id: go-cache
- uses: actions/cache@v2.1.3
- with:
- path: ~/go/pkg/mod
- key: ${{ runner.os }}-go-${{ env.GO_VERSION }}-${{ hashFiles('**/go.sum') }}
- restore-keys: |
- ${{ runner.os }}-go-${{ env.GO_VERSION }}-
-
- - name: (cache-miss) Bootstrap all project dependencies
- if: steps.tool-cache.outputs.cache-hit != 'true' || steps.go-cache.outputs.cache-hit != 'true'
- run: make bootstrap
-
- - name: Bootstrap CI environment dependencies
- run: make ci-bootstrap
-
- name: Build key for test-fixture cache
run: make integration-fingerprint
@@ -177,77 +102,15 @@ jobs:
- name: Run integration tests
run: make integration
- Benchmark-Test:
- name: "Benchmark tests"
+ Build-Snapshot-Artifacts:
+ name: "Build snapshot artifacts"
runs-on: ubuntu-20.04
- # note: we want benchmarks to run on pull_request events in order to publish results to a sticky comment, and
- # we also want to run on push such that merges to main are recorded to the cache. For this reason we don't filter
- # the job by event.
steps:
- - uses: actions/setup-go@v2
- with:
- go-version: ${{ env.GO_VERSION }}
-
- - uses: actions/checkout@v2
-
- - name: Restore tool cache
- id: tool-cache
- uses: actions/cache@v2.1.3
- with:
- path: ${{ github.workspace }}/.tmp
- key: ${{ runner.os }}-tool-${{ hashFiles('Makefile') }}
-
- - name: Restore go cache
- id: go-cache
- uses: actions/cache@v2.1.3
- with:
- path: ~/go/pkg/mod
- key: ${{ runner.os }}-go-${{ env.GO_VERSION }}-${{ hashFiles('**/go.sum') }}
- restore-keys: |
- ${{ runner.os }}-go-${{ env.GO_VERSION }}-
-
- - name: (cache-miss) Bootstrap all project dependencies
- if: steps.tool-cache.outputs.cache-hit != 'true' || steps.go-cache.outputs.cache-hit != 'true'
- run: make bootstrap
-
- - name: Bootstrap CI environment dependencies
- run: make ci-bootstrap
-
- - name: Restore base benchmark result
- uses: actions/cache@v2
- with:
- path: test/results/benchmark-main.txt
- # use base sha for PR or new commit hash for main push in benchmark result key
- key: ${{ runner.os }}-bench-${{ (github.event.pull_request.base.sha != github.event.after) && github.event.pull_request.base.sha || github.event.after }}
-
- - name: Run benchmark tests
- id: benchmark
- run: |
- REF_NAME=${GITHUB_REF##*/} make benchmark
- OUTPUT=$(make show-benchstat)
- OUTPUT="${OUTPUT//'%'/'%25'}" # URL encode all '%' characters
- OUTPUT="${OUTPUT//$'\n'/'%0A'}" # URL encode all '\n' characters
- OUTPUT="${OUTPUT//$'\r'/'%0D'}" # URL encode all '\r' characters
- echo "::set-output name=result::$OUTPUT"
-
- - uses: actions/upload-artifact@v2
- with:
- name: benchmark-test-results
- path: test/results/**/*
-
- - name: Update PR benchmark results comment
- uses: marocchino/sticky-pull-request-comment@v2
- continue-on-error: true
- with:
- header: benchmark
- message: |
- ### Benchmark Test Results
+ - uses: actions/checkout@v3
-
- Benchmark results from the latest changes vs base branch
+ - name: Bootstrap environment
+ uses: ./.github/actions/bootstrap
- ```
- ${{ steps.benchmark.outputs.result }}
- ```
+ - name: Build snapshot artifacts
+ run: make snapshot
-
diff --git a/.gitignore b/.gitignore
index 930ca30b..26630caf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,7 @@
coverage.txt
**/test-fixtures/cache/
**/*.fingerprint
+snapshot/
# Binaries for programs and plugins
*.exe
diff --git a/.golangci.yaml b/.golangci.yaml
index 1cf5f179..a92c5c43 100644
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -26,7 +26,6 @@ linters:
- ineffassign
- misspell
- nakedret
- - nolintlint
- revive
- staticcheck
- stylecheck
@@ -37,19 +36,23 @@ linters:
- whitespace
# do not enable...
+# - deadcode # The owner seems to have abandoned the linter. Replaced by "unused".
# - gochecknoglobals
# - gochecknoinits # this is too aggressive
# - godot
# - godox
# - goerr113
-# - golint # deprecated
-# - gomnd # this is too aggressive
-# - interfacer # this is a good idea, but is no longer supported and is prone to false positives
-# - lll # without a way to specify per-line exception cases, this is not usable
-# - maligned # this is an excellent linter, but tricky to optimize and we are not sensitive to memory layout optimizations
+# - golint # deprecated
+# - gomnd # this is too aggressive
+# - interfacer # this is a good idea, but is no longer supported and is prone to false positives
+# - lll # without a way to specify per-line exception cases, this is not usable
+# - maligned # this is an excellent linter, but tricky to optimize and we are not sensitive to memory layout optimizations
# - nestif
-# - prealloc # following this rule isn't consistently a good idea, as it sometimes forces unnecessary allocations that result in less idiomatic code
-# - scopelint # deprecated
+# - nolintlint # as of go1.19 this conflicts with the behavior of gofmt, which is a deal-breaker (lint-fix will still fail when running lint)
+# - prealloc # following this rule isn't consistently a good idea, as it sometimes forces unnecessary allocations that result in less idiomatic code
+# - rowserrcheck # not in a repo with sql, so this is not useful
+# - scopelint # deprecated
+# - structcheck # The owner seems to have abandoned the linter. Replaced by "unused".
# - testpackage
-# - wsl # this doens't have an auto-fixer yet and is pretty noisy (https://github.com/bombsimon/wsl/issues/90)
-
+# - varcheck # The owner seems to have abandoned the linter. Replaced by "unused".
+# - wsl # this doesn't have an auto-fixer yet and is pretty noisy (https://github.com/bombsimon/wsl/issues/90)
diff --git a/DEVELOPING.md b/DEVELOPING.md
new file mode 100644
index 00000000..43577bb2
--- /dev/null
+++ b/DEVELOPING.md
@@ -0,0 +1,53 @@
+# Developing
+
+## Getting started
+
+In order to test and develop in this repo you will need the following dependencies installed:
+- Golang
+- docker
+- make
+- podman (for benchmark and integration tests only)
+
+After cloning, the following steps can help you get set up:
+1. run `make bootstrap` to download go mod dependencies, create the `./.tmp` dir, and download helper utilities.
+2. run `make help` to view the selection of developer commands in the Makefile
+
+The main make tasks for common static analysis and testing are `lint`, `lint-fix`, `unit`, and `integration`.
+
+See `make help` for all the current make tasks.
+
+## Background
+
+Stereoscope is a library for reading and manipulating container images. It is capable of parsing multiple image
+sources, providing a single abstraction for interacting with them. Ultimately this provides a squashfs-like
+interface for interacting with image layers as well as a content API for accessing files contained within
+the image.
+
+**Overview of objects:**
+- `image.Image`: Once parsed with `image.Read()` this object represents a container image. Consists of a sequence of `image.Layer` objects, a `image.FileCatalog` for accessing files, and `filetree.SearchContext` for searching for files from the squashed representation of the image filesystem. Additionally exposes GGCR `v1.Image` objects for accessing the raw image metadata.
+- `image.Layer`: represents a single layer of the image. Consists of a `filetree.FileTree` that represents the raw layer contents, and a `filetree.SearchContext` for searching for files relative to the raw (single layer) filetree as well as the squashed representation of the layer relative to all layers below this one. Additionally exposes GGCR `v1.Layer` objects for accessing the raw layer metadata.
+- `filetree.FileTree`: a tree representing a filesystem. All nodes represent real paths (paths with no link resolution anywhere in the path) and are absolute paths (start with / and contain no relative path elements [e.g. ../ or ./]). This represents the filesystem structure and each node has a reference to the file metadata for that path.
+- `file.Reference`: a unique file in the filesystem, identified by an absolute, real path as well as an integer ID (`file.ID`s). These are used to reference concrete nodes in the `filetree.FileTree` and `image.FileCatalog` objects.
+- `file.Index`: stores all known `file.Reference` and `file.Metadata`. Entries are indexed with a variety of ways to provide fast access to references and metadata without needing to crawl the tree. This is especially useful for speeding up globbing.
+- `image.FileCatalog`: an image-aware extension of `file.Index` that additionally relates `image.Layers` to `file.IDs` and provides a content API for any files contained within the image (regardless of which layer or squashed representation it exists in).
+
+### Searching for files
+
+Searching for files is exposed to users in three ways:
+- search by file path
+- search by file glob
+- search by file content MIME type
+
+Searching itself is performed two different ways:
+- search the `image.FileCatalog` on the image by a heuristic
+- search the `filetree.FileTree` directly
+
+The "best way" to search is automatically determined in the `filetree.searchContext` object, exposed on `image.Image` and `image.Layer` objects as a `filetree.Searcher` for general use.
+
+### File trees
+
+The `filetree.FileTree` object represents a filesystem and consists of `filenode.Node` objects. The tree itself leverages `tree.Tree` as a generic data structure. What `filetree.FileTree` adds is the concept of file types, the semantics of each type, the ability to resolve links based on a given strategy, merging of trees with the same semantics of a union filesystem (e.g. whiteout files), and the ability to search for files via direct paths or globs.
+
+The `fs.FS` abstraction has been implemented on `filetree.FileTree` to allow for easy integration with the standard library as well as to interop with the `doublestar` library to facilitate globbing. Using the `fs.FS` abstraction for filetree operations is faster than OS interactions with the filesystem directly but relatively slower than the indexes provided by `image.FileCatalog` and `file.Index`.
+
+`filetree.FileTree` objects can be created with a corresponding `file.Index` object by leveraging the `filetree.Builder` object, which aids in the indexing of files.
diff --git a/Makefile b/Makefile
index 7bdbc175..b5f3d3d6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,15 @@
-TEMPDIR = ./.tmp
-RESULTSDIR = test/results
-COVER_REPORT = $(RESULTSDIR)/unit-coverage-details.txt
-COVER_TOTAL = $(RESULTSDIR)/unit-coverage-summary.txt
-LINTCMD = $(TEMPDIR)/golangci-lint run --tests=false --config .golangci.yaml
+TEMP_DIR = ./.tmp
+
+# Command templates #################################
+LINT_CMD = $(TEMP_DIR)/golangci-lint run --tests=false --config .golangci.yaml
+
+# Tool versions #################################
+GOLANGCILINT_VERSION := v1.51.0
+GOSIMPORTS_VERSION := v0.3.5
+BOUNCER_VERSION := v0.4.0
+CHRONICLE_VERSION := v0.5.1
+
+# Formatting variables #################################
BOLD := $(shell tput -T linux bold)
PURPLE := $(shell tput -T linux setaf 5)
GREEN := $(shell tput -T linux setaf 2)
@@ -11,57 +18,72 @@ RED := $(shell tput -T linux setaf 1)
RESET := $(shell tput -T linux sgr0)
TITLE := $(BOLD)$(PURPLE)
SUCCESS := $(BOLD)$(GREEN)
-# the quality gate lower threshold for unit test total % coverage (by function statements)
-COVERAGE_THRESHOLD := 48
-ifeq "$(strip $(VERSION))" ""
- override VERSION = $(shell git describe --always --tags --dirty)
-endif
+# Test variables #################################
+COVERAGE_THRESHOLD := 55 # the quality gate lower threshold for unit test total % coverage (by function statements)
+
+## Build variables #################################
+SNAPSHOT_DIR := ./snapshot
+VERSION := $(shell git describe --dirty --always --tags)
-ifndef TEMPDIR
- $(error TEMPDIR is not set)
+ifndef VERSION
+ $(error VERSION is not set)
endif
-ifndef REF_NAME
- REF_NAME = $(VERSION)
+ifndef TEMP_DIR
+ $(error TEMP_DIR is not set)
endif
define title
@printf '$(TITLE)$(1)$(RESET)\n'
endef
+define safe_rm_rf
+ bash -c 'test -z "$(1)" && false || rm -rf $(1)'
+endef
+
+define safe_rm_rf_children
+ bash -c 'test -z "$(1)" && false || rm -rf $(1)/*'
+endef
+
.PHONY: all
-all: static-analysis test ## Run all checks (linting, all tests, and dependencies license checks)
+all: static-analysis test ## Run all linux-based checks (linting, license check, unit, integration, and linux compare tests)
@printf '$(SUCCESS)All checks pass!$(RESET)\n'
+.PHONY: static-analysis
+static-analysis: check-go-mod-tidy lint check-licenses ## Run all static analysis checks
+
.PHONY: test
-test: unit integration benchmark ## Run all levels of test
+test: unit integration benchmark ## Run all tests (currently unit and integrations)
-.PHONY: help
-help:
- @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "$(BOLD)$(CYAN)%-25s$(RESET)%s\n", $$1, $$2}'
+
+## Bootstrapping targets #################################
.PHONY: ci-bootstrap
ci-bootstrap: bootstrap
- sudo apt install -y bc
curl -sLO https://github.com/sylabs/singularity/releases/download/v3.10.0/singularity-ce_3.10.0-focal_amd64.deb && sudo apt-get install -y -f ./singularity-ce_3.10.0-focal_amd64.deb
-$(RESULTSDIR):
- mkdir -p $(RESULTSDIR)
-
-.PHONY: boostrap
-bootstrap: $(RESULTSDIR) ## Download and install all project dependencies (+ prep tooling in the ./tmp dir)
- $(call title,Downloading dependencies)
- @pwd
- # prep temp dirs
- mkdir -p $(TEMPDIR)
- mkdir -p $(RESULTSDIR)
- # install go dependencies
+.PHONY: bootstrap
+bootstrap: $(TEMP_DIR) bootstrap-go bootstrap-tools ## Download and install all tooling dependencies (+ prep tooling in the ./tmp dir)
+ $(call title,Bootstrapping dependencies)
+
+.PHONY: bootstrap-tools
+bootstrap-tools: $(TEMP_DIR)
+ GO111MODULE=off GOBIN=$(realpath $(TEMP_DIR)) go get -u golang.org/x/perf/cmd/benchstat
+ curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(TEMP_DIR)/ $(GOLANGCILINT_VERSION)
+ curl -sSfL https://raw.githubusercontent.com/wagoodman/go-bouncer/master/bouncer.sh | sh -s -- -b $(TEMP_DIR)/ $(BOUNCER_VERSION)
+ curl -sSfL https://raw.githubusercontent.com/anchore/chronicle/main/install.sh | sh -s -- -b $(TEMP_DIR)/ $(CHRONICLE_VERSION)
+ # the only difference between goimports and gosimports is that gosimports removes extra whitespace between import blocks (see https://github.com/golang/go/issues/20818)
+ GOBIN="$(realpath $(TEMP_DIR))" go install github.com/rinchsan/gosimports/cmd/gosimports@$(GOSIMPORTS_VERSION)
+
+.PHONY: bootstrap-go
+bootstrap-go:
go mod download
- # install utilities
- [ -f "$(TEMPDIR)/benchstat" ] || GO111MODULE=off GOBIN=$(shell realpath $(TEMPDIR)) go get -u golang.org/x/perf/cmd/benchstat
- [ -f "$(TEMPDIR)/golangci" ] || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(TEMPDIR)/ v1.50.1
- [ -f "$(TEMPDIR)/bouncer" ] || curl -sSfL https://raw.githubusercontent.com/wagoodman/go-bouncer/master/bouncer.sh | sh -s -- -b $(TEMPDIR)/ v0.4.0
+
+$(TEMP_DIR):
+ mkdir -p $(TEMP_DIR)
+
+## Static analysis targets #################################
.PHONY: static-analysis
static-analysis: check-licenses lint
@@ -71,40 +93,55 @@ lint: ## Run gofmt + golangci lint checks
$(call title,Running linters)
@printf "files with gofmt issues: [$(shell gofmt -l -s .)]\n"
@test -z "$(shell gofmt -l -s .)"
- $(LINTCMD)
+ $(LINT_CMD)
.PHONY: lint-fix
lint-fix: ## Auto-format all source code + run golangci lint fixers
$(call title,Running lint fixers)
gofmt -w -s .
- $(LINTCMD) --fix
+ $(LINT_CMD) --fix
go mod tidy
.PHONY: check-licenses
check-licenses:
$(call title,Validating licenses for go dependencies)
- $(TEMPDIR)/bouncer check
+ $(TEMP_DIR)/bouncer check
+
+check-go-mod-tidy:
+ @ .github/scripts/go-mod-tidy-check.sh && echo "go.mod and go.sum are tidy!"
+
+## Testing targets #################################
.PHONY: unit
-unit: $(RESULTSDIR) ## Run unit tests (with coverage)
+unit: $(TEMP_DIR) ## Run unit tests (with coverage)
$(call title,Running unit tests)
- go test --race -coverprofile $(COVER_REPORT) $(shell go list ./... | grep -v anchore/stereoscope/test/integration)
- @go tool cover -func $(COVER_REPORT) | grep total | awk '{print substr($$3, 1, length($$3)-1)}' > $(COVER_TOTAL)
- @echo "Coverage: $$(cat $(COVER_TOTAL))"
- @if [ $$(echo "$$(cat $(COVER_TOTAL)) >= $(COVERAGE_THRESHOLD)" | bc -l) -ne 1 ]; then echo "$(RED)$(BOLD)Failed coverage quality gate (> $(COVERAGE_THRESHOLD)%)$(RESET)" && false; fi
+ go test -coverprofile $(TEMP_DIR)/unit-coverage-details.txt $(shell go list ./... | grep -v anchore/stereoscope/test)
+ @.github/scripts/coverage.py $(COVERAGE_THRESHOLD) $(TEMP_DIR)/unit-coverage-details.txt
+
+
+.PHONY: integration
+integration: integration-tools ## Run integration tests
+ $(call title,Running integration tests)
+ go test -v ./test/integration
+
+## Benchmark test targets #################################
+
.PHONY: benchmark
-benchmark: $(RESULTSDIR) ## Run benchmark tests and compare against the baseline (if available)
+benchmark: $(TEMP_DIR) ## Run benchmark tests and compare against the baseline (if available)
$(call title,Running benchmark tests)
- go test -cpu 2 -p 1 -run=^Benchmark -bench=. -count=5 -benchmem ./... | tee $(RESULTSDIR)/benchmark-$(REF_NAME).txt
- (test -s $(RESULTSDIR)/benchmark-main.txt && \
- $(TEMPDIR)/benchstat $(RESULTSDIR)/benchmark-main.txt $(RESULTSDIR)/benchmark-$(REF_NAME).txt || \
- $(TEMPDIR)/benchstat $(RESULTSDIR)/benchmark-$(REF_NAME).txt) \
- | tee $(RESULTSDIR)/benchstat.txt
+ go test -cpu 2 -p 1 -run=^Benchmark -bench=. -count=5 -benchmem ./... | tee $(TEMP_DIR)/benchmark-$(VERSION).txt
+ (test -s $(TEMP_DIR)/benchmark-main.txt && \
+ $(TEMP_DIR)/benchstat $(TEMP_DIR)/benchmark-main.txt $(TEMP_DIR)/benchmark-$(VERSION).txt || \
+ $(TEMP_DIR)/benchstat $(TEMP_DIR)/benchmark-$(VERSION).txt) \
+ | tee $(TEMP_DIR)/benchstat.txt
+
.PHONY: show-benchstat
show-benchstat:
- @cat $(RESULTSDIR)/benchstat.txt
+ @cat $(TEMP_DIR)/benchstat.txt
+
+## Test-fixture-related targets #################################
# note: this is used by CI to determine if the integration test fixture cache (docker image tars) should be busted
.PHONY: integration-fingerprint
@@ -127,11 +164,30 @@ integration-tools-load:
integration-tools-save:
@cd test/integration/tools && make save-cache
-.PHONY: integration
-integration: integration-tools ## Run integration tests
- $(call title,Running integration tests)
- go test -v ./test/integration
+## Build-related targets #################################
+
+.PHONY: snapshot
+snapshot: clean-snapshot ## Build snapshot binaries for all supported platforms
+	$(call title,Build compatibility test)
+ @.github/scripts/build.sh $(SNAPSHOT_DIR)
+
+## Cleanup targets #################################
+
+.PHONY: clean
+clean: clear-test-cache clean-snapshot ## Delete all generated artifacts
+ $(call safe_rm_rf_children,$(TEMP_DIR))
+
+.PHONY: clean-snapshot
+clean-snapshot: ## Delete all snapshot builds
+ $(call safe_rm_rf,$(SNAPSHOT_DIR))
.PHONY: clear-test-cache
clear-test-cache: ## Delete all test cache (built docker image tars)
find . -type f -wholename "**/test-fixtures/cache/*.tar" -delete
+
+
+## Halp! #################################
+
+.PHONY: help
+help: ## Display this help
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "$(BOLD)$(CYAN)%-25s$(RESET)%s\n", $$1, $$2}'
diff --git a/examples/basic.go b/examples/basic.go
index 23d3883a..687245b9 100644
--- a/examples/basic.go
+++ b/examples/basic.go
@@ -89,7 +89,7 @@ func main() {
//////////////////////////////////////////////////////////////////
// Fetch file contents from the (squashed) image
filePath := file.Path("/etc/group")
- contentReader, err := image.FileContentsFromSquash(filePath)
+ contentReader, err := image.OpenPathFromSquash(filePath)
if err != nil {
panic(err)
}
diff --git a/go.mod b/go.mod
index eeae65c5..9cec3854 100644
--- a/go.mod
+++ b/go.mod
@@ -7,12 +7,14 @@ require (
github.com/anchore/go-logger v0.0.0-20220728155337-03b66a5207d8
github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04
github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20220517224237-e6f29200ae04
+ github.com/becheran/wildmatch-go v1.0.0
github.com/bmatcuk/doublestar/v4 v4.0.2
github.com/containerd/containerd v1.6.12
github.com/docker/cli v20.10.12+incompatible
github.com/docker/docker v20.10.12+incompatible
github.com/gabriel-vasile/mimetype v1.4.0
github.com/go-test/deep v1.0.8
+ github.com/google/go-cmp v0.5.8
github.com/google/go-containerregistry v0.7.0
github.com/hashicorp/go-multierror v1.1.1
github.com/logrusorgru/aurora v0.0.0-20200102142835-e9ef32dff381
@@ -23,7 +25,7 @@ require (
github.com/scylladb/go-set v1.0.3-0.20200225121959-cc7b2070d91e
github.com/sergi/go-diff v1.2.0
github.com/spf13/afero v1.6.0
- github.com/stretchr/testify v1.7.0
+ github.com/stretchr/testify v1.8.1
github.com/sylabs/sif/v2 v2.8.1
github.com/sylabs/squashfs v0.6.1
github.com/wagoodman/go-partybus v0.0.0-20200526224238-eb215533f07d
@@ -66,14 +68,14 @@ require (
github.com/pierrec/lz4/v4 v4.1.15 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/sirupsen/logrus v1.8.1 // indirect
- github.com/stretchr/objx v0.2.0 // indirect
+ github.com/stretchr/objx v0.5.0 // indirect
github.com/therootcompany/xz v1.0.1 // indirect
github.com/ulikunitz/xz v0.5.10 // indirect
github.com/vbatts/tar-split v0.11.2 // indirect
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect
- golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect
+ golang.org/x/sys v0.1.0 // indirect
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
golang.org/x/text v0.3.7 // indirect
google.golang.org/appengine v1.6.7 // indirect
diff --git a/go.sum b/go.sum
index 18aa55bd..33eeee5a 100644
--- a/go.sum
+++ b/go.sum
@@ -130,6 +130,8 @@ github.com/aws/smithy-go v1.6.0 h1:T6puApfBcYiTIsaI+SYWqanjMt5pc3aoyyDrI+0YH54=
github.com/aws/smithy-go v1.6.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E=
github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20220517224237-e6f29200ae04 h1:p2I85zYI9z5/c/3Q0LiO3RtNXcmXHTtJfml/hV16zNg=
github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20220517224237-e6f29200ae04/go.mod h1:Z+bXnIbhKJYSvxNwsNnwde7pDKxuqlEZCbUBoTwAqf0=
+github.com/becheran/wildmatch-go v1.0.0 h1:mE3dGGkTmpKtT4Z+88t8RStG40yN9T+kFEGj2PZFSzA=
+github.com/becheran/wildmatch-go v1.0.0/go.mod h1:gbMvj0NtVdJ15Mg/mH9uxk2R1QCistMyU7d9KFzroX4=
github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
@@ -428,8 +430,9 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg=
+github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-containerregistry v0.7.0 h1:u0onUUOcyoCDHEiJoyR1R1gx5er1+r06V5DBhUU5ndk=
github.com/google/go-containerregistry v0.7.0/go.mod h1:2zaoelrL0d08gGbpdP3LqyUuBmhWbpD6IOe2s9nLS2k=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
@@ -750,16 +753,21 @@ github.com/stefanberger/go-pkcs11uri v0.0.0-20201008174630-78d3cae3a980/go.mod h
github.com/stretchr/objx v0.0.0-20180129172003-8a3f7159479f/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v0.0.0-20180303142811-b89eecf5ca5d/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/sylabs/sif/v2 v2.8.1 h1:whr4Vz12RXfLnYyVGHoD/rD/hbF2g9OW7BJHa+WIqW8=
github.com/sylabs/sif/v2 v2.8.1/go.mod h1:LQOdYXC9a8i7BleTKRw9lohi0rTbXkJOeS9u0ebvgyM=
@@ -1054,8 +1062,8 @@ golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s=
-golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
+golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 h1:JGgROgKl9N8DuW20oFS5gxc+lE67/N3FcwmBPMe7ArY=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
@@ -1137,7 +1145,6 @@ golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/api v0.0.0-20160322025152-9bf6e6e569ff/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
diff --git a/internal/string_set.go b/internal/string_set.go
new file mode 100644
index 00000000..42f00165
--- /dev/null
+++ b/internal/string_set.go
@@ -0,0 +1,76 @@
+package internal
+
+import (
+ "sort"
+)
+
+type StringSet map[string]struct{}
+
+func NewStringSet(is ...string) StringSet {
+ // TODO: replace with single generic implementation that also incorporates other set implementations
+ s := make(StringSet)
+ s.Add(is...)
+ return s
+}
+
+func (s StringSet) Size() int {
+ return len(s)
+}
+
+func (s StringSet) Merge(other StringSet) {
+ for _, i := range other.List() {
+ s.Add(i)
+ }
+}
+
+func (s StringSet) Add(ids ...string) {
+ for _, i := range ids {
+ s[i] = struct{}{}
+ }
+}
+
+func (s StringSet) Remove(ids ...string) {
+ for _, i := range ids {
+ delete(s, i)
+ }
+}
+
+func (s StringSet) Contains(i string) bool {
+ _, ok := s[i]
+ return ok
+}
+
+func (s StringSet) Clear() {
+ // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21
+ for i := range s {
+ delete(s, i)
+ }
+}
+
+func (s StringSet) List() []string {
+ ret := make([]string, 0, len(s))
+ for i := range s {
+ ret = append(ret, i)
+ }
+ return ret
+}
+
+func (s StringSet) Sorted() []string {
+ ids := s.List()
+
+ sort.Slice(ids, func(i, j int) bool {
+ return ids[i] < ids[j]
+ })
+
+ return ids
+}
+
+func (s StringSet) ContainsAny(ids ...string) bool {
+ for _, i := range ids {
+ _, ok := s[i]
+ if ok {
+ return true
+ }
+ }
+ return false
+}
diff --git a/internal/string_set_test.go b/internal/string_set_test.go
new file mode 100644
index 00000000..e04727ae
--- /dev/null
+++ b/internal/string_set_test.go
@@ -0,0 +1,226 @@
+package internal
+
+import (
+ "fmt"
+ "github.com/stretchr/testify/assert"
+ "testing"
+)
+
+func TestStringSet_Size(t *testing.T) {
+ type testCase struct {
+ name string
+ s StringSet
+ want int
+ }
+ tests := []testCase{
+ {
+ name: "empty set",
+ s: NewStringSet(),
+ want: 0,
+ },
+ {
+ name: "non-empty set",
+ s: NewStringSet("items", "in", "set"),
+ want: 3,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if got := tt.s.Size(); got != tt.want {
+ t.Errorf("Size() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestStringSet_Add(t *testing.T) {
+ type args struct {
+ ids []string
+ }
+ type testCase struct {
+ name string
+ s StringSet
+ args args
+ }
+ tests := []testCase{
+ {
+ name: "add multiple",
+ s: NewStringSet(),
+ args: args{ids: []string{"a", "b", "c"}},
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ tt.s.Add(tt.args.ids...)
+ for _, id := range tt.args.ids {
+ if !tt.s.Contains(id) {
+ t.Errorf("expected set to contain %q", id)
+ }
+ }
+ })
+ }
+}
+
+func TestStringSet_Remove(t *testing.T) {
+ type args struct {
+ ids []string
+ }
+ type testCase struct {
+ name string
+ s StringSet
+ args args
+ expected []string
+ }
+ tests := []testCase{
+ {
+ name: "remove multiple",
+ s: NewStringSet("a", "b", "c"),
+ args: args{ids: []string{"a", "b"}},
+ expected: []string{"c"},
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ tt.s.Remove(tt.args.ids...)
+ for _, id := range tt.args.ids {
+ if tt.s.Contains(id) {
+ t.Errorf("expected set to NOT contain %q", id)
+ }
+ }
+ for _, id := range tt.expected {
+ if !tt.s.Contains(id) {
+ t.Errorf("expected set to contain %q", id)
+ }
+ }
+ })
+ }
+}
+
+func TestStringSet_Contains(t *testing.T) {
+ type args struct {
+ i string
+ }
+ type testCase struct {
+ name string
+ s StringSet
+ args args
+ want bool
+ }
+ tests := []testCase{
+ {
+ name: "contains",
+ s: NewStringSet("a", "b", "c"),
+ args: args{i: "a"},
+ want: true,
+ },
+ {
+ name: "not contains",
+ s: NewStringSet("a", "b", "c"),
+ args: args{i: "x"},
+ want: false,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if got := tt.s.Contains(tt.args.i); got != tt.want {
+ t.Errorf("Contains() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestStringSet_Clear(t *testing.T) {
+ type testCase struct {
+ name string
+ s StringSet
+ }
+ tests := []testCase{
+ {
+ name: "go case",
+ s: NewStringSet("a", "b", "c"),
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ tt.s.Clear()
+ assert.Equal(t, 0, tt.s.Size())
+ })
+ }
+}
+
+func TestStringSet_List(t *testing.T) {
+ type testCase struct {
+ name string
+ s StringSet
+ want []string
+ }
+ tests := []testCase{
+ {
+ name: "go case",
+ s: NewStringSet("a", "b", "c"),
+ want: []string{"a", "b", "c"},
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.ElementsMatchf(t, tt.want, tt.s.List(), "List()")
+ })
+ }
+}
+
+func TestStringSet_Sorted(t *testing.T) {
+ type testCase struct {
+ name string
+ s StringSet
+ want []string
+ }
+ tests := []testCase{
+ {
+ name: "go case",
+ s: NewStringSet("a", "b", "c"),
+ want: []string{"a", "b", "c"},
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.Equalf(t, tt.want, tt.s.Sorted(), "Sorted()")
+ })
+ }
+}
+
+func TestStringSet_ContainsAny(t *testing.T) {
+ type args struct {
+ ids []string
+ }
+ type testCase struct {
+ name string
+ s StringSet
+ args args
+ want bool
+ }
+ tests := []testCase{
+ {
+ name: "contains one",
+ s: NewStringSet("a", "b", "c"),
+ args: args{ids: []string{"a", "x"}},
+ want: true,
+ },
+ {
+ name: "contains all",
+ s: NewStringSet("a", "b", "c"),
+ args: args{ids: []string{"a", "b"}},
+ want: true,
+ },
+ {
+ name: "contains none",
+ s: NewStringSet("a", "b", "c"),
+ args: args{ids: []string{"x", "y"}},
+ want: false,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.Equal(t, tt.want, tt.s.ContainsAny(tt.args.ids...), fmt.Sprintf("ContainsAny(%v)", tt.args.ids))
+ })
+ }
+}
diff --git a/internal/stringset.go b/internal/stringset.go
deleted file mode 100644
index 327312b0..00000000
--- a/internal/stringset.go
+++ /dev/null
@@ -1,38 +0,0 @@
-package internal
-
-import "sort"
-
-type Set map[string]struct{}
-
-func NewStringSet(start ...string) Set {
- ret := make(Set)
- for _, s := range start {
- ret.Add(s)
- }
- return ret
-}
-
-func (s Set) Add(i string) {
- s[i] = struct{}{}
-}
-
-func (s Set) Remove(i string) {
- delete(s, i)
-}
-
-func (s Set) Contains(i string) bool {
- _, ok := s[i]
- return ok
-}
-
-// ToSlice returns a sorted slice of strings that are contained within the set.
-func (s Set) ToSlice() []string {
- ret := make([]string, len(s))
- idx := 0
- for v := range s {
- ret[idx] = v
- idx++
- }
- sort.Strings(ret)
- return ret
-}
diff --git a/pkg/file/get_xid.go b/pkg/file/get_xid.go
new file mode 100644
index 00000000..29a7b02a
--- /dev/null
+++ b/pkg/file/get_xid.go
@@ -0,0 +1,20 @@
+//go:build !windows
+
+package file
+
+import (
+ "os"
+ "syscall"
+)
+
+// getXid returns the UID and GID of the file on unix systems, or -1/-1 if unavailable
+func getXid(info os.FileInfo) (uid, gid int) {
+ uid = -1
+ gid = -1
+ if stat, ok := info.Sys().(*syscall.Stat_t); ok {
+ uid = int(stat.Uid)
+ gid = int(stat.Gid)
+ }
+
+ return uid, gid
+}
diff --git a/pkg/file/get_xid_win.go b/pkg/file/get_xid_win.go
new file mode 100644
index 00000000..abe28de8
--- /dev/null
+++ b/pkg/file/get_xid_win.go
@@ -0,0 +1,12 @@
+//go:build windows
+
+package file
+
+import (
+ "os"
+)
+
+// getXid is a placeholder for windows file information
+func getXid(info os.FileInfo) (uid, gid int) {
+ return -1, -1
+}
diff --git a/pkg/file/id.go b/pkg/file/id.go
new file mode 100644
index 00000000..75f05dc5
--- /dev/null
+++ b/pkg/file/id.go
@@ -0,0 +1,20 @@
+package file
+
+var nextID = 0 // note: this is governed by the reference constructor
+
+// ID is used for file tree manipulation to uniquely identify tree nodes.
+type ID uint64
+
+type IDs []ID
+
+func (ids IDs) Len() int {
+ return len(ids)
+}
+
+func (ids IDs) Less(i, j int) bool {
+ return ids[i] < ids[j]
+}
+
+func (ids IDs) Swap(i, j int) {
+ ids[i], ids[j] = ids[j], ids[i]
+}
diff --git a/pkg/file/id_set.go b/pkg/file/id_set.go
new file mode 100644
index 00000000..eebe00fa
--- /dev/null
+++ b/pkg/file/id_set.go
@@ -0,0 +1,75 @@
+//nolint:dupl
+package file
+
+import "sort"
+
+type IDSet map[ID]struct{}
+
+func NewIDSet(is ...ID) IDSet {
+ // TODO: replace with single generic implementation that also incorporates other set implementations
+ s := make(IDSet)
+ s.Add(is...)
+ return s
+}
+
+func (s IDSet) Size() int {
+ return len(s)
+}
+
+func (s IDSet) Merge(other IDSet) {
+ for _, i := range other.List() {
+ s.Add(i)
+ }
+}
+
+func (s IDSet) Add(ids ...ID) {
+ for _, i := range ids {
+ s[i] = struct{}{}
+ }
+}
+
+func (s IDSet) Remove(ids ...ID) {
+ for _, i := range ids {
+ delete(s, i)
+ }
+}
+
+func (s IDSet) Contains(i ID) bool {
+ _, ok := s[i]
+ return ok
+}
+
+func (s IDSet) Clear() {
+ // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21
+ for i := range s {
+ delete(s, i)
+ }
+}
+
+func (s IDSet) List() []ID {
+ ret := make([]ID, 0, len(s))
+ for i := range s {
+ ret = append(ret, i)
+ }
+ return ret
+}
+
+func (s IDSet) Sorted() []ID {
+ ids := s.List()
+
+ sort.Slice(ids, func(i, j int) bool {
+ return ids[i] < ids[j]
+ })
+
+ return ids
+}
+
+func (s IDSet) ContainsAny(ids ...ID) bool {
+ for _, i := range ids {
+ _, ok := s[i]
+ if ok {
+ return true
+ }
+ }
+ return false
+}
diff --git a/pkg/file/id_set_test.go b/pkg/file/id_set_test.go
new file mode 100644
index 00000000..b0d146db
--- /dev/null
+++ b/pkg/file/id_set_test.go
@@ -0,0 +1,226 @@
+package file
+
+import (
+ "fmt"
+ "github.com/stretchr/testify/assert"
+ "testing"
+)
+
+func TestIDSet_Size(t *testing.T) {
+ type testCase struct {
+ name string
+ s IDSet
+ want int
+ }
+ tests := []testCase{
+ {
+ name: "empty set",
+ s: NewIDSet(),
+ want: 0,
+ },
+ {
+ name: "non-empty set",
+ s: NewIDSet(1, 2, 3),
+ want: 3,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if got := tt.s.Size(); got != tt.want {
+ t.Errorf("Size() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestIDSet_Add(t *testing.T) {
+ type args struct {
+ ids []ID
+ }
+ type testCase struct {
+ name string
+ s IDSet
+ args args
+ }
+ tests := []testCase{
+ {
+ name: "add multiple",
+ s: NewIDSet(),
+ args: args{ids: []ID{1, 2, 3}},
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ tt.s.Add(tt.args.ids...)
+ for _, id := range tt.args.ids {
+ if !tt.s.Contains(id) {
+ t.Errorf("expected set to contain %q", id)
+ }
+ }
+ })
+ }
+}
+
+func TestIDSet_Remove(t *testing.T) {
+ type args struct {
+ ids []ID
+ }
+ type testCase struct {
+ name string
+ s IDSet
+ args args
+ expected []ID
+ }
+ tests := []testCase{
+ {
+ name: "remove multiple",
+ s: NewIDSet(1, 2, 3),
+ args: args{ids: []ID{1, 2}},
+ expected: []ID{3},
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ tt.s.Remove(tt.args.ids...)
+ for _, id := range tt.args.ids {
+ if tt.s.Contains(id) {
+ t.Errorf("expected set to NOT contain %q", id)
+ }
+ }
+ for _, id := range tt.expected {
+ if !tt.s.Contains(id) {
+ t.Errorf("expected set to contain %q", id)
+ }
+ }
+ })
+ }
+}
+
+func TestIDSet_Contains(t *testing.T) {
+ type args struct {
+ i ID
+ }
+ type testCase struct {
+ name string
+ s IDSet
+ args args
+ want bool
+ }
+ tests := []testCase{
+ {
+ name: "contains",
+ s: NewIDSet(1, 2, 3),
+ args: args{i: 1},
+ want: true,
+ },
+ {
+ name: "not contains",
+ s: NewIDSet(1, 2, 3),
+ args: args{i: 97},
+ want: false,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if got := tt.s.Contains(tt.args.i); got != tt.want {
+ t.Errorf("Contains() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestIDSet_Clear(t *testing.T) {
+ type testCase struct {
+ name string
+ s IDSet
+ }
+ tests := []testCase{
+ {
+ name: "go case",
+ s: NewIDSet(1, 2, 3),
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ tt.s.Clear()
+ assert.Equal(t, 0, tt.s.Size())
+ })
+ }
+}
+
+func TestIDSet_List(t *testing.T) {
+ type testCase struct {
+ name string
+ s IDSet
+ want []ID
+ }
+ tests := []testCase{
+ {
+ name: "go case",
+ s: NewIDSet(1, 2, 3),
+ want: []ID{1, 2, 3},
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.ElementsMatchf(t, tt.want, tt.s.List(), "List()")
+ })
+ }
+}
+
+func TestIDSet_Sorted(t *testing.T) {
+ type testCase struct {
+ name string
+ s IDSet
+ want []ID
+ }
+ tests := []testCase{
+ {
+ name: "go case",
+ s: NewIDSet(1, 2, 3),
+ want: []ID{1, 2, 3},
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.Equalf(t, tt.want, tt.s.Sorted(), "Sorted()")
+ })
+ }
+}
+
+func TestIDSet_ContainsAny(t *testing.T) {
+ type args struct {
+ ids []ID
+ }
+ type testCase struct {
+ name string
+ s IDSet
+ args args
+ want bool
+ }
+ tests := []testCase{
+ {
+ name: "contains one",
+ s: NewIDSet(1, 2, 3),
+ args: args{ids: []ID{1, 97}},
+ want: true,
+ },
+ {
+ name: "contains all",
+ s: NewIDSet(1, 2, 3),
+ args: args{ids: []ID{1, 2}},
+ want: true,
+ },
+ {
+ name: "contains none",
+ s: NewIDSet(1, 2, 3),
+ args: args{ids: []ID{97, 98}},
+ want: false,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.Equal(t, tt.want, tt.s.ContainsAny(tt.args.ids...), fmt.Sprintf("ContainsAny(%v)", tt.args.ids))
+ })
+ }
+}
diff --git a/pkg/file/metadata.go b/pkg/file/metadata.go
index 1cbd0d92..990e7a4f 100644
--- a/pkg/file/metadata.go
+++ b/pkg/file/metadata.go
@@ -7,6 +7,8 @@ import (
"path"
"path/filepath"
+ "github.com/anchore/stereoscope/internal/log"
+
"github.com/sylabs/squashfs"
)
@@ -14,36 +16,29 @@ import (
type Metadata struct {
// Path is the absolute path representation to the file
Path string
- // TarHeaderName is the exact entry name as found within a tar header
- TarHeaderName string
- // TarSequence is the nth header in the tar file this entry was found
- TarSequence int64
- // Linkname is populated only for hardlinks / symlinks, can be an absolute or relative
- Linkname string
+ // LinkDestination is populated only for hardlinks / symlinks, can be an absolute or relative
+ LinkDestination string
// Size of the file in bytes
- Size int64
- UserID int
- GroupID int
- // TypeFlag is the tar.TypeFlag entry for the file
- TypeFlag byte
+ Size int64
+ UserID int
+ GroupID int
+ Type Type
IsDir bool
Mode os.FileMode
MIMEType string
}
-func NewMetadata(header tar.Header, sequence int64, content io.Reader) Metadata {
+func NewMetadata(header tar.Header, content io.Reader) Metadata {
return Metadata{
- Path: path.Clean(DirSeparator + header.Name),
- TarHeaderName: header.Name,
- TarSequence: sequence,
- TypeFlag: header.Typeflag,
- Linkname: header.Linkname,
- Size: header.FileInfo().Size(),
- Mode: header.FileInfo().Mode(),
- UserID: header.Uid,
- GroupID: header.Gid,
- IsDir: header.FileInfo().IsDir(),
- MIMEType: MIMEType(content),
+ Path: path.Clean(DirSeparator + header.Name),
+ Type: TypeFromTarType(header.Typeflag),
+ LinkDestination: header.Linkname,
+ Size: header.FileInfo().Size(),
+ Mode: header.FileInfo().Mode(),
+ UserID: header.Uid,
+ GroupID: header.Gid,
+ IsDir: header.FileInfo().IsDir(),
+ MIMEType: MIMEType(content),
}
}
@@ -54,12 +49,37 @@ func NewMetadataFromSquashFSFile(path string, f *squashfs.File) (Metadata, error
return Metadata{}, err
}
+ var ty Type
+ switch {
+ case fi.IsDir():
+ ty = TypeDirectory
+ case f.IsRegular():
+ ty = TypeRegular
+ case f.IsSymlink():
+ ty = TypeSymLink
+ default:
+ switch fi.Mode() & os.ModeType {
+ case os.ModeNamedPipe:
+ ty = TypeFIFO
+ case os.ModeSocket:
+ ty = TypeSocket
+ case os.ModeDevice:
+ ty = TypeBlockDevice
+ case os.ModeCharDevice:
+ ty = TypeCharacterDevice
+ case os.ModeIrregular:
+ ty = TypeIrregular
+ }
+	// note: cannot determine hardlink from squashfs.File (but this case is not possible)
+ }
+
md := Metadata{
- Path: filepath.Clean(filepath.Join("/", path)),
- Linkname: f.SymlinkPath(),
- Size: fi.Size(),
- IsDir: f.IsDir(),
- Mode: fi.Mode(),
+ Path: filepath.Clean(filepath.Join("/", path)),
+ LinkDestination: f.SymlinkPath(),
+ Size: fi.Size(),
+ IsDir: f.IsDir(),
+ Mode: fi.Mode(),
+ Type: ty,
}
if f.IsRegular() {
@@ -68,3 +88,38 @@ func NewMetadataFromSquashFSFile(path string, f *squashfs.File) (Metadata, error
return md, nil
}
+
+func NewMetadataFromPath(path string, info os.FileInfo) Metadata {
+ var mimeType string
+ uid, gid := getXid(info)
+
+ ty := TypeFromMode(info.Mode())
+
+ if ty == TypeRegular {
+ f, err := os.Open(path)
+ if err != nil {
+ // TODO: it may be that the file is inaccessible, however, this is not an error or a warning. In the future we need to track these as known-unknowns
+ f = nil
+ } else {
+ defer func() {
+ if err := f.Close(); err != nil {
+ log.Warnf("unable to close file while obtaining metadata: %s", path)
+ }
+ }()
+ }
+
+ mimeType = MIMEType(f)
+ }
+
+ return Metadata{
+ Path: path,
+ Mode: info.Mode(),
+ Type: ty,
+ // unsupported across platforms
+ UserID: uid,
+ GroupID: gid,
+ Size: info.Size(),
+ MIMEType: mimeType,
+ IsDir: info.IsDir(),
+ }
+}
diff --git a/pkg/file/metadata_test.go b/pkg/file/metadata_test.go
index b89ed431..aa60bc95 100644
--- a/pkg/file/metadata_test.go
+++ b/pkg/file/metadata_test.go
@@ -4,6 +4,8 @@
package file
import (
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
"io"
"os"
"strings"
@@ -16,13 +18,13 @@ func TestFileMetadataFromTar(t *testing.T) {
tarReader := getTarFixture(t, "fixture-1")
expected := []Metadata{
- {Path: "/path", TarSequence: 0, TarHeaderName: "path/", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""},
- {Path: "/path/branch", TarSequence: 1, TarHeaderName: "path/branch/", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""},
- {Path: "/path/branch/one", TarSequence: 2, TarHeaderName: "path/branch/one/", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o700, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""},
- {Path: "/path/branch/one/file-1.txt", TarSequence: 3, TarHeaderName: "path/branch/one/file-1.txt", TypeFlag: 48, Linkname: "", Size: 11, Mode: 0o700, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"},
- {Path: "/path/branch/two", TarSequence: 4, TarHeaderName: "path/branch/two/", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""},
- {Path: "/path/branch/two/file-2.txt", TarSequence: 5, TarHeaderName: "path/branch/two/file-2.txt", TypeFlag: 48, Linkname: "", Size: 12, Mode: 0o755, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"},
- {Path: "/path/file-3.txt", TarSequence: 6, TarHeaderName: "path/file-3.txt", TypeFlag: 48, Linkname: "", Size: 11, Mode: 0o664, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"},
+ {Path: "/path", Type: TypeDirectory, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""},
+ {Path: "/path/branch", Type: TypeDirectory, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""},
+ {Path: "/path/branch/one", Type: TypeDirectory, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o700, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""},
+ {Path: "/path/branch/one/file-1.txt", Type: TypeRegular, LinkDestination: "", Size: 11, Mode: 0o700, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"},
+ {Path: "/path/branch/two", Type: TypeDirectory, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""},
+ {Path: "/path/branch/two/file-2.txt", Type: TypeRegular, LinkDestination: "", Size: 12, Mode: 0o755, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"},
+ {Path: "/path/file-3.txt", Type: TypeRegular, LinkDestination: "", Size: 11, Mode: 0o664, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"},
}
var actual []Metadata
@@ -31,7 +33,7 @@ func TestFileMetadataFromTar(t *testing.T) {
if strings.HasSuffix(entry.Header.Name, ".txt") {
contents = strings.NewReader("#!/usr/bin/env bash\necho 'awesome script'")
}
- actual = append(actual, NewMetadata(entry.Header, entry.Sequence, contents))
+ actual = append(actual, NewMetadata(entry.Header, contents))
return nil
}
@@ -43,3 +45,43 @@ func TestFileMetadataFromTar(t *testing.T) {
t.Errorf("diff: %s", d)
}
}
+
+func TestFileMetadataFromPath(t *testing.T) {
+
+ tests := []struct {
+ path string
+ expectedType Type
+ expectedMIMEType string
+ }{
+ {
+ path: "test-fixtures/symlinks-simple/readme",
+ expectedType: TypeRegular,
+ expectedMIMEType: "text/plain",
+ },
+ {
+ path: "test-fixtures/symlinks-simple/link_to_new_readme",
+ expectedType: TypeSymLink,
+ expectedMIMEType: "",
+ },
+ {
+ path: "test-fixtures/symlinks-simple/link_to_link_to_new_readme",
+ expectedType: TypeSymLink,
+ expectedMIMEType: "",
+ },
+ {
+ path: "test-fixtures/symlinks-simple",
+ expectedType: TypeDirectory,
+ expectedMIMEType: "",
+ },
+ }
+ for _, test := range tests {
+ t.Run(test.path, func(t *testing.T) {
+ info, err := os.Lstat(test.path)
+ require.NoError(t, err)
+
+ actual := NewMetadataFromPath(test.path, info)
+ assert.Equal(t, test.expectedMIMEType, actual.MIMEType, "unexpected MIME type for %s", test.path)
+ assert.Equal(t, test.expectedType, actual.Type, "unexpected type for %s", test.path)
+ })
+ }
+}
diff --git a/pkg/file/path_set.go b/pkg/file/path_set.go
index fe7280fc..a46f342a 100644
--- a/pkg/file/path_set.go
+++ b/pkg/file/path_set.go
@@ -1,20 +1,77 @@
+//nolint:dupl
package file
+import (
+ "sort"
+)
+
type PathSet map[Path]struct{}
-func NewPathSet() PathSet {
- return make(PathSet)
+func NewPathSet(is ...Path) PathSet {
+ // TODO: replace with single generic implementation that also incorporates other set implementations
+ s := make(PathSet)
+ s.Add(is...)
+ return s
+}
+
+func (s PathSet) Size() int {
+ return len(s)
+}
+
+func (s PathSet) Merge(other PathSet) {
+ for _, i := range other.List() {
+ s.Add(i)
+ }
}
-func (s PathSet) Add(i Path) {
- s[i] = struct{}{}
+func (s PathSet) Add(ids ...Path) {
+ for _, i := range ids {
+ s[i] = struct{}{}
+ }
}
-func (s PathSet) Remove(i Path) {
- delete(s, i)
+func (s PathSet) Remove(ids ...Path) {
+ for _, i := range ids {
+ delete(s, i)
+ }
}
func (s PathSet) Contains(i Path) bool {
_, ok := s[i]
return ok
}
+
+func (s PathSet) Clear() {
+ // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21
+ for i := range s {
+ delete(s, i)
+ }
+}
+
+func (s PathSet) List() []Path {
+ ret := make([]Path, 0, len(s))
+ for i := range s {
+ ret = append(ret, i)
+ }
+ return ret
+}
+
+func (s PathSet) Sorted() []Path {
+ ids := s.List()
+
+ sort.Slice(ids, func(i, j int) bool {
+ return ids[i] < ids[j]
+ })
+
+ return ids
+}
+
+func (s PathSet) ContainsAny(ids ...Path) bool {
+ for _, i := range ids {
+ _, ok := s[i]
+ if ok {
+ return true
+ }
+ }
+ return false
+}
diff --git a/pkg/file/path_set_test.go b/pkg/file/path_set_test.go
new file mode 100644
index 00000000..5d296649
--- /dev/null
+++ b/pkg/file/path_set_test.go
@@ -0,0 +1,226 @@
+package file
+
+import (
+ "fmt"
+ "github.com/stretchr/testify/assert"
+ "testing"
+)
+
+func TestPathSet_Size(t *testing.T) {
+ type testCase struct {
+ name string
+ s PathSet
+ want int
+ }
+ tests := []testCase{
+ {
+ name: "empty set",
+ s: NewPathSet(),
+ want: 0,
+ },
+ {
+ name: "non-empty set",
+ s: NewPathSet("items", "in", "set"),
+ want: 3,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if got := tt.s.Size(); got != tt.want {
+ t.Errorf("Size() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestPathSet_Add(t *testing.T) {
+ type args struct {
+ ids []Path
+ }
+ type testCase struct {
+ name string
+ s PathSet
+ args args
+ }
+ tests := []testCase{
+ {
+ name: "add multiple",
+ s: NewPathSet(),
+ args: args{ids: []Path{"a", "b", "c"}},
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ tt.s.Add(tt.args.ids...)
+ for _, id := range tt.args.ids {
+ if !tt.s.Contains(id) {
+ t.Errorf("expected set to contain %q", id)
+ }
+ }
+ })
+ }
+}
+
+func TestPathSet_Remove(t *testing.T) {
+ type args struct {
+ ids []Path
+ }
+ type testCase struct {
+ name string
+ s PathSet
+ args args
+ expected []Path
+ }
+ tests := []testCase{
+ {
+ name: "remove multiple",
+ s: NewPathSet("a", "b", "c"),
+ args: args{ids: []Path{"a", "b"}},
+ expected: []Path{"c"},
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ tt.s.Remove(tt.args.ids...)
+ for _, id := range tt.args.ids {
+ if tt.s.Contains(id) {
+ t.Errorf("expected set to NOT contain %q", id)
+ }
+ }
+ for _, id := range tt.expected {
+ if !tt.s.Contains(id) {
+ t.Errorf("expected set to contain %q", id)
+ }
+ }
+ })
+ }
+}
+
+func TestPathSet_Contains(t *testing.T) {
+ type args struct {
+ i Path
+ }
+ type testCase struct {
+ name string
+ s PathSet
+ args args
+ want bool
+ }
+ tests := []testCase{
+ {
+ name: "contains",
+ s: NewPathSet("a", "b", "c"),
+ args: args{i: "a"},
+ want: true,
+ },
+ {
+ name: "not contains",
+ s: NewPathSet("a", "b", "c"),
+ args: args{i: "x"},
+ want: false,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if got := tt.s.Contains(tt.args.i); got != tt.want {
+ t.Errorf("Contains() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestPathSet_Clear(t *testing.T) {
+ type testCase struct {
+ name string
+ s PathSet
+ }
+ tests := []testCase{
+ {
+ name: "go case",
+ s: NewPathSet("a", "b", "c"),
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ tt.s.Clear()
+ assert.Equal(t, 0, tt.s.Size())
+ })
+ }
+}
+
+func TestPathSet_List(t *testing.T) {
+ type testCase struct {
+ name string
+ s PathSet
+ want []Path
+ }
+ tests := []testCase{
+ {
+ name: "go case",
+ s: NewPathSet("a", "b", "c"),
+ want: []Path{"a", "b", "c"},
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.ElementsMatchf(t, tt.want, tt.s.List(), "List()")
+ })
+ }
+}
+
+func TestPathSet_Sorted(t *testing.T) {
+ type testCase struct {
+ name string
+ s PathSet
+ want []Path
+ }
+ tests := []testCase{
+ {
+ name: "go case",
+ s: NewPathSet("a", "b", "c"),
+ want: []Path{"a", "b", "c"},
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.Equalf(t, tt.want, tt.s.Sorted(), "Sorted()")
+ })
+ }
+}
+
+func TestPathSet_ContainsAny(t *testing.T) {
+ type args struct {
+ ids []Path
+ }
+ type testCase struct {
+ name string
+ s PathSet
+ args args
+ want bool
+ }
+ tests := []testCase{
+ {
+ name: "contains one",
+ s: NewPathSet("a", "b", "c"),
+ args: args{ids: []Path{"a", "x"}},
+ want: true,
+ },
+ {
+ name: "contains all",
+ s: NewPathSet("a", "b", "c"),
+ args: args{ids: []Path{"a", "b"}},
+ want: true,
+ },
+ {
+ name: "contains none",
+ s: NewPathSet("a", "b", "c"),
+ args: args{ids: []Path{"x", "y"}},
+ want: false,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.Equal(t, tt.want, tt.s.ContainsAny(tt.args.ids...), fmt.Sprintf("ContainsAny(%v)", tt.args.ids))
+ })
+ }
+}
diff --git a/pkg/file/reference.go b/pkg/file/reference.go
index 47ed8ae9..1109227d 100644
--- a/pkg/file/reference.go
+++ b/pkg/file/reference.go
@@ -1,13 +1,6 @@
package file
-import (
- "fmt"
-)
-
-var nextID = 0
-
-// ID is used for file tree manipulation to uniquely identify tree nodes.
-type ID uint64
+import "fmt"
// Reference represents a unique file. This is useful when path is not good enough (i.e. you have the same file path for two files in two different container image layers, and you need to be able to distinguish them apart)
type Reference struct {
diff --git a/pkg/file/resolution.go b/pkg/file/resolution.go
new file mode 100644
index 00000000..5f10c719
--- /dev/null
+++ b/pkg/file/resolution.go
@@ -0,0 +1,158 @@
+package file
+
+import (
+ "sort"
+
+ "github.com/scylladb/go-set/strset"
+)
+
+// Resolution represents the fetching of a possibly non-existent file via a request path.
+type Resolution struct {
+ RequestPath Path
+ *Reference
+ // LinkResolutions represents the traversal through the filesystem to access the current reference, including all symlink and hardlink resolution.
+ // note: today this only shows resolutions via the basename of the request path, but in the future it may show all resolutions.
+ LinkResolutions []Resolution
+}
+
+type Resolutions []Resolution
+
+// NewResolution creates a new Resolution for the given request path, showing the resolved reference (or
+// nil if it does not exist), and the link resolution of the basename of the request path transitively.
+func NewResolution(path Path, ref *Reference, leafs []Resolution) *Resolution {
+ return &Resolution{
+ RequestPath: path,
+ Reference: ref,
+ LinkResolutions: leafs,
+ }
+}
+
+func (f Resolutions) Len() int {
+ return len(f)
+}
+
+func (f Resolutions) Less(i, j int) bool {
+ ith := f[i]
+ jth := f[j]
+
+ ithIsReal := ith.Reference != nil && ith.Reference.RealPath == ith.RequestPath
+ jthIsReal := jth.Reference != nil && jth.Reference.RealPath == jth.RequestPath
+
+ switch {
+ case ithIsReal && !jthIsReal:
+ return true
+ case !ithIsReal && jthIsReal:
+ return false
+ }
+
+ return ith.RequestPath < jth.RequestPath
+}
+
+func (f Resolutions) Swap(i, j int) {
+ f[i], f[j] = f[j], f[i]
+}
+
+func (f *Resolution) HasReference() bool {
+ if f == nil {
+ return false
+ }
+ return f.Reference != nil
+}
+
+func (f *Resolution) AllPaths() []Path {
+ set := strset.New()
+ set.Add(string(f.RequestPath))
+ if f.Reference != nil {
+ set.Add(string(f.Reference.RealPath))
+ }
+ for _, p := range f.LinkResolutions {
+ set.Add(string(p.RequestPath))
+ if p.Reference != nil {
+ set.Add(string(p.Reference.RealPath))
+ }
+ }
+
+ paths := set.List()
+ sort.Strings(paths)
+
+ var results []Path
+ for _, p := range paths {
+ results = append(results, Path(p))
+ }
+ return results
+}
+
+func (f *Resolution) AllRequestPaths() []Path {
+ set := strset.New()
+ set.Add(string(f.RequestPath))
+ for _, p := range f.LinkResolutions {
+ set.Add(string(p.RequestPath))
+ }
+
+ paths := set.List()
+ sort.Strings(paths)
+
+ var results []Path
+ for _, p := range paths {
+ results = append(results, Path(p))
+ }
+ return results
+}
+
+// RequestResolutionPath represents the traversal through the filesystem to access the current reference, including all symlink and hardlink resolution.
+func (f *Resolution) RequestResolutionPath() []Path {
+ var paths []Path
+ var firstPath Path
+ var lastLinkResolutionIsDead bool
+
+ if string(f.RequestPath) != "" {
+ firstPath = f.RequestPath
+ paths = append(paths, f.RequestPath)
+ }
+ for i, p := range f.LinkResolutions {
+ if i == 0 && p.RequestPath == f.RequestPath {
+ // ignore link resolution that starts with the same user requested path
+ continue
+ }
+ if firstPath == "" {
+ firstPath = p.RequestPath
+ }
+
+ paths = append(paths, p.RequestPath)
+
+ if i == len(f.LinkResolutions)-1 {
+ // we've reached the final link resolution
+ if p.Reference == nil {
+ lastLinkResolutionIsDead = true
+ }
+ }
+ }
+ if f.HasReference() && firstPath != f.Reference.RealPath && !lastLinkResolutionIsDead {
+ // we've reached the final reference that was resolved
+ // we should only do this if there was a link resolution
+ paths = append(paths, f.Reference.RealPath)
+ }
+ return paths
+}
+
+// References represents the traversal through the filesystem to access the current reference, including all symlink and hardlink resolution.
+func (f *Resolution) References() []Reference {
+ var refs []Reference
+ var lastLinkResolutionIsDead bool
+
+ for i, p := range f.LinkResolutions {
+ if p.Reference != nil {
+ refs = append(refs, *p.Reference)
+ }
+ if i == len(f.LinkResolutions)-1 {
+ // we've reached the final link resolution
+ if p.Reference == nil {
+ lastLinkResolutionIsDead = true
+ }
+ }
+ }
+ if f.Reference != nil && !lastLinkResolutionIsDead {
+ refs = append(refs, *f.Reference)
+ }
+ return refs
+}
diff --git a/pkg/file/resolution_test.go b/pkg/file/resolution_test.go
new file mode 100644
index 00000000..3a444950
--- /dev/null
+++ b/pkg/file/resolution_test.go
@@ -0,0 +1,391 @@
+package file
+
+import (
+ "github.com/stretchr/testify/assert"
+ "sort"
+ "testing"
+)
+
+func TestResolution_Less(t *testing.T) {
+
+ realA := Resolution{
+ RequestPath: "/parent/a",
+ Reference: &Reference{
+ RealPath: "/parent/a",
+ },
+ }
+
+ realB := Resolution{
+ RequestPath: "/parent/b",
+ Reference: &Reference{
+ RealPath: "/parent/b",
+ },
+ }
+
+ linkToA := Resolution{
+ RequestPath: "/parent-link/a",
+ Reference: &Reference{
+ RealPath: "/a",
+ },
+ }
+
+ linkToB := Resolution{
+ RequestPath: "/parent-link/b",
+ Reference: &Reference{
+ RealPath: "/b",
+ },
+ }
+
+ tests := []struct {
+ name string
+ subject []Resolution
+ want []Resolution
+ }{
+ {
+ name: "references to real files are preferred first",
+ subject: []Resolution{
+ linkToA,
+ realA,
+ },
+ want: []Resolution{
+ realA,
+ linkToA,
+ },
+ },
+ {
+ name: "real files are treated equally by request name",
+ subject: []Resolution{
+ realB,
+ realA,
+ },
+ want: []Resolution{
+ realA,
+ realB,
+ },
+ },
+ {
+ name: "link files are treated equally by request name",
+ subject: []Resolution{
+ linkToB,
+ linkToA,
+ },
+ want: []Resolution{
+ linkToA,
+ linkToB,
+ },
+ },
+ {
+ name: "regression",
+ subject: []Resolution{
+ {
+
+ RequestPath: "/parent-link/file-4.txt",
+ Reference: &Reference{
+ RealPath: "/parent/file-4.txt",
+ },
+ },
+ {
+
+ RequestPath: "/parent/file-4.txt",
+ Reference: &Reference{
+ RealPath: "/parent/file-4.txt",
+ },
+ },
+ },
+ want: []Resolution{
+ {
+ RequestPath: "/parent/file-4.txt",
+ Reference: &Reference{
+ RealPath: "/parent/file-4.txt",
+ },
+ },
+ {
+
+ RequestPath: "/parent-link/file-4.txt",
+ Reference: &Reference{
+ RealPath: "/parent/file-4.txt",
+ },
+ },
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ sort.Sort(Resolutions(tt.subject))
+ assert.Equal(t, tt.want, tt.subject)
+ })
+ }
+}
+
+func TestResolution_RequestResolutionPath(t *testing.T) {
+ tests := []struct {
+ name string
+ subject Resolution
+ want []Path
+ }{
+ {
+ name: "empty",
+ subject: Resolution{
+ LinkResolutions: nil,
+ },
+ want: nil,
+ },
+ {
+ name: "single ref",
+ subject: Resolution{
+ RequestPath: "/home/wagoodman/file.txt",
+ Reference: &Reference{
+ id: 1,
+ RealPath: "/home/wagoodman/file.txt",
+ },
+ LinkResolutions: nil,
+ },
+ want: []Path{
+ "/home/wagoodman/file.txt",
+ },
+ },
+ {
+ // /home -> /another/place
+ name: "ref with 1 leaf link resolutions",
+ subject: Resolution{
+ RequestPath: "/home",
+ Reference: &Reference{RealPath: "/another/place"},
+ LinkResolutions: []Resolution{
+ {
+ RequestPath: "/home",
+ Reference: &Reference{RealPath: "/home"},
+ },
+ },
+ },
+ want: []Path{
+ "/home",
+ "/another/place",
+ },
+ },
+ {
+ // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt
+
+ // this is the current state of the filetree
+ // .
+ // ├── 1
+ // │ ├── file.txt -> 2/real-file.txt
+ // │ └── link-to-place -> place
+ // ├── 2
+ // │ └── real-file.txt
+ // ├── home -> link-to-1/link-to-place
+ // ├── link-to-1 -> 1
+ // └── place
+ // └── wagoodman
+ // └── file.txt -> link-to-1/file.txt
+
+ name: "ref with 2 leaf link resolutions",
+ subject: Resolution{
+ RequestPath: "/home/wagoodman/file.txt",
+ Reference: &Reference{RealPath: "/2/real-file.txt"},
+ LinkResolutions: []Resolution{
+ {
+ RequestPath: "/place/wagoodman/file.txt",
+ Reference: &Reference{RealPath: "/place/wagoodman/file.txt"},
+ },
+ {
+ RequestPath: "/1/file.txt",
+ Reference: &Reference{RealPath: "/1/file.txt"},
+ },
+ },
+ },
+ want: []Path{
+ "/home/wagoodman/file.txt", // request
+ "/place/wagoodman/file.txt", // real intermediate path
+ "/1/file.txt", // real intermediate path
+ "/2/real-file.txt", // final resolved path on the reference
+ },
+ },
+ {
+ // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt
+
+ // this is the current state of the filetree
+ // .
+ // ├── 1
+ // │ ├── file.txt -> 2/real-file.txt
+ // │ └── link-to-place -> place
+ // ├── home -> link-to-1/link-to-place
+ // ├── link-to-1 -> 1
+ // └── place
+ // └── wagoodman
+ // └── file.txt -> link-to-1/file.txt
+
+ name: "ref with dead link",
+ subject: Resolution{
+ RequestPath: "/home/wagoodman/file.txt",
+ // note: this falls back to the last path that exists which is the behavior for link resolution options:
+ // []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks}
+ Reference: &Reference{RealPath: "/1/file.txt"},
+ LinkResolutions: []Resolution{
+ {
+ RequestPath: "/place/wagoodman/file.txt",
+ Reference: &Reference{RealPath: "/place/wagoodman/file.txt"},
+ },
+ {
+ RequestPath: "/1/file.txt",
+ Reference: &Reference{RealPath: "/1/file.txt"},
+ },
+ {
+ RequestPath: "/2/real-file.txt",
+ // nope! it's dead!
+ //Reference: &file.Reference{RealPath: "/2/real-file.txt"},
+ },
+ },
+ },
+ want: []Path{
+ "/home/wagoodman/file.txt", // request
+ "/place/wagoodman/file.txt", // real intermediate path
+ "/1/file.txt", // real intermediate path
+ "/2/real-file.txt", // final resolved path on the reference (that does not exist)
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.Equalf(t, tt.want, tt.subject.RequestResolutionPath(), "RequestResolutionPath()")
+ })
+ }
+}
+
+func TestResolution_References(t *testing.T) {
+ type fields struct {
+ ReferenceResolution Resolution
+ LeafLinkResolution []Resolution
+ }
+ tests := []struct {
+ name string
+ subject Resolution
+ want []Reference
+ }{
+ {
+ name: "empty",
+ subject: Resolution{
+ LinkResolutions: nil,
+ },
+ want: nil,
+ },
+ {
+ name: "single ref",
+ subject: Resolution{
+ RequestPath: "/home/wagoodman/file.txt",
+ Reference: &Reference{
+ id: 1,
+ RealPath: "/home/wagoodman/file.txt",
+ },
+ LinkResolutions: nil,
+ },
+ want: []Reference{
+ {
+ id: 1,
+ RealPath: "/home/wagoodman/file.txt",
+ },
+ },
+ },
+ {
+ // /home -> /another/place
+ name: "ref with 1 leaf link resolutions",
+ subject: Resolution{
+ RequestPath: "/home",
+ Reference: &Reference{RealPath: "/another/place"},
+ LinkResolutions: []Resolution{
+ {
+ RequestPath: "/home",
+ Reference: &Reference{RealPath: "/home"},
+ },
+ },
+ },
+ want: []Reference{
+ {RealPath: "/home"},
+ {RealPath: "/another/place"},
+ },
+ },
+ {
+ // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt
+
+ // this is the current state of the filetree
+ // .
+ // ├── 1
+ // │ ├── file.txt -> 2/real-file.txt
+ // │ └── link-to-place -> place
+ // ├── 2
+ // │ └── real-file.txt
+ // ├── home -> link-to-1/link-to-place
+ // ├── link-to-1 -> 1
+ // └── place
+ // └── wagoodman
+ // └── file.txt -> link-to-1/file.txt
+
+ name: "ref with 2 leaf link resolutions",
+ subject: Resolution{
+ RequestPath: "/home/wagoodman/file.txt",
+ Reference: &Reference{RealPath: "/2/real-file.txt"},
+ LinkResolutions: []Resolution{
+ {
+ RequestPath: "/place/wagoodman/file.txt",
+ Reference: &Reference{RealPath: "/place/wagoodman/file.txt"},
+ },
+ {
+ RequestPath: "/1/file.txt",
+ Reference: &Reference{RealPath: "/1/file.txt"},
+ },
+ },
+ },
+ want: []Reference{
+ {RealPath: "/place/wagoodman/file.txt"},
+ {RealPath: "/1/file.txt"},
+ {RealPath: "/2/real-file.txt"},
+ },
+ },
+ {
+ // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt
+
+ // this is the current state of the filetree
+ // .
+ // ├── 1
+ // │ ├── file.txt -> 2/real-file.txt
+ // │ └── link-to-place -> place
+ // ├── home -> link-to-1/link-to-place
+ // ├── link-to-1 -> 1
+ // └── place
+ // └── wagoodman
+ // └── file.txt -> link-to-1/file.txt
+
+ name: "ref with dead link",
+ subject: Resolution{
+ RequestPath: "/home/wagoodman/file.txt",
+ // note: this falls back to the last path that exists which is the behavior for link resolution options:
+ // []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks}
+ Reference: &Reference{RealPath: "/1/file.txt"},
+ LinkResolutions: []Resolution{
+ {
+ RequestPath: "/place/wagoodman/file.txt",
+ Reference: &Reference{RealPath: "/place/wagoodman/file.txt"},
+ },
+ {
+ RequestPath: "/1/file.txt",
+ Reference: &Reference{RealPath: "/1/file.txt"},
+ },
+ {
+ RequestPath: "/2/real-file.txt",
+ // nope! it's dead!
+ //Reference: &file.Reference{RealPath: "/2/real-file.txt"},
+ },
+ },
+ },
+ want: []Reference{
+ {RealPath: "/place/wagoodman/file.txt"},
+ {RealPath: "/1/file.txt"},
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.Equalf(t, tt.want, tt.subject.References(), "References()")
+
+ })
+ }
+}
diff --git a/pkg/file/tarutil.go b/pkg/file/tarutil.go
index b08416b2..4a01c09a 100644
--- a/pkg/file/tarutil.go
+++ b/pkg/file/tarutil.go
@@ -108,7 +108,7 @@ func MetadataFromTar(reader io.ReadCloser, tarPath string) (Metadata, error) {
if entry.Header.Size > 0 {
content = reader
}
- m := NewMetadata(entry.Header, entry.Sequence, content)
+ m := NewMetadata(entry.Header, content)
metadata = &m
return ErrTarStopIteration
}
diff --git a/pkg/file/tarutil_test.go b/pkg/file/tarutil_test.go
index cd67f23e..2103f242 100644
--- a/pkg/file/tarutil_test.go
+++ b/pkg/file/tarutil_test.go
@@ -62,40 +62,36 @@ func TestMetadataFromTar(t *testing.T) {
name: "path/branch/two/file-2.txt",
fixture: "fixture-1",
expected: Metadata{
- Path: "/path/branch/two/file-2.txt",
- TarHeaderName: "path/branch/two/file-2.txt",
- TarSequence: 5,
- Linkname: "",
- Size: 12,
- UserID: 1337,
- GroupID: 5432,
- TypeFlag: 0x30,
- IsDir: false,
- Mode: 0x1ed,
- MIMEType: "application/octet-stream",
+ Path: "/path/branch/two/file-2.txt",
+ LinkDestination: "",
+ Size: 12,
+ UserID: 1337,
+ GroupID: 5432,
+ Type: TypeRegular,
+ IsDir: false,
+ Mode: 0x1ed,
+ MIMEType: "application/octet-stream",
},
},
{
name: "path/branch/two/",
fixture: "fixture-1",
expected: Metadata{
- Path: "/path/branch/two",
- TarHeaderName: "path/branch/two/",
- TarSequence: 4,
- Linkname: "",
- Size: 0,
- UserID: 1337,
- GroupID: 5432,
- TypeFlag: 0x35,
- IsDir: true,
- Mode: 0x800001ed,
- MIMEType: "",
+ Path: "/path/branch/two",
+ LinkDestination: "",
+ Size: 0,
+ UserID: 1337,
+ GroupID: 5432,
+ Type: TypeDirectory,
+ IsDir: true,
+ Mode: 0x800001ed,
+ MIMEType: "",
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
- f := getTarFixture(t, "fixture-1")
+ f := getTarFixture(t, test.fixture)
metadata, err := MetadataFromTar(f, test.name)
assert.NoError(t, err)
assert.Equal(t, test.expected, metadata)
diff --git a/pkg/file/test-fixtures/symlinks-simple/link_to_link_to_new_readme b/pkg/file/test-fixtures/symlinks-simple/link_to_link_to_new_readme
new file mode 120000
index 00000000..e348d807
--- /dev/null
+++ b/pkg/file/test-fixtures/symlinks-simple/link_to_link_to_new_readme
@@ -0,0 +1 @@
+link_to_new_readme
\ No newline at end of file
diff --git a/pkg/file/test-fixtures/symlinks-simple/link_to_new_readme b/pkg/file/test-fixtures/symlinks-simple/link_to_new_readme
new file mode 120000
index 00000000..ea786ff2
--- /dev/null
+++ b/pkg/file/test-fixtures/symlinks-simple/link_to_new_readme
@@ -0,0 +1 @@
+readme
\ No newline at end of file
diff --git a/pkg/file/test-fixtures/symlinks-simple/readme b/pkg/file/test-fixtures/symlinks-simple/readme
new file mode 100644
index 00000000..df85b76a
--- /dev/null
+++ b/pkg/file/test-fixtures/symlinks-simple/readme
@@ -0,0 +1,2 @@
+this directory exists for unit tests on irregular files. You can't see other files here because they are removed after each test.
+This readme is a better version of Russell's teapot.
diff --git a/pkg/file/type.go b/pkg/file/type.go
index c2f9db47..67562ae9 100644
--- a/pkg/file/type.go
+++ b/pkg/file/type.go
@@ -1,25 +1,109 @@
package file
-import "archive/tar"
+import (
+ "archive/tar"
+ "os"
+)
const (
- TypeReg Type = tar.TypeReg
- TypeDir Type = tar.TypeDir
- TypeSymlink Type = tar.TypeSymlink
- TypeHardLink Type = tar.TypeLink
- TypeCharacterDevice Type = tar.TypeChar
- TypeBlockDevice Type = tar.TypeBlock
- TypeFifo Type = tar.TypeFifo
+ TypeRegular Type = iota
+ TypeHardLink
+ TypeSymLink
+ TypeCharacterDevice
+ TypeBlockDevice
+ TypeDirectory
+ TypeFIFO
+ TypeSocket
+ TypeIrregular
)
-var AllTypes = []Type{
- TypeReg,
- TypeDir,
- TypeSymlink,
- TypeHardLink,
- TypeCharacterDevice,
- TypeBlockDevice,
- TypeFifo,
+// why use an int type? we're looking for something that is memory compact; String() keeps it easily human interpretable.
+
+type Type int
+
+func AllTypes() []Type {
+ return []Type{
+ TypeRegular,
+ TypeHardLink,
+ TypeSymLink,
+ TypeCharacterDevice,
+ TypeBlockDevice,
+ TypeDirectory,
+ TypeFIFO,
+ TypeSocket,
+ TypeIrregular,
+ }
+}
+
+func TypeFromTarType(ty byte) Type {
+ switch ty {
+ case tar.TypeReg, tar.TypeRegA: // nolint: staticcheck
+ return TypeRegular
+ case tar.TypeLink:
+ return TypeHardLink
+ case tar.TypeSymlink:
+ return TypeSymLink
+ case tar.TypeChar:
+ return TypeCharacterDevice
+ case tar.TypeBlock:
+ return TypeBlockDevice
+ case tar.TypeDir:
+ return TypeDirectory
+ case tar.TypeFifo:
+ return TypeFIFO
+ default:
+ return TypeIrregular
+ }
}
-type Type rune
+func TypeFromMode(mode os.FileMode) Type {
+ switch {
+ case isSet(mode, os.ModeSymlink):
+ return TypeSymLink
+ case isSet(mode, os.ModeIrregular):
+ return TypeIrregular
+ case isSet(mode, os.ModeCharDevice):
+ return TypeCharacterDevice
+ case isSet(mode, os.ModeDevice):
+ return TypeBlockDevice
+ case isSet(mode, os.ModeNamedPipe):
+ return TypeFIFO
+ case isSet(mode, os.ModeSocket):
+ return TypeSocket
+ case mode.IsDir():
+ return TypeDirectory
+ case mode.IsRegular():
+ return TypeRegular
+ default:
+ return TypeIrregular
+ }
+}
+
+func isSet(mode, field os.FileMode) bool {
+ return mode&field != 0
+}
+
+func (t Type) String() string {
+ switch t {
+ case TypeRegular:
+ return "RegularFile"
+ case TypeHardLink:
+ return "HardLink"
+ case TypeSymLink:
+ return "SymbolicLink"
+ case TypeCharacterDevice:
+ return "CharacterDevice"
+ case TypeBlockDevice:
+ return "BlockDevice"
+ case TypeDirectory:
+ return "Directory"
+ case TypeFIFO:
+ return "FIFONode"
+ case TypeSocket:
+ return "Socket"
+ case TypeIrregular:
+ return "IrregularFile"
+ default:
+ return "Unknown"
+ }
+}
diff --git a/pkg/filetree/builder.go b/pkg/filetree/builder.go
new file mode 100644
index 00000000..1f017f8e
--- /dev/null
+++ b/pkg/filetree/builder.go
@@ -0,0 +1,56 @@
+package filetree
+
+import (
+ "fmt"
+
+ "github.com/anchore/stereoscope/pkg/file"
+)
+
+// Builder is a helper for building a filetree and accompanying index in a coordinated fashion.
+type Builder struct {
+ tree Writer
+ index IndexWriter
+}
+
+func NewBuilder(tree Writer, index IndexWriter) *Builder {
+ return &Builder{
+ tree: tree,
+ index: index,
+ }
+}
+
+func (b *Builder) Add(metadata file.Metadata) (*file.Reference, error) {
+ var (
+ ref *file.Reference
+ err error
+ )
+ switch metadata.Type {
+ case file.TypeSymLink:
+ ref, err = b.tree.AddSymLink(file.Path(metadata.Path), file.Path(metadata.LinkDestination))
+ if err != nil {
+ return nil, err
+ }
+ case file.TypeHardLink:
+ ref, err = b.tree.AddHardLink(file.Path(metadata.Path), file.Path(metadata.LinkDestination))
+ if err != nil {
+ return nil, err
+ }
+ case file.TypeDirectory:
+ ref, err = b.tree.AddDir(file.Path(metadata.Path))
+ if err != nil {
+ return nil, err
+ }
+ default:
+ ref, err = b.tree.AddFile(file.Path(metadata.Path))
+ if err != nil {
+ return nil, err
+ }
+ }
+ if ref == nil {
+ return nil, fmt.Errorf("could not add path=%q link=%q during tar iteration", metadata.Path, metadata.LinkDestination)
+ }
+
+ b.index.Add(*ref, metadata)
+
+ return ref, nil
+}
diff --git a/pkg/filetree/depth_first_path_walker.go b/pkg/filetree/depth_first_path_walker.go
index f246d8a9..71d3d7f4 100644
--- a/pkg/filetree/depth_first_path_walker.go
+++ b/pkg/filetree/depth_first_path_walker.go
@@ -58,12 +58,15 @@ func NewDepthFirstPathWalker(tree *FileTree, visitor FileNodeVisitor, conditions
func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *filenode.FileNode, error) {
w.pathStack.Push(from)
- var currentPath file.Path
- var currentNode *filenode.FileNode
- var err error
+ var (
+ currentPath file.Path
+ currentNode *nodeAccess
+ err error
+ )
for w.pathStack.Size() > 0 {
currentPath = w.pathStack.Pop()
+ // TODO: should we make these link resolutions configurable so you can observe the links on walk as well? (take link resolution options as a parameter)
currentNode, err = w.tree.node(currentPath, linkResolutionStrategy{
FollowAncestorLinks: true,
FollowBasenameLinks: true,
@@ -72,32 +75,32 @@ func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *filenode.FileNo
if err != nil {
return "", nil, err
}
- if currentNode == nil {
+ if !currentNode.HasFileNode() {
return "", nil, fmt.Errorf("nil Node at path=%q", currentPath)
}
// prevent infinite loop
if strings.Count(string(currentPath.Normalize()), file.DirSeparator) >= maxDirDepth {
- return currentPath, currentNode, ErrMaxTraversalDepth
+ return currentPath, currentNode.FileNode, ErrMaxTraversalDepth
}
- if w.conditions.ShouldTerminate != nil && w.conditions.ShouldTerminate(currentPath, *currentNode) {
- return currentPath, currentNode, nil
+ if w.conditions.ShouldTerminate != nil && w.conditions.ShouldTerminate(currentPath, *currentNode.FileNode) {
+ return currentPath, currentNode.FileNode, nil
}
currentPath = currentPath.Normalize()
// visit
if w.visitor != nil && !w.visitedPaths.Contains(currentPath) {
- if w.conditions.ShouldVisit == nil || w.conditions.ShouldVisit != nil && w.conditions.ShouldVisit(currentPath, *currentNode) {
- err := w.visitor(currentPath, *currentNode)
+ if w.conditions.ShouldVisit == nil || w.conditions.ShouldVisit != nil && w.conditions.ShouldVisit(currentPath, *currentNode.FileNode) {
+ err := w.visitor(currentPath, *currentNode.FileNode)
if err != nil {
- return currentPath, currentNode, err
+ return currentPath, currentNode.FileNode, err
}
w.visitedPaths.Add(currentPath)
}
}
- if w.conditions.ShouldContinueBranch != nil && !w.conditions.ShouldContinueBranch(currentPath, *currentNode) {
+ if w.conditions.ShouldContinueBranch != nil && !w.conditions.ShouldContinueBranch(currentPath, *currentNode.FileNode) {
continue
}
@@ -112,7 +115,7 @@ func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *filenode.FileNo
}
}
- return currentPath, currentNode, nil
+ return currentPath, currentNode.FileNode, nil
}
func (w *DepthFirstPathWalker) WalkAll() error {
diff --git a/pkg/filetree/depth_first_path_walker_test.go b/pkg/filetree/depth_first_path_walker_test.go
index ee65a695..74678709 100644
--- a/pkg/filetree/depth_first_path_walker_test.go
+++ b/pkg/filetree/depth_first_path_walker_test.go
@@ -10,7 +10,7 @@ import (
)
func dfsTestTree(t *testing.T) (*FileTree, map[string]*file.Reference) {
- tr := NewFileTree()
+ tr := New()
possiblePaths := make(map[string]*file.Reference)
@@ -233,7 +233,7 @@ func TestDFS_WalkAll_ConditionalBranchPruning(t *testing.T) {
}
func TestDFS_WalkAll_MaxDirDepthTerminatesTraversal(t *testing.T) {
- tr := NewFileTree()
+ tr := New()
possiblePaths := make(map[string]*file.Reference)
diff --git a/pkg/filetree/filenode/filenode.go b/pkg/filetree/filenode/filenode.go
index aa9b0fdf..14f0fe7a 100644
--- a/pkg/filetree/filenode/filenode.go
+++ b/pkg/filetree/filenode/filenode.go
@@ -2,6 +2,7 @@ package filenode
import (
"path"
+ "path/filepath"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/stereoscope/pkg/tree/node"
@@ -17,7 +18,7 @@ type FileNode struct {
func NewDir(p file.Path, ref *file.Reference) *FileNode {
return &FileNode{
RealPath: p,
- FileType: file.TypeDir,
+ FileType: file.TypeDirectory,
Reference: ref,
}
}
@@ -25,7 +26,7 @@ func NewDir(p file.Path, ref *file.Reference) *FileNode {
func NewFile(p file.Path, ref *file.Reference) *FileNode {
return &FileNode{
RealPath: p,
- FileType: file.TypeReg,
+ FileType: file.TypeRegular,
Reference: ref,
}
}
@@ -33,7 +34,7 @@ func NewFile(p file.Path, ref *file.Reference) *FileNode {
func NewSymLink(p, linkPath file.Path, ref *file.Reference) *FileNode {
return &FileNode{
RealPath: p,
- FileType: file.TypeSymlink,
+ FileType: file.TypeSymLink,
LinkPath: linkPath,
Reference: ref,
}
@@ -64,9 +65,27 @@ func (n *FileNode) Copy() node.Node {
}
func (n *FileNode) IsLink() bool {
- return n.FileType == file.TypeHardLink || n.FileType == file.TypeSymlink
+ return n.FileType == file.TypeHardLink || n.FileType == file.TypeSymLink
}
func IDByPath(p file.Path) node.ID {
return node.ID(p)
}
+
+func (n *FileNode) RenderLinkDestination() file.Path {
+ if !n.IsLink() {
+ return ""
+ }
+
+ if n.LinkPath.IsAbsolutePath() {
+ // use links with absolute paths blindly
+ return n.LinkPath
+ }
+
+ // resolve relative link paths
+ var parentDir string
+ parentDir, _ = filepath.Split(string(n.RealPath)) // TODO: alex: should this be path.Split, not filepath.Split?
+
+ // assemble relative link path by normalizing: "/cur/dir/../file1.txt" --> "/cur/file1.txt"
+ return file.Path(path.Clean(path.Join(parentDir, string(n.LinkPath))))
+}
diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go
index 5ca0f413..9352e8d9 100644
--- a/pkg/filetree/filetree.go
+++ b/pkg/filetree/filetree.go
@@ -4,10 +4,13 @@ import (
"errors"
"fmt"
"path"
- "path/filepath"
+ "sort"
"strings"
- "github.com/anchore/stereoscope/internal"
+ "github.com/scylladb/go-set/strset"
+
+ "github.com/scylladb/go-set/iset"
+
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/stereoscope/pkg/filetree/filenode"
"github.com/anchore/stereoscope/pkg/tree"
@@ -24,7 +27,13 @@ type FileTree struct {
}
// NewFileTree creates a new FileTree instance.
+// Deprecated: use New() instead.
func NewFileTree() *FileTree {
+ return New()
+}
+
+// New creates a new FileTree instance.
+func New() *FileTree {
t := tree.NewTree()
// Initialize FileTree with a root "/" Node
@@ -36,8 +45,8 @@ func NewFileTree() *FileTree {
}
// Copy returns a Copy of the current FileTree.
-func (t *FileTree) Copy() (*FileTree, error) {
- ct := NewFileTree()
+func (t *FileTree) Copy() (ReadWriter, error) {
+ ct := New()
ct.tree = t.tree.Copy()
return ct, nil
}
@@ -45,18 +54,18 @@ func (t *FileTree) Copy() (*FileTree, error) {
// AllFiles returns all files within the FileTree (defaults to regular files only, but you can provide one or more allow types).
func (t *FileTree) AllFiles(types ...file.Type) []file.Reference {
if len(types) == 0 {
- types = []file.Type{file.TypeReg}
+ types = []file.Type{file.TypeRegular}
}
- typeSet := internal.NewStringSet()
+ typeSet := iset.New()
for _, t := range types {
- typeSet.Add(string(t))
+ typeSet.Add(int(t))
}
var files []file.Reference
for _, n := range t.tree.Nodes() {
f := n.(*filenode.FileNode)
- if typeSet.Contains(string(f.FileType)) && f.Reference != nil {
+ if typeSet.Has(int(f.FileType)) && f.Reference != nil {
files = append(files, *f.Reference)
}
}
@@ -75,7 +84,7 @@ func (t *FileTree) AllRealPaths() []file.Path {
}
func (t *FileTree) ListPaths(dir file.Path) ([]file.Path, error) {
- n, err := t.node(dir, linkResolutionStrategy{
+ fna, err := t.node(dir, linkResolutionStrategy{
FollowAncestorLinks: true,
FollowBasenameLinks: true,
})
@@ -83,16 +92,16 @@ func (t *FileTree) ListPaths(dir file.Path) ([]file.Path, error) {
return nil, err
}
- if n == nil {
+ if !fna.HasFileNode() {
return nil, nil
}
- if n.FileType != file.TypeDir {
+ if fna.FileNode.FileType != file.TypeDirectory {
return nil, nil
}
var listing []file.Path
- children := t.tree.Children(n)
+ children := t.tree.Children(fna.FileNode)
for _, child := range children {
if child == nil {
continue
@@ -106,13 +115,25 @@ func (t *FileTree) ListPaths(dir file.Path) ([]file.Path, error) {
return nil, err
}
- listing = append(listing, file.Path(path.Join(string(dir), fn.RealPath.Basename())))
+ listing = append(listing, file.Path(path.Join(string(dir), fn.FileNode.RealPath.Basename())))
}
return listing, nil
}
// File fetches a file.Reference for the given path. Returns nil if the path does not exist in the FileTree.
-func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, *file.Reference, error) {
+func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, *file.Resolution, error) {
+ currentNode, err := t.file(path, options...)
+ if err != nil {
+ return false, nil, err
+ }
+ if currentNode.HasFileNode() {
+ return true, currentNode.FileResolution(), err
+ }
+ return false, nil, err
+}
+
+// file fetches the nodeAccess for the given path. Returns nil if the path does not exist in the FileTree.
+func (t *FileTree) file(path file.Path, options ...LinkResolutionOption) (*nodeAccess, error) {
userStrategy := newLinkResolutionStrategy(options...)
// For: /some/path/here
// Where: /some/path -> /other/place
@@ -131,13 +152,12 @@ func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool,
//
// Therefore we can safely lookup the path first without worrying about symlink resolution yet... if there is a
// hit, return it! If not, fallback to symlink resolution.
-
currentNode, err := t.node(path, linkResolutionStrategy{})
if err != nil {
- return false, nil, err
+ return nil, err
}
- if currentNode != nil && (!currentNode.IsLink() || currentNode.IsLink() && !userStrategy.FollowBasenameLinks) {
- return true, currentNode.Reference, nil
+ if currentNode.HasFileNode() && (!currentNode.FileNode.IsLink() || currentNode.FileNode.IsLink() && !userStrategy.FollowBasenameLinks) {
+ return currentNode, nil
}
// symlink resolution!... within the context of container images (which is outside of the responsibility of this object)
@@ -148,59 +168,97 @@ func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool,
FollowBasenameLinks: userStrategy.FollowBasenameLinks,
DoNotFollowDeadBasenameLinks: userStrategy.DoNotFollowDeadBasenameLinks,
})
- if currentNode != nil {
- return true, currentNode.Reference, err
+ if currentNode.HasFileNode() {
+ return currentNode, err
}
- return false, nil, err
+ return nil, err
+}
+
+func newResolutions(nodePath []nodeAccess) []file.Resolution {
+ var refPath []file.Resolution
+ for i, n := range nodePath {
+ if i == len(nodePath)-1 && n.FileNode != nil {
+ // this is already on the parent Access object (unless it is a dead link)
+ break
+ }
+ access := file.Resolution{
+ RequestPath: n.RequestPath,
+ }
+ if n.FileNode != nil {
+ access.Reference = n.FileNode.Reference
+ }
+
+ refPath = append(refPath, access)
+ }
+ return refPath
}
-func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*filenode.FileNode, error) {
+func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAccess, error) {
normalizedPath := p.Normalize()
nodeID := filenode.IDByPath(normalizedPath)
if !strategy.FollowLinks() {
n := t.tree.Node(nodeID)
if n == nil {
- return nil, nil
+ return &nodeAccess{
+ RequestPath: normalizedPath,
+ FileNode: nil,
+ }, nil
}
- return n.(*filenode.FileNode), nil
+ return &nodeAccess{
+ RequestPath: normalizedPath,
+ FileNode: n.(*filenode.FileNode),
+ }, nil
}
- var currentNode *filenode.FileNode
+ var currentNode *nodeAccess
var err error
if strategy.FollowAncestorLinks {
currentNode, err = t.resolveAncestorLinks(normalizedPath, nil)
if err != nil {
+ if currentNode != nil {
+ currentNode.RequestPath = normalizedPath
+ }
return currentNode, err
}
} else {
n := t.tree.Node(nodeID)
if n != nil {
- currentNode = n.(*filenode.FileNode)
+ currentNode = &nodeAccess{
+ RequestPath: normalizedPath,
+ FileNode: n.(*filenode.FileNode),
+ }
}
}
// link resolution has come up with nothing, return what we have so far
- if currentNode == nil {
+ if !currentNode.HasFileNode() {
+ if currentNode != nil {
+ currentNode.RequestPath = normalizedPath
+ }
return currentNode, nil
}
if strategy.FollowBasenameLinks {
currentNode, err = t.resolveNodeLinks(currentNode, !strategy.DoNotFollowDeadBasenameLinks, nil)
}
+ if currentNode != nil {
+ currentNode.RequestPath = normalizedPath
+ }
+
return currentNode, err
}
// return FileNode of the basename in the given path (no resolution is done at or past the basename). Note: it is
// assumed that the given path has already been normalized.
-func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal.Set) (*filenode.FileNode, error) {
+func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths file.PathSet) (*nodeAccess, error) {
// performance optimization... see if there is a node at the path (as if it is a real path). If so,
// use it, otherwise, continue with ancestor resolution
- currentNode, err := t.node(path, linkResolutionStrategy{})
+ currentNodeAccess, err := t.node(path, linkResolutionStrategy{})
if err != nil {
return nil, err
}
- if currentNode != nil {
- return currentNode, nil
+ if currentNodeAccess.HasFileNode() {
+ return currentNodeAccess, nil
}
var pathParts = strings.Split(string(path), file.DirSeparator)
@@ -219,101 +277,100 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal.
currentPathStr = string(currentPath)
// fetch the Node with NO link resolution strategy
- currentNode, err = t.node(currentPath, linkResolutionStrategy{})
+ currentNodeAccess, err = t.node(currentPath, linkResolutionStrategy{})
if err != nil {
// should never occur
return nil, err
}
- if currentNode == nil {
+ if !currentNodeAccess.HasFileNode() {
// we've reached a point where the given path that has never been observed. This can happen for one reason:
// 1. the current path is really invalid and we should return NIL indicating that it cannot be resolved.
// 2. the current path is a link? no, this isn't possible since we are iterating through constituent paths
// in order, so we are guaranteed to hit parent links in which we should adjust the search path accordingly.
- return nil, nil
+ return currentNodeAccess, nil
}
// keep track of what we've resolved to so far...
- currentPath = currentNode.RealPath
+ currentPath = currentNodeAccess.FileNode.RealPath
// this is positively a path, however, there is no information about this Node. This may be OK since we
// allow for adding children before parents (and even don't require the parent to ever be added --which is
// potentially valid given the underlying messy data [tar headers]). In this case we keep building the path
// (which we've already done at this point) and continue.
- if currentNode.Reference == nil {
+ if currentNodeAccess.FileNode.Reference == nil {
continue
}
// by this point we definitely have a file reference, if this is a link (and not the basename) resolve any
// links until the next Node is resolved (or not).
isLastPart := idx == len(pathParts)-1
- if !isLastPart && currentNode.IsLink() {
- currentNode, err = t.resolveNodeLinks(currentNode, true, attemptedPaths)
+ if !isLastPart && currentNodeAccess.FileNode.IsLink() {
+ currentNodeAccess, err = t.resolveNodeLinks(currentNodeAccess, true, attemptedPaths)
if err != nil {
// only expected to happen on cycles
- return currentNode, err
+ return currentNodeAccess, err
}
- if currentNode != nil {
- currentPath = currentNode.RealPath
+ if currentNodeAccess.HasFileNode() {
+ currentPath = currentNodeAccess.FileNode.RealPath
}
currentPathStr = string(currentPath)
}
}
// by this point we have processed all constituent paths; there were no un-added paths and the path is guaranteed
// to have followed link resolution.
- return currentNode, nil
+ return currentNodeAccess, nil
}
-// followNode takes the given FileNode and resolves all links at the base of the real path for the node (this implies
+// resolveNodeLinks takes the given FileNode and resolves all links at the base of the real path for the node (this implies
// that NO ancestors are considered).
-func (t *FileTree) resolveNodeLinks(n *filenode.FileNode, followDeadBasenameLinks bool, attemptedPaths internal.Set) (*filenode.FileNode, error) {
+//nolint:funlen
+func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, attemptedPaths file.PathSet) (*nodeAccess, error) {
if n == nil {
return nil, fmt.Errorf("cannot resolve links with nil Node given")
}
// we need to short-circuit link resolution that never resolves (cycles) due to a cycle referencing nodes that do not exist
if attemptedPaths == nil {
- attemptedPaths = internal.NewStringSet()
+ attemptedPaths = file.NewPathSet()
}
// note: this assumes that callers are passing paths in which the constituent parts are NOT symlinks
- var lastNode *filenode.FileNode
+ var lastNode *nodeAccess
+ var nodePath []nodeAccess
+ var nextPath file.Path
- currentNode := n
+ currentNodeAccess := n
// keep resolving links until a regular file or directory is found
- alreadySeen := internal.NewStringSet()
+ alreadySeen := strset.New()
var err error
for {
+ nodePath = append(nodePath, *currentNodeAccess)
+
// if there is no next path, return this reference (dead link)
- if currentNode == nil {
+ if !currentNodeAccess.HasFileNode() {
+ // the last path we tried to resolve is a dead link, persist the original path as the failed request
+ if len(nodePath) > 0 {
+ nodePath[len(nodePath)-1].RequestPath = nextPath
+ }
break
}
- if alreadySeen.Contains(string(currentNode.RealPath)) {
+ if alreadySeen.Has(string(currentNodeAccess.FileNode.RealPath)) {
return nil, ErrLinkCycleDetected
}
- if !currentNode.IsLink() {
+ if !currentNodeAccess.FileNode.IsLink() {
// no resolution and there is no next link (pseudo dead link)... return what you found
// any content fetches will fail, but that's ok
break
}
// prepare for the next iteration
- alreadySeen.Add(string(currentNode.RealPath))
+ alreadySeen.Add(string(currentNodeAccess.FileNode.RealPath))
- var nextPath file.Path
- if currentNode.LinkPath.IsAbsolutePath() {
- // use links with absolute paths blindly
- nextPath = currentNode.LinkPath
- } else {
- // resolve relative link paths
- var parentDir string
- parentDir, _ = filepath.Split(string(currentNode.RealPath))
- // assemble relative link path by normalizing: "/cur/dir/../file1.txt" --> "/cur/file1.txt"
- nextPath = file.Path(path.Clean(path.Join(parentDir, string(currentNode.LinkPath))))
- }
+ nextPath = currentNodeAccess.FileNode.RenderLinkDestination()
// no more links to follow
if string(nextPath) == "" {
@@ -321,32 +378,42 @@ func (t *FileTree) resolveNodeLinks(n *filenode.FileNode, followDeadBasenameLink
}
// preserve the current Node for the next loop (in case we shouldn't follow a potentially dead link)
- lastNode = currentNode
+ lastNode = currentNodeAccess
// break any cycles with non-existent paths (before attempting to look the path up again)
- if attemptedPaths.Contains(string(nextPath)) {
+ if attemptedPaths.Contains(nextPath) {
return nil, ErrLinkCycleDetected
}
// get the next Node (based on the next path)
- attemptedPaths.Add(string(nextPath))
- currentNode, err = t.resolveAncestorLinks(nextPath, attemptedPaths)
+ attemptedPaths.Add(nextPath)
+ currentNodeAccess, err = t.resolveAncestorLinks(nextPath, attemptedPaths)
if err != nil {
+ if currentNodeAccess != nil {
+ currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...)
+ }
+
// only expected to occur upon cycle detection
- return currentNode, err
+ return currentNodeAccess, err
}
}
- if currentNode == nil && !followDeadBasenameLinks {
+ if !currentNodeAccess.HasFileNode() && !followDeadBasenameLinks {
+ if lastNode != nil {
+ lastNode.LeafLinkResolution = append(lastNode.LeafLinkResolution, nodePath...)
+ }
return lastNode, nil
}
- return currentNode, nil
+ if currentNodeAccess != nil {
+ currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...)
+ }
+ return currentNodeAccess, nil
}
// FilesByGlob fetches zero to many file.References for the given glob pattern (considers symlinks).
-func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([]GlobResult, error) {
- results := make([]GlobResult, 0)
+func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([]file.Resolution, error) {
+ var results []file.Resolution
if len(query) == 0 {
return nil, fmt.Errorf("no glob pattern given")
@@ -379,7 +446,7 @@ func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([
if !path.IsAbs(match) {
matchPath = file.Path(path.Join("/", match))
}
- fn, err := t.node(matchPath, linkResolutionStrategy{
+ fna, err := t.node(matchPath, linkResolutionStrategy{
FollowAncestorLinks: true,
FollowBasenameLinks: true,
DoNotFollowDeadBasenameLinks: doNotFollowDeadBasenameLinks,
@@ -388,20 +455,20 @@ func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([
return nil, err
}
// the Node must exist and should not be a directory
- if fn != nil && fn.FileType != file.TypeDir {
- result := GlobResult{
- MatchPath: matchPath,
- RealPath: fn.RealPath,
- // we should not be given a link Node UNLESS it is dead
- IsDeadLink: fn.IsLink(),
+ if fna.HasFileNode() && fna.FileNode.FileType != file.TypeDirectory {
+ result := file.NewResolution(
+ matchPath,
+ fna.FileNode.Reference,
+ newResolutions(fna.LeafLinkResolution),
+ )
+ if result != nil {
+ results = append(results, *result)
}
- if fn.Reference != nil {
- result.Reference = *fn.Reference
- }
- results = append(results, result)
}
}
+ sort.Sort(file.Resolutions(results))
+
return results, nil
}
@@ -410,20 +477,20 @@ func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([
// hardlink resolution is performed on the given path --which implies that the given path MUST be a real path (have no
// links in constituent paths)
func (t *FileTree) AddFile(realPath file.Path) (*file.Reference, error) {
- fn, err := t.node(realPath, linkResolutionStrategy{})
+ fna, err := t.node(realPath, linkResolutionStrategy{})
if err != nil {
return nil, err
}
- if fn != nil {
+ if fna.HasFileNode() {
// this path already exists
- if fn.FileType != file.TypeReg {
+ if fna.FileNode.FileType != file.TypeRegular {
return nil, fmt.Errorf("path=%q already exists but is NOT a regular file", realPath)
}
// this is a regular file, provide a new or existing file.Reference
- if fn.Reference == nil {
- fn.Reference = file.NewFileReference(realPath)
+ if fna.FileNode.Reference == nil {
+ fna.FileNode.Reference = file.NewFileReference(realPath)
}
- return fn.Reference, nil
+ return fna.FileNode.Reference, nil
}
// this is a new path... add the new Node + parents
@@ -438,20 +505,20 @@ func (t *FileTree) AddFile(realPath file.Path) (*file.Reference, error) {
// link path captured and returned. Note: NO symlink or hardlink resolution is performed on the given path --which
// implies that the given path MUST be a real path (have no links in constituent paths)
func (t *FileTree) AddSymLink(realPath file.Path, linkPath file.Path) (*file.Reference, error) {
- fn, err := t.node(realPath, linkResolutionStrategy{})
+ fna, err := t.node(realPath, linkResolutionStrategy{})
if err != nil {
return nil, err
}
- if fn != nil {
+ if fna.HasFileNode() {
// this path already exists
- if fn.FileType != file.TypeSymlink {
+ if fna.FileNode.FileType != file.TypeSymLink {
return nil, fmt.Errorf("path=%q already exists but is NOT a symlink file", realPath)
}
// this is a symlink file, provide a new or existing file.Reference
- if fn.Reference == nil {
- fn.Reference = file.NewFileReference(realPath)
+ if fna.FileNode.Reference == nil {
+ fna.FileNode.Reference = file.NewFileReference(realPath)
}
- return fn.Reference, nil
+ return fna.FileNode.Reference, nil
}
// this is a new path... add the new Node + parents
@@ -466,20 +533,20 @@ func (t *FileTree) AddSymLink(realPath file.Path, linkPath file.Path) (*file.Ref
// path captured and returned. Note: NO symlink or hardlink resolution is performed on the given path --which
// implies that the given path MUST be a real path (have no links in constituent paths)
func (t *FileTree) AddHardLink(realPath file.Path, linkPath file.Path) (*file.Reference, error) {
- fn, err := t.node(realPath, linkResolutionStrategy{})
+ fna, err := t.node(realPath, linkResolutionStrategy{})
if err != nil {
return nil, err
}
- if fn != nil {
+ if fna.HasFileNode() {
// this path already exists
- if fn.FileType != file.TypeHardLink {
+ if fna.FileNode.FileType != file.TypeHardLink {
return nil, fmt.Errorf("path=%q already exists but is NOT a symlink file", realPath)
}
// this is a symlink file, provide a new or existing file.Reference
- if fn.Reference == nil {
- fn.Reference = file.NewFileReference(realPath)
+ if fna.FileNode.Reference == nil {
+ fna.FileNode.Reference = file.NewFileReference(realPath)
}
- return fn.Reference, nil
+ return fna.FileNode.Reference, nil
}
// this is a new path... add the new Node + parents
@@ -497,20 +564,20 @@ func (t *FileTree) AddHardLink(realPath file.Path, linkPath file.Path) (*file.Re
// Note: NO symlink or hardlink resolution is performed on the given path --which implies that the given path MUST
// be a real path (have no links in constituent paths)
func (t *FileTree) AddDir(realPath file.Path) (*file.Reference, error) {
- fn, err := t.node(realPath, linkResolutionStrategy{})
+ fna, err := t.node(realPath, linkResolutionStrategy{})
if err != nil {
return nil, err
}
- if fn != nil {
+ if fna.HasFileNode() {
// this path already exists
- if fn.FileType != file.TypeDir {
+ if fna.FileNode.FileType != file.TypeDirectory {
return nil, fmt.Errorf("path=%q already exists but is NOT a symlink file", realPath)
}
- // this is a symlink file, provide a new or existing file.Reference
- if fn.Reference == nil {
- fn.Reference = file.NewFileReference(realPath)
+ // this is a directory, provide a new or existing file.Reference
+ if fna.FileNode.Reference == nil {
+ fna.FileNode.Reference = file.NewFileReference(realPath)
}
- return fn.Reference, nil
+ return fna.FileNode.Reference, nil
}
// this is a new path... add the new Node + parents
@@ -532,22 +599,22 @@ func (t *FileTree) addParentPaths(realPath file.Path) error {
return fmt.Errorf("unable to determine parent path while adding path=%q: %w", realPath, err)
}
- fn, err := t.node(parentPath, linkResolutionStrategy{})
+ fna, err := t.node(parentPath, linkResolutionStrategy{})
if err != nil {
return err
}
- if fn == nil {
+ if !fna.HasFileNode() {
// add parents of the Node until an existent parent is found it's important to do this in reverse order
// to ensure we are checking the fewest amount of parents possible.
var pathsToAdd []file.Path
parentPaths := realPath.ConstituentPaths()
for idx := len(parentPaths) - 1; idx >= 0; idx-- {
- fn, err := t.node(parentPaths[idx], linkResolutionStrategy{})
+ resolvedFna, err := t.node(parentPaths[idx], linkResolutionStrategy{})
if err != nil {
return err
}
- if fn != nil {
+ if resolvedFna.HasFileNode() {
break
}
pathsToAdd = append(pathsToAdd, parentPaths[idx])
@@ -584,11 +651,11 @@ func (t *FileTree) setFileNode(fn *filenode.FileNode) error {
if err != nil {
return err
}
- if parentNode == nil {
+ if !parentNode.HasFileNode() {
return fmt.Errorf("unable to find parent path=%q while adding path=%q", parentPath, fn.RealPath)
}
- return t.tree.AddChild(parentNode, fn)
+ return t.tree.AddChild(parentNode.FileNode, fn)
}
// RemovePath deletes the file.Reference from the FileTree by the given path. If the basename of the given path
@@ -599,18 +666,18 @@ func (t *FileTree) RemovePath(path file.Path) error {
return ErrRemovingRoot
}
- fn, err := t.node(path, linkResolutionStrategy{
+ fna, err := t.node(path, linkResolutionStrategy{
FollowAncestorLinks: true,
FollowBasenameLinks: false,
})
if err != nil {
return err
}
- if fn == nil {
+ if !fna.HasFileNode() {
return nil
}
- _, err = t.tree.RemoveNode(fn)
+ _, err = t.tree.RemoveNode(fna.FileNode)
if err != nil {
return err
}
@@ -621,18 +688,18 @@ func (t *FileTree) RemovePath(path file.Path) error {
// basename is a symlink, then the symlink is followed before resolving children. If the path does not exist, this is a
// nop.
func (t *FileTree) RemoveChildPaths(path file.Path) error {
- fn, err := t.node(path, linkResolutionStrategy{
+ fna, err := t.node(path, linkResolutionStrategy{
FollowAncestorLinks: true,
FollowBasenameLinks: true,
})
if err != nil {
return err
}
- if fn == nil {
+ if !fna.HasFileNode() {
// can't remove child paths for Node that doesn't exist!
return nil
}
- for _, child := range t.tree.Children(fn) {
+ for _, child := range t.tree.Children(fna.FileNode) {
_, err := t.tree.RemoveNode(child)
if err != nil {
return err
@@ -641,31 +708,31 @@ func (t *FileTree) RemoveChildPaths(path file.Path) error {
return nil
}
-// Reader returns a tree.Reader useful for Tree traversal.
-func (t *FileTree) Reader() tree.Reader {
+// TreeReader returns a tree.Reader useful for Tree traversal.
+func (t *FileTree) TreeReader() tree.Reader {
return t.tree
}
// PathDiff shows the path differences between two trees (useful for testing)
func (t *FileTree) PathDiff(other *FileTree) (extra, missing []file.Path) {
- ourPaths := internal.NewStringSet()
+ ourPaths := strset.New()
for _, fn := range t.tree.Nodes() {
ourPaths.Add(string(fn.ID()))
}
- theirPaths := internal.NewStringSet()
+ theirPaths := strset.New()
for _, fn := range other.tree.Nodes() {
theirPaths.Add(string(fn.ID()))
}
for _, fn := range other.tree.Nodes() {
- if !ourPaths.Contains(string(fn.ID())) {
+ if !ourPaths.Has(string(fn.ID())) {
extra = append(extra, file.Path(fn.ID()))
}
}
for _, fn := range t.tree.Nodes() {
- if !theirPaths.Contains(string(fn.ID())) {
+ if !theirPaths.Has(string(fn.ID())) {
missing = append(missing, file.Path(fn.ID()))
}
}
@@ -698,12 +765,12 @@ func (t *FileTree) Walk(fn func(path file.Path, f filenode.FileNode) error, cond
return NewDepthFirstPathWalker(t, fn, conditions).WalkAll()
}
-// merge takes the given Tree and combines it with the current Tree, preferring files in the other Tree if there
+// Merge takes the given Tree and combines it with the current Tree, preferring files in the other Tree if there
// are path conflicts. This is the basis function for squashing (where the current Tree is the bottom Tree and the
// given Tree is the top Tree).
//
//nolint:gocognit,funlen
-func (t *FileTree) merge(upper *FileTree) error {
+func (t *FileTree) Merge(upper Reader) error {
conditions := tree.WalkConditions{
ShouldContinueBranch: func(n node.Node) bool {
p := file.Path(n.ID())
@@ -721,22 +788,22 @@ func (t *FileTree) merge(upper *FileTree) error {
}
upperNode := n.(*filenode.FileNode)
// opaque directories must be processed first
- if upper.hasOpaqueDirectory(upperNode.RealPath) {
+ if hasOpaqueDirectory(upper, upperNode.RealPath) {
err := t.RemoveChildPaths(upperNode.RealPath)
if err != nil {
- return fmt.Errorf("filetree merge failed to remove child paths (upperPath=%s): %w", upperNode.RealPath, err)
+ return fmt.Errorf("filetree Merge failed to remove child paths (upperPath=%s): %w", upperNode.RealPath, err)
}
}
if upperNode.RealPath.IsWhiteout() {
lowerPath, err := upperNode.RealPath.UnWhiteoutPath()
if err != nil {
- return fmt.Errorf("filetree merge failed to find original upperPath for whiteout (upperPath=%s): %w", upperNode.RealPath, err)
+ return fmt.Errorf("filetree Merge failed to find original upperPath for whiteout (upperPath=%s): %w", upperNode.RealPath, err)
}
err = t.RemovePath(lowerPath)
if err != nil {
- return fmt.Errorf("filetree merge failed to remove upperPath (upperPath=%s): %w", lowerPath, err)
+ return fmt.Errorf("filetree Merge failed to remove upperPath (upperPath=%s): %w", lowerPath, err)
}
return nil
@@ -747,9 +814,9 @@ func (t *FileTree) merge(upper *FileTree) error {
FollowBasenameLinks: false,
})
if err != nil {
- return fmt.Errorf("filetree merge failed when looking for path=%q : %w", upperNode.RealPath, err)
+ return fmt.Errorf("filetree Merge failed when looking for path=%q : %w", upperNode.RealPath, err)
}
- if lowerNode == nil {
+ if !lowerNode.HasFileNode() {
// there is no existing Node... add parents and prepare to set
if err := t.addParentPaths(upperNode.RealPath); err != nil {
return fmt.Errorf("could not add parent paths to lower: %w", err)
@@ -759,21 +826,21 @@ func (t *FileTree) merge(upper *FileTree) error {
nodeCopy := *upperNode
// keep original file references if the upper tree does not have them (only for the same file types)
- if lowerNode != nil && lowerNode.Reference != nil && upperNode.Reference == nil && upperNode.FileType == lowerNode.FileType {
- nodeCopy.Reference = lowerNode.Reference
+ if lowerNode.HasFileNode() && lowerNode.FileNode.Reference != nil && upperNode.Reference == nil && upperNode.FileType == lowerNode.FileNode.FileType {
+ nodeCopy.Reference = lowerNode.FileNode.Reference
}
- if lowerNode != nil && upperNode.FileType != file.TypeDir && lowerNode.FileType == file.TypeDir {
+ if lowerNode.HasFileNode() && upperNode.FileType != file.TypeDirectory && lowerNode.FileNode.FileType == file.TypeDirectory {
// NOTE: both upperNode and lowerNode paths are the same, and does not have an effect
// on removal of child paths
err := t.RemoveChildPaths(upperNode.RealPath)
if err != nil {
- return fmt.Errorf("filetree merge failed to remove children for non-directory upper node (%s): %w", upperNode.RealPath, err)
+ return fmt.Errorf("filetree Merge failed to remove children for non-directory upper node (%s): %w", upperNode.RealPath, err)
}
}
// graft a copy of the upper Node with potential lower information into the lower tree
if err := t.setFileNode(&nodeCopy); err != nil {
- return fmt.Errorf("filetree merge failed to set file Node (Node=%+v): %w", nodeCopy, err)
+ return fmt.Errorf("filetree Merge failed to set file Node (Node=%+v): %w", nodeCopy, err)
}
return nil
@@ -782,10 +849,10 @@ func (t *FileTree) merge(upper *FileTree) error {
// we are using the tree walker instead of the path walker to only look at an resolve merging of real files
// with no consideration to virtual paths (paths that are valid in the filetree because constituent paths
// contain symlinks).
- return tree.NewDepthFirstWalkerWithConditions(upper.Reader(), visitor, conditions).WalkAll()
+ return tree.NewDepthFirstWalkerWithConditions(upper.TreeReader(), visitor, conditions).WalkAll()
}
-func (t *FileTree) hasOpaqueDirectory(directoryPath file.Path) bool {
+func hasOpaqueDirectory(t Reader, directoryPath file.Path) bool {
opaqueWhiteoutChild := file.Path(path.Join(string(directoryPath), file.OpaqueWhiteout))
return t.HasPath(opaqueWhiteoutChild)
}
diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go
index 0d3e0c27..e2d592cc 100644
--- a/pkg/filetree/filetree_test.go
+++ b/pkg/filetree/filetree_test.go
@@ -2,18 +2,20 @@ package filetree
import (
"errors"
- "fmt"
- "github.com/stretchr/testify/require"
+ "github.com/scylladb/go-set/strset"
"testing"
- "github.com/anchore/stereoscope/internal"
+ "github.com/google/go-cmp/cmp"
+ "github.com/google/go-cmp/cmp/cmpopts"
+ "github.com/stretchr/testify/require"
+
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/stereoscope/pkg/filetree/filenode"
"github.com/stretchr/testify/assert"
)
func TestFileTree_AddPath(t *testing.T) {
- tr := NewFileTree()
+ tr := New()
path := file.Path("/home")
fileNode, err := tr.AddFile(path)
if err != nil {
@@ -21,13 +23,13 @@ func TestFileTree_AddPath(t *testing.T) {
}
_, f, _ := tr.File(path)
- if f != fileNode {
+ if f.Reference != fileNode {
t.Fatal("expected pointer to the newly created fileNode")
}
}
func TestFileTree_AddPathAndMissingAncestors(t *testing.T) {
- tr := NewFileTree()
+ tr := New()
path := file.Path("/home/wagoodman/awesome/file.txt")
fileNode, err := tr.AddFile(path)
if err != nil {
@@ -35,7 +37,7 @@ func TestFileTree_AddPathAndMissingAncestors(t *testing.T) {
}
_, f, _ := tr.File(path)
- if f != fileNode {
+ if f.Reference != fileNode {
t.Fatal("expected pointer to the newly created fileNode")
}
@@ -46,7 +48,7 @@ func TestFileTree_AddPathAndMissingAncestors(t *testing.T) {
if err != nil {
t.Fatalf("could not get parent Node: %+v", err)
}
- children := tr.tree.Children(n)
+ children := tr.tree.Children(n.FileNode)
if len(children) != 1 {
t.Fatal("unexpected child count", len(children))
@@ -58,7 +60,7 @@ func TestFileTree_AddPathAndMissingAncestors(t *testing.T) {
}
func TestFileTree_RemovePath(t *testing.T) {
- tr := NewFileTree()
+ tr := New()
path := file.Path("/home/wagoodman/awesome/file.txt")
_, err := tr.AddFile(path)
if err != nil {
@@ -85,8 +87,40 @@ func TestFileTree_RemovePath(t *testing.T) {
}
}
+func TestFileTree_FilesByGlob_AncestorSymlink(t *testing.T) {
+ var err error
+ tr := New()
+
+ _, err = tr.AddSymLink("/parent-link", "/parent")
+ require.NoError(t, err)
+
+ _, err = tr.AddDir("/parent")
+ require.NoError(t, err)
+
+ expectedRef, err := tr.AddFile("/parent/file.txt")
+ require.NoError(t, err)
+
+ expected := []file.Resolution{
+ {
+ RequestPath: "/parent-link/file.txt",
+ Reference: expectedRef,
+ LinkResolutions: nil,
+ },
+ }
+
+ requestGlob := "**/parent-link/file.txt"
+ linkOptions := []LinkResolutionOption{FollowBasenameLinks}
+ ref, err := tr.FilesByGlob(requestGlob, linkOptions...)
+ require.NoError(t, err)
+
+ opt := cmp.AllowUnexported(file.Reference{})
+ if d := cmp.Diff(expected, ref, opt); d != "" {
+ t.Errorf("unexpected file reference (-want +got):\n%s", d)
+ }
+}
+
func TestFileTree_FilesByGlob(t *testing.T) {
- tr := NewFileTree()
+ tr := New()
paths := []string{
"/home/wagoodman/awesome/file.txt",
@@ -301,22 +335,22 @@ func TestFileTree_FilesByGlob(t *testing.T) {
return
}
- actualSet := internal.NewStringSet()
- expectedSet := internal.NewStringSet()
+ actualSet := strset.New()
+ expectedSet := strset.New()
for _, r := range actual {
- actualSet.Add(string(r.MatchPath))
+ actualSet.Add(string(r.RequestPath))
}
for _, e := range test.expected {
expectedSet.Add(e)
- if !actualSet.Contains(e) {
+ if !actualSet.Has(e) {
t.Errorf("missing search hit: %s", e)
}
}
for _, r := range actual {
- if !expectedSet.Contains(string(r.MatchPath)) {
+ if !expectedSet.Has(string(r.RequestPath)) {
t.Errorf("extra search hit: %+v", r)
}
}
@@ -327,14 +361,14 @@ func TestFileTree_FilesByGlob(t *testing.T) {
}
func TestFileTree_Merge(t *testing.T) {
- tr1 := NewFileTree()
+ tr1 := New()
tr1.AddFile("/home/wagoodman/awesome/file-1.txt")
- tr2 := NewFileTree()
+ tr2 := New()
tr2.AddFile("/home/wagoodman/awesome/file-2.txt")
- if err := tr1.merge(tr2); err != nil {
- t.Fatalf("error on merge : %+v", err)
+ if err := tr1.Merge(tr2); err != nil {
+ t.Fatalf("error on Merge : %+v", err)
}
for _, p := range []file.Path{"/home/wagoodman/awesome/file-1.txt", "/home/wagoodman/awesome/file-2.txt"} {
@@ -345,32 +379,34 @@ func TestFileTree_Merge(t *testing.T) {
}
func TestFileTree_Merge_Overwrite(t *testing.T) {
- tr1 := NewFileTree()
+ tr1 := New()
tr1.AddFile("/home/wagoodman/awesome/file.txt")
- tr2 := NewFileTree()
+ tr2 := New()
newRef, _ := tr2.AddFile("/home/wagoodman/awesome/file.txt")
- if err := tr1.merge(tr2); err != nil {
- t.Fatalf("error on merge : %+v", err)
+ if err := tr1.Merge(tr2); err != nil {
+ t.Fatalf("error on Merge : %+v", err)
}
_, f, _ := tr1.File("/home/wagoodman/awesome/file.txt")
if f.ID() != newRef.ID() {
- t.Fatalf("did not overwrite paths on merge")
+ t.Fatalf("did not overwrite paths on Merge")
}
}
func TestFileTree_Merge_OpaqueWhiteout(t *testing.T) {
- tr1 := NewFileTree()
- tr1.AddFile("/home/wagoodman/awesome/file.txt")
+ tr1 := New()
+ _, err := tr1.AddFile("/home/wagoodman/awesome/file.txt")
+ require.NoError(t, err)
- tr2 := NewFileTree()
- tr2.AddFile("/home/wagoodman/.wh..wh..opq")
+ tr2 := New()
+ _, err = tr2.AddFile("/home/wagoodman/.wh..wh..opq")
+ require.NoError(t, err)
- if err := tr1.merge(tr2); err != nil {
- t.Fatalf("error on merge : %+v", err)
+ if err := tr1.Merge(tr2); err != nil {
+ t.Fatalf("error on Merge : %+v", err)
}
for _, p := range []file.Path{"/home/wagoodman", "/home"} {
@@ -388,14 +424,14 @@ func TestFileTree_Merge_OpaqueWhiteout(t *testing.T) {
}
func TestFileTree_Merge_OpaqueWhiteout_NoLowerDirectory(t *testing.T) {
- tr1 := NewFileTree()
+ tr1 := New()
tr1.AddFile("/home")
- tr2 := NewFileTree()
+ tr2 := New()
tr2.AddFile("/home/luhring/.wh..wh..opq")
- if err := tr1.merge(tr2); err != nil {
- t.Fatalf("error on merge : %+v", err)
+ if err := tr1.Merge(tr2); err != nil {
+ t.Fatalf("error on Merge : %+v", err)
}
for _, p := range []file.Path{"/home/luhring", "/home"} {
@@ -406,14 +442,14 @@ func TestFileTree_Merge_OpaqueWhiteout_NoLowerDirectory(t *testing.T) {
}
func TestFileTree_Merge_Whiteout(t *testing.T) {
- tr1 := NewFileTree()
+ tr1 := New()
tr1.AddFile("/home/wagoodman/awesome/file.txt")
- tr2 := NewFileTree()
+ tr2 := New()
tr2.AddFile("/home/wagoodman/awesome/.wh.file.txt")
- if err := tr1.merge(tr2); err != nil {
- t.Fatalf("error on merge : %+v", err)
+ if err := tr1.Merge(tr2); err != nil {
+ t.Fatalf("error on Merge : %+v", err)
}
for _, p := range []file.Path{"/home/wagoodman/awesome", "/home/wagoodman", "/home"} {
@@ -431,14 +467,14 @@ func TestFileTree_Merge_Whiteout(t *testing.T) {
}
func TestFileTree_Merge_DirOverride(t *testing.T) {
- tr1 := NewFileTree()
+ tr1 := New()
tr1.AddFile("/home/wagoodman/awesome/place")
- tr2 := NewFileTree()
+ tr2 := New()
tr2.AddFile("/home/wagoodman/awesome/place/thing.txt")
- if err := tr1.merge(tr2); err != nil {
- t.Fatalf("error on merge : %+v", err)
+ if err := tr1.Merge(tr2); err != nil {
+ t.Fatalf("error on Merge : %+v", err)
}
for _, p := range []file.Path{"/home/wagoodman/awesome/place", "/home/wagoodman/awesome/place/thing.txt"} {
@@ -455,24 +491,24 @@ func TestFileTree_Merge_DirOverride(t *testing.T) {
t.Fatalf("somehow override path does not exist?")
}
- if n.FileType != file.TypeDir {
+ if n.FileNode.FileType != file.TypeDirectory {
t.Errorf("did not override to dir")
}
}
func TestFileTree_Merge_RemoveChildPathsOnOverride(t *testing.T) {
- lowerTree := NewFileTree()
+ lowerTree := New()
// add a file in the lower tree, which implicitly adds "/home/wagoodman/awesome/place" as a directory type
lowerTree.AddFile("/home/wagoodman/awesome/place/thing.txt")
- upperTree := NewFileTree()
+ upperTree := New()
// add "/home/wagoodman/awesome/place" as a file type in the upper treee
upperTree.AddFile("/home/wagoodman/awesome/place")
// merge the upper tree into the lower tree
- if err := lowerTree.merge(upperTree); err != nil {
- t.Fatalf("error on merge : %+v", err)
+ if err := lowerTree.Merge(upperTree); err != nil {
+ t.Fatalf("error on Merge : %+v", err)
}
// the directory should still exist
@@ -494,90 +530,302 @@ func TestFileTree_Merge_RemoveChildPathsOnOverride(t *testing.T) {
t.Fatalf("somehow override path does not exist?")
}
- if fileNode.FileType != file.TypeReg {
+ if fileNode.FileNode.FileType != file.TypeRegular {
t.Errorf("did not override to dir")
}
}
+func TestFileTree_File_MultiSymlink(t *testing.T) {
+ var err error
+ tr := New()
+
+ _, err = tr.AddSymLink("/home", "/link-to-1/link-to-place")
+ require.NoError(t, err)
+
+ _, err = tr.AddSymLink("/link-to-1", "/1")
+ require.NoError(t, err)
+
+ _, err = tr.AddDir("/1")
+ require.NoError(t, err)
+
+ _, err = tr.AddFile("/2/real-file.txt")
+ require.NoError(t, err)
+
+ _, err = tr.AddSymLink("/1/file.txt", "/2/real-file.txt")
+ require.NoError(t, err)
+
+ _, err = tr.AddSymLink("/1/link-to-place", "/place")
+ require.NoError(t, err)
+
+ _, err = tr.AddSymLink("/place/wagoodman/file.txt", "/link-to-1/file.txt")
+ require.NoError(t, err)
+
+ // this is the current state of the filetree
+ // .
+ // ├── 1
+ // │ ├── file.txt -> 2/real-file.txt
+ // │ └── link-to-place -> place
+ // ├── 2
+ // │ └── real-file.txt
+ // ├── home -> link-to-1/link-to-place
+ // ├── link-to-1 -> 1
+ // └── place
+ // └── wagoodman
+ // └── file.txt -> link-to-1/file.txt
+
+ // request: /home/wagoodman/file.txt
+ // reference: /2/real-file.txt
+ // ancestor resolution:
+ // - /home -> /link-to-1/link-to-place
+ // - /link-to-1 -> /1
+ // - /1/link-to-place -> /place
+ // leaf resolution:
+ // - /place/wagoodman/file.txt -> /link-to-1/file.txt
+ // - /link-to-1 -> /1
+ // - /1/file.txt -> /2/real-file.txt
+ // path:
+ // - home -> link-to-1/link-to-place -> place
+ // - place/wagoodman
+ // - place/wagoodman/file.txt -> link-to-1/file.txt -> 1/file.txt -> 2/real-file.txt
+
+ expected := &file.Resolution{
+ RequestPath: "/home/wagoodman/file.txt",
+ Reference: &file.Reference{RealPath: "/2/real-file.txt"},
+ LinkResolutions: []file.Resolution{
+ {
+ RequestPath: "/place/wagoodman/file.txt",
+ Reference: &file.Reference{RealPath: "/place/wagoodman/file.txt"},
+ },
+ {
+ RequestPath: "/1/file.txt",
+ Reference: &file.Reference{RealPath: "/1/file.txt"},
+ },
+ },
+ }
+
+ requestPath := "/home/wagoodman/file.txt"
+ linkOptions := []LinkResolutionOption{FollowBasenameLinks}
+ _, ref, err := tr.File(file.Path(requestPath), linkOptions...)
+ require.NoError(t, err)
+
+ // compare the remaining expectations, ignoring any reference IDs
+ ignoreIDs := cmpopts.IgnoreUnexported(file.Reference{})
+ if d := cmp.Diff(expected, ref, ignoreIDs); d != "" {
+ t.Errorf("unexpected file reference (-want +got):\n%s", d)
+ }
+
+}
+
+func TestFileTree_File_MultiSymlink_deadlink(t *testing.T) {
+ var err error
+ tr := New()
+
+ _, err = tr.AddSymLink("/home", "/link-to-1/link-to-place")
+ require.NoError(t, err)
+
+ _, err = tr.AddSymLink("/link-to-1", "/1")
+ require.NoError(t, err)
+
+ _, err = tr.AddDir("/1")
+ require.NoError(t, err)
+
+ // causes the dead link
+ //_, err = tr.AddFile("/2/real-file.txt")
+ //require.NoError(t, err)
+
+ _, err = tr.AddSymLink("/1/file.txt", "/2/real-file.txt")
+ require.NoError(t, err)
+
+ _, err = tr.AddSymLink("/1/link-to-place", "/place")
+ require.NoError(t, err)
+
+ _, err = tr.AddSymLink("/place/wagoodman/file.txt", "/link-to-1/file.txt")
+ require.NoError(t, err)
+
+ // this is the current state of the filetree
+ // .
+ // ├── 1
+ // │ ├── file.txt -> 2/real-file.txt
+ // │ └── link-to-place -> place
+ // ├── home -> link-to-1/link-to-place
+ // ├── link-to-1 -> 1
+ // └── place
+ // └── wagoodman
+ // └── file.txt -> link-to-1/file.txt
+
+ // request: /home/wagoodman/file.txt
+	// reference: /1/file.txt (the /2/real-file.txt target is never created here, so resolution stops at the dead link)
+ // ancestor resolution:
+ // - /home -> /link-to-1/link-to-place
+ // - /link-to-1 -> /1
+ // - /1/link-to-place -> /place
+ // leaf resolution:
+ // - /place/wagoodman/file.txt -> /link-to-1/file.txt
+ // - /link-to-1 -> /1
+ // - /1/file.txt -> /2/real-file.txt
+ // path:
+ // - home -> link-to-1/link-to-place -> place
+ // - place/wagoodman
+	// - place/wagoodman/file.txt -> link-to-1/file.txt -> 1/file.txt -> 2/real-file.txt (dead: target not present)
+
+ expected := &file.Resolution{
+ RequestPath: "/home/wagoodman/file.txt",
+ Reference: &file.Reference{RealPath: "/1/file.txt"},
+ LinkResolutions: []file.Resolution{
+ {
+ RequestPath: "/place/wagoodman/file.txt",
+ Reference: &file.Reference{RealPath: "/place/wagoodman/file.txt"},
+ },
+ {
+ RequestPath: "/1/file.txt",
+ Reference: &file.Reference{RealPath: "/1/file.txt"},
+ },
+ {
+ RequestPath: "/2/real-file.txt",
+ //Reference: &file.Reference{RealPath: "/2/real-file.txt"},
+ },
+ },
+ }
+
+ requestPath := "/home/wagoodman/file.txt"
+
+ {
+ linkOptions := []LinkResolutionOption{FollowBasenameLinks}
+ _, ref, err := tr.File(file.Path(requestPath), linkOptions...)
+ require.Nil(t, ref)
+ require.NoError(t, err)
+ }
+
+ {
+ linkOptions := []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks}
+ _, ref, err := tr.File(file.Path(requestPath), linkOptions...)
+ require.NoError(t, err)
+
+ // compare the remaining expectations, ignoring any reference IDs
+ ignoreIDs := cmpopts.IgnoreUnexported(file.Reference{})
+ if d := cmp.Diff(expected, ref, ignoreIDs); d != "" {
+ t.Errorf("unexpected file reference (-want +got):\n%s", d)
+ }
+ }
+
+}
+
func TestFileTree_File_Symlink(t *testing.T) {
tests := []struct {
- name string
- buildLinkSource file.Path // ln -s