Skip to content

Commit

Permalink
Refactor OCI file extraction to use existing tarutils.
Browse files Browse the repository at this point in the history
Signed-off-by: Michael Nelson <minelson@vmware.com>
  • Loading branch information
absoludity committed Dec 19, 2022
1 parent 4a6f250 commit a4b48ed
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 203 deletions.
88 changes: 12 additions & 76 deletions cmd/asset-syncer/server/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
package server

import (
"archive/tar"
"bytes"
"compress/gzip"
"crypto/sha256"
"encoding/json"
"fmt"
Expand Down Expand Up @@ -396,70 +394,6 @@ func (r *OCIRegistry) Repo() *models.RepoInternal {
return r.RepoInternal
}

type artifactFiles struct {
Metadata string
Readme string
Values string
Schema string
}

func extractFilesFromBuffer(buf *bytes.Buffer) (*artifactFiles, error) {
result := &artifactFiles{}
gzf, err := gzip.NewReader(buf)
if err != nil {
return nil, err
}
tarReader := tar.NewReader(gzf)
importantFiles := map[string]bool{
"chart.yaml": true,
"readme.md": true,
"values.yaml": true,
"values.schema.json": true,
}
for {
header, err := tarReader.Next()
if err == io.EOF {
break
}
if err != nil {
return nil, err
}

compressedFileName := header.Name
if len(strings.Split(compressedFileName, "/")) > 2 {
// We are only interested on files within the root directory
continue
}

switch header.Typeflag {
case tar.TypeDir:
// Ignore directories
case tar.TypeReg:
filename := strings.ToLower(path.Base(compressedFileName))
if importantFiles[filename] {
// Read content
data, err := io.ReadAll(tarReader)
if err != nil {
return nil, err
}
switch filename {
case "chart.yaml":
result.Metadata = string(data)
case "readme.md":
result.Readme = string(data)
case "values.yaml":
result.Values = string(data)
case "values.schema.json":
result.Schema = string(data)
}
}
default:
// Unknown type, ignore
}
}
return result, nil
}

func pullAndExtract(repoURL *url.URL, appName, tag string, puller helm.ChartPuller, r *OCIRegistry) (*models.Chart, error) {
ref := path.Join(repoURL.Host, repoURL.Path, fmt.Sprintf("%s:%s", appName, tag))

Expand All @@ -468,13 +402,15 @@ func pullAndExtract(repoURL *url.URL, appName, tag string, puller helm.ChartPull
return nil, err
}

// Extract
files, err := extractFilesFromBuffer(chartBuffer)
// OCI tarballs do not have a root directory with files under that, but rather
// have the files at the top-level.
tarballRootDir := ""
files, err := tarutil.FetchChartDetailFromTarball(chartBuffer, tarballRootDir)
if err != nil {
return nil, err
}
chartMetadata := chart.Metadata{}
err = yaml.Unmarshal([]byte(files.Metadata), &chartMetadata)
err = yaml.Unmarshal([]byte(files[models.ChartYamlKey]), &chartMetadata)
if err != nil {
return nil, err
}
Expand All @@ -485,9 +421,9 @@ func pullAndExtract(repoURL *url.URL, appName, tag string, puller helm.ChartPull
AppVersion: chartMetadata.AppVersion,
Digest: digest,
URLs: chartMetadata.Sources,
Readme: files.Readme,
Values: files.Values,
Schema: files.Schema,
Readme: files[models.ReadmeKey],
Values: files[models.DefaultValuesKey],
Schema: files[models.SchemaKey],
}

maintainers := []chart.Maintainer{}
Expand Down Expand Up @@ -656,9 +592,9 @@ func (r *OCIRegistry) Charts(fetchLatestOnly bool) ([]models.Chart, error) {
// FetchFiles do nothing for the OCI case since they have been already fetched in the Charts() method
func (r *OCIRegistry) FetchFiles(name string, cv models.ChartVersion, userAgent string, passCredentials bool) (map[string]string, error) {
return map[string]string{
models.ValuesKey: cv.Values,
models.ReadmeKey: cv.Readme,
models.SchemaKey: cv.Schema,
models.DefaultValuesKey: cv.Values,
models.ReadmeKey: cv.Readme,
models.SchemaKey: cv.Schema,
}, nil
}

Expand Down Expand Up @@ -888,7 +824,7 @@ func (f *fileImporter) fetchAndImportFiles(name string, repo Repo, cv models.Cha
} else {
log.Info("README.md not found, name=%s, version=%s", name, cv.Version)
}
if v, ok := files[models.ValuesKey]; ok {
if v, ok := files[models.DefaultValuesKey]; ok {
chartFiles.DefaultValues = v
} else {
log.Info("values.yaml not found, name=%s, version=%s", name, cv.Version)
Expand Down
97 changes: 10 additions & 87 deletions cmd/asset-syncer/server/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -531,9 +531,9 @@ func (r *fakeRepo) Charts(shallow bool) ([]models.Chart, error) {

func (r *fakeRepo) FetchFiles(name string, cv models.ChartVersion, userAgent string, passCredentials bool) (map[string]string, error) {
return map[string]string{
models.ValuesKey: r.chartFiles.DefaultValues,
models.ReadmeKey: r.chartFiles.Readme,
models.SchemaKey: r.chartFiles.Schema,
models.DefaultValuesKey: r.chartFiles.DefaultValues,
models.ReadmeKey: r.chartFiles.Readme,
models.SchemaKey: r.chartFiles.Schema,
}, nil
}

Expand Down Expand Up @@ -1077,105 +1077,28 @@ version: 1.0.0
charts, err := chartsRepo.Charts(tt.shallow)
assert.NoError(t, err)
if !cmp.Equal(charts, tt.expected) {
t.Errorf("Unexpected result %v", cmp.Diff(charts, tt.expected))
t.Errorf("Unexpected result %v", cmp.Diff(tt.expected, charts))
}
})
}

t.Run("FetchFiles - It returns the stored files", func(t *testing.T) {
files := map[string]string{
models.ValuesKey: "values text",
models.ReadmeKey: "readme text",
models.SchemaKey: "schema text",
models.DefaultValuesKey: "values text",
models.ReadmeKey: "readme text",
models.SchemaKey: "schema text",
}
repo := OCIRegistry{}
result, err := repo.FetchFiles("", models.ChartVersion{
Values: files["values"],
Readme: files["readme"],
Schema: files["schema"],
Values: files[models.DefaultValuesKey],
Readme: files[models.ReadmeKey],
Schema: files[models.SchemaKey],
}, "my-user-agent", false)
assert.NoError(t, err)
assert.Equal(t, result, files, "expected files")
})
}

func Test_extractFilesFromBuffer(t *testing.T) {
tests := []struct {
description string
files []tartest.TarballFile
expected *artifactFiles
}{
{
"It should extract the important files",
[]tartest.TarballFile{
{Name: "Chart.yaml", Body: "chart yaml"},
{Name: "README.md", Body: "chart readme"},
{Name: "values.yaml", Body: "chart values"},
{Name: "values.schema.json", Body: "chart schema"},
},
&artifactFiles{
Metadata: "chart yaml",
Readme: "chart readme",
Values: "chart values",
Schema: "chart schema",
},
},
{
"It should ignore letter case",
[]tartest.TarballFile{
{Name: "Readme.md", Body: "chart readme"},
},
&artifactFiles{
Readme: "chart readme",
},
},
{
"It should ignore other files",
[]tartest.TarballFile{
{Name: "README.md", Body: "chart readme"},
{Name: "other.yaml", Body: "other content"},
},
&artifactFiles{
Readme: "chart readme",
},
},
{
"It should handle large files",
[]tartest.TarballFile{
// 1MB file
{Name: "README.md", Body: string(make([]byte, 1048577))},
},
&artifactFiles{
Readme: string(make([]byte, 1048577)),
},
},
{
"It should ignore nested files",
[]tartest.TarballFile{
{Name: "other/README.md", Body: "bad"},
{Name: "README.md", Body: "good"},
},
&artifactFiles{
Readme: "good",
},
},
}
for _, tt := range tests {
t.Run(tt.description, func(t *testing.T) {
w := httptest.NewRecorder()
gzw := gzip.NewWriter(w)
tartest.CreateTestTarball(gzw, tt.files)
gzw.Flush()

r, err := extractFilesFromBuffer(w.Body)
assert.NoError(t, err)
if !cmp.Equal(r, tt.expected) {
t.Errorf("Unexpected result %v", cmp.Diff(r, tt.expected))
}
})
}
}

func Test_filterCharts(t *testing.T) {
tests := []struct {
description string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ func availablePackageDetailFromChartDetail(chartID string, chartDetail map[strin
ShortDescription: chartMetadata.Description,
Categories: categories,
Readme: chartDetail[models.ReadmeKey],
DefaultValues: chartDetail[models.ValuesKey],
DefaultValues: chartDetail[models.DefaultValuesKey],
ValuesSchema: chartDetail[models.SchemaKey],
SourceUrls: chartMetadata.Sources,
Maintainers: maintainers,
Expand Down
18 changes: 9 additions & 9 deletions pkg/chart/models/chart.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,13 @@ type ChartVersion struct {

// ChartFiles holds the README and default values for a given chart version
type ChartFiles struct {
ID string `bson:"file_id"`
Readme string
DefaultValues string
CustomDefaultValues map[string]string
Schema string
Repo *Repo
Digest string
ID string `bson:"file_id"`
Readme string
DefaultValues string
AdditionalDefaultValues map[string]string
Schema string
Repo *Repo
Digest string
}

// Allow to convert ChartFiles to a sql JSON
Expand All @@ -94,8 +94,8 @@ func (a ChartFiles) Value() (driver.Value, error) {
// some constant strings used as keys in maps in several modules
const (
ReadmeKey = "readme"
ValuesKey = "values"
AdditionalDefaultValuesKey = "additionalDefaultValues"
DefaultValuesKey = "values"
AdditionalDefaultValuesKey = "additional-values"
SchemaKey = "schema"
ChartYamlKey = "chartYaml"
)
44 changes: 25 additions & 19 deletions pkg/tarutil/tarutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,26 @@ func FetchChartDetailFromTarballUrl(name string, chartTarballURL string, userAge
return nil, err
}

return FetchChartDetailFromTarball(reader, name)
// decode escaped characters
// ie., "foo%2Fbar" should return "foo/bar"
decodedName, err := url.PathUnescape(name)
if err != nil {
return nil, err
}

// get last part of the name
// ie., "foo/bar" should return "bar"
fixedName := path.Base(decodedName)

return FetchChartDetailFromTarball(reader, fixedName)
}

//
// Fetches helm chart details from a gzipped tarball
//
// name is expected in format "foo/bar" or "foo%2Fbar" if url-escaped
//
func FetchChartDetailFromTarball(reader io.Reader, name string) (map[string]string, error) {
func FetchChartDetailFromTarball(reader io.Reader, tarballRootDir string) (map[string]string, error) {
// We read the whole chart into memory, this should be okay since the chart
// tarball needs to be small enough to fit into a GRPC call
gzf, err := gzip.NewReader(reader)
Expand All @@ -60,31 +71,26 @@ func FetchChartDetailFromTarball(reader io.Reader, name string) (map[string]stri

tarf := tar.NewReader(gzf)

// decode escaped characters
// ie., "foo%2Fbar" should return "foo/bar"
decodedName, err := url.PathUnescape(name)
if err != nil {
return nil, err
prefix := ""
if tarballRootDir != "" {
prefix = tarballRootDir + "/"
}

// get last part of the name
// ie., "foo/bar" should return "bar"
fixedName := path.Base(decodedName)
readmeFileName := fixedName + "/README.md"
valuesFileName := fixedName + "/values.yaml"
schemaFileName := fixedName + "/values.schema.json"
chartYamlFileName := fixedName + "/Chart.yaml"
readmeFileName := prefix + "README.md"
valuesFileName := prefix + "values.yaml"
schemaFileName := prefix + "values.schema.json"
chartYamlFileName := prefix + "Chart.yaml"
filenames := map[string]string{
chart.ValuesKey: valuesFileName,
chart.ReadmeKey: readmeFileName,
chart.SchemaKey: schemaFileName,
chart.ChartYamlKey: chartYamlFileName,
chart.DefaultValuesKey: valuesFileName,
chart.ReadmeKey: readmeFileName,
chart.SchemaKey: schemaFileName,
chart.ChartYamlKey: chartYamlFileName,
}

// Optionally search for files matching a regular expression, using the
// template to provide the key.
regexes := map[string]*regexp.Regexp{
chart.ValuesKey + "-$valuesType": regexp.MustCompile(fixedName + `/values-(?P<valuesType>\w+)\.yaml`),
chart.DefaultValuesKey + "-$valuesType": regexp.MustCompile(prefix + `values-(?P<valuesType>\w+)\.yaml`),
}

return ExtractFilesFromTarball(filenames, regexes, tarf)
Expand Down
Loading

0 comments on commit a4b48ed

Please sign in to comment.