Skip to content

Commit

Permalink
debian: split OVAL feed into binary packages (#550)
Browse files Browse the repository at this point in the history
The Debian OVAL feed reports vulnerabilities by source package. This
change splits those source-level vulnerabilities into binary packages so
they can be matched easily with the packages the scanner finds in the distro.

Signed-off-by: crozzy <joseph.crosland@gmail.com>
  • Loading branch information
crozzy authored Feb 4, 2022
1 parent 03f3190 commit df34db7
Show file tree
Hide file tree
Showing 18 changed files with 414 additions and 192 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
ClairCore provides a set of go modules which handle scanning container layers for installed packages and reporting any discovered vulnerabilities.
ClairCore is designed to be embedded into a service wrapper.

For a full overview see: [ClairCore Book](https://quay.github.io/claircore)
For a full overview see: [ClairCore Book](https://quay.github.io/claircore)

# Local development and testing

Expand Down
2 changes: 1 addition & 1 deletion debian/matcher_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func TestMatcherIntegration(t *testing.T) {
t.Error(err)
}
// force update
tctx, cancel := context.WithTimeout(ctx, 2*time.Minute)
tctx, cancel := context.WithTimeout(ctx, 4*time.Minute)
defer cancel()
if err := mgr.Run(tctx); err != nil {
t.Error(err)
Expand Down
7 changes: 6 additions & 1 deletion debian/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ func (u *Updater) Parse(ctx context.Context, r io.ReadCloser) ([]*claircore.Vuln
return nil, fmt.Errorf("debian: unable to decode OVAL document: %w", err)
}
zlog.Debug(ctx).Msg("xml decoded")

sourcesMapFunc := func(_ oval.Definition, name *oval.DpkgName) []string {
return u.sm.Get(name.Body)
}

protoVulns := func(def oval.Definition) ([]*claircore.Vulnerability, error) {
vs := []*claircore.Vulnerability{}
v := &claircore.Vulnerability{
Expand All @@ -42,7 +47,7 @@ func (u *Updater) Parse(ctx context.Context, r io.ReadCloser) ([]*claircore.Vuln
vs = append(vs, v)
return vs, nil
}
vulns, err := ovalutil.DpkgDefsToVulns(ctx, &root, protoVulns)
vulns, err := ovalutil.DpkgDefsToVulns(ctx, &root, protoVulns, sourcesMapFunc)
if err != nil {
return nil, err
}
Expand Down
163 changes: 163 additions & 0 deletions debian/sourcemapper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
package debian

import (
"bufio"
"compress/gzip"
"context"
"errors"
"fmt"
"io"
"net/http"
"net/textproto"
"strings"
"sync"

"github.com/quay/zlog"
"go.opentelemetry.io/otel/baggage"
"go.opentelemetry.io/otel/label"
"golang.org/x/sync/errgroup"
)

// sourcesURL is the format string used to locate a Sources.gz index. The
// first verb receives the release and the second one of the entries of
// sourceRepos.
const sourcesURL = "https://ftp.debian.org/debian/dists/%s/%s/source/Sources.gz"

// sourceRepos lists the repository components whose Sources.gz files are
// fetched by SourcesMap.Update.
var sourceRepos = [3]string{"main", "contrib", "non-free"}

// NewSourcesMap constructs a SourcesMap for the given release that issues its
// HTTP requests through client. The returned map is empty: nothing is
// inserted until the Update method is called explicitly.
func NewSourcesMap(release Release, client *http.Client) *SourcesMap {
	sm := new(SourcesMap)
	sm.release = release
	sm.client = client
	sm.sourcesURL = sourcesURL
	sm.sourceMap = map[string]map[string]struct{}{}
	sm.etagMap = map[string]string{}
	sm.mu = new(sync.RWMutex)
	sm.etagMu = new(sync.RWMutex)
	return sm
}

// SourcesMap wraps a map that defines the relationship between a source
// package and its associated binaries. It offers an Update method
// to populate and update the map. It is Release-centric.
//
// It should have the same lifespan as the Updater to save allocations
// and take advantage of the entity tag that debian sends back.
type SourcesMap struct {
	// release is substituted into sourcesURL when building request URLs.
	release Release
	// sourcesURL is the format string for the Sources.gz location.
	sourcesURL string
	// mu guards sourceMap.
	mu *sync.RWMutex
	// etagMu guards etagMap.
	etagMu *sync.RWMutex
	// sourceMap maps a source package name to the set of its binary
	// package names.
	sourceMap map[string]map[string]struct{}
	// etagMap maps a fetched URL to the entity tag of its last response.
	etagMap map[string]string
	// client performs the HTTP requests.
	client *http.Client
}

// Get returns all the binaries associated with the source package named
// source. The result is a freshly allocated slice in no particular order
// (it is built by ranging over a map); it is empty, never nil, when the
// source is not present in the map.
func (m *SourcesMap) Get(source string) []string {
	m.mu.RLock()
	defer m.mu.RUnlock()

	// A single lookup is enough: ranging over the nil set of an unknown
	// source is a no-op, and the set size lets the result be allocated once.
	set := m.sourceMap[source]
	bins := make([]string, 0, len(set))
	for bin := range set {
		bins = append(bins, bin)
	}
	return bins
}

// Update downloads the Sources.gz file of every entry in sourceRepos
// concurrently and records the source to binary relationships they contain.
// It returns the first error reported by any of the fetches, if any.
func (m *SourcesMap) Update(ctx context.Context) error {
	// There are no Wheezy records, so the source->binary relationships of
	// Jessie are assumed instead. Note that this rewrites m.release.
	if m.release == Wheezy {
		m.release = Jessie
	}

	eg, gctx := errgroup.WithContext(ctx)
	for i := range sourceRepos {
		// target is declared per iteration, so each goroutine sees its own URL.
		target := fmt.Sprintf(m.sourcesURL, m.release, sourceRepos[i])
		eg.Go(func() error {
			if err := m.fetchSources(gctx, target); err != nil {
				return fmt.Errorf("unable to fetch sources: %w", err)
			}
			return nil
		})
	}
	return eg.Wait()
}

// fetchSources downloads the gzip-compressed Sources file at url and merges
// every source->binary relationship it describes into m.sourceMap.
//
// The request is made conditional on the entity tag remembered for url, if
// any; a 304 Not Modified reply is treated as a successful no-op. Any other
// non-200 status, an unexpected Content-Type, or a failure while reading or
// parsing the body is returned as an error. The entity tag of the response is
// only remembered once the whole body has been processed, so a failed attempt
// is retried in full on the next call instead of being hidden by a 304.
func (m *SourcesMap) fetchSources(ctx context.Context, url string) error {
	ctx = baggage.ContextWithValues(ctx,
		label.String("component", "debian/sourcemapper.fetchSources"),
		label.String("url", url))
	zlog.Debug(ctx).Msg("attempting fetch of Sources file")

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return err
	}
	m.etagMu.RLock()
	etag := m.etagMap[url]
	m.etagMu.RUnlock()
	// Only make the request conditional when there is a tag to compare
	// against; an empty If-None-Match value is not a valid entity tag.
	if etag != "" {
		req.Header.Set("If-None-Match", etag)
	}

	resp, err := m.client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusOK:
	case http.StatusNotModified:
		zlog.Debug(ctx).Msg("already processed the latest version of the file")
		return nil
	default:
		return fmt.Errorf("received status code %d querying mapping url %s", resp.StatusCode, url)
	}

	var reader io.ReadCloser
	switch ct := resp.Header.Get("Content-Type"); ct {
	case "application/gzip", "application/x-gzip":
		reader, err = gzip.NewReader(resp.Body)
		if err != nil {
			return err
		}
		defer reader.Close()
	default:
		return fmt.Errorf("received bad content-type %s querying mapping url %s", ct, url)
	}

	// Each stanza of the Sources file is parsed as a MIME-style header block.
	tp := textproto.NewReader(bufio.NewReader(reader))
	for {
		hdr, err := tp.ReadMIMEHeader()
		if err != nil && !errors.Is(err, io.EOF) {
			return fmt.Errorf("could not read Sources file %s: %w", url, err)
		}

		// hdr is handled before acting on io.EOF because the last stanza is
		// returned together with io.EOF when it is not followed by a blank
		// line. Stanzas without a Package field (including the empty header
		// produced by consecutive blank lines) are skipped.
		// NOTE(review): the "linux" source package is deliberately excluded;
		// the reason is not visible in this file.
		if source := hdr.Get("Package"); source != "" && source != "linux" {
			m.mu.Lock()
			if m.sourceMap[source] == nil {
				m.sourceMap[source] = make(map[string]struct{})
			}
			for _, bin := range strings.Split(hdr.Get("Binary"), ",") {
				// Trimming tolerates any spacing around the separators and
				// keeps a missing Binary field from recording an empty name.
				if bin = strings.TrimSpace(bin); bin != "" {
					m.sourceMap[source][bin] = struct{}{}
				}
			}
			m.mu.Unlock()
		}

		if err != nil {
			// io.EOF: the whole file has been consumed.
			break
		}
	}

	// Remember the entity tag only now that the body was processed in full.
	m.etagMu.Lock()
	m.etagMap[url] = resp.Header.Get("etag")
	m.etagMu.Unlock()

	return nil
}
68 changes: 68 additions & 0 deletions debian/sourcemapper_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package debian

import (
"bytes"
"context"
"io"
"io/ioutil"
"net/http"
"os"
"testing"

"github.com/quay/zlog"
)

// TestClientFunc adapts a plain function to the http.RoundTripper interface
// so that tests can answer requests with canned responses.
type TestClientFunc func(req *http.Request) *http.Response

// RoundTrip implements http.RoundTripper by handing req to fn. It never
// reports an error.
func (fn TestClientFunc) RoundTrip(req *http.Request) (*http.Response, error) {
	resp := fn(req)
	return resp, nil
}

// NewTestClient returns an *http.Client whose transport answers every request
// with a 200 response carrying the contents of testdata/Bullseye-Sources.gz
// and a Content-Type of application/gzip. An error is returned when the
// fixture cannot be opened or read.
func NewTestClient() (*http.Client, error) {
	f, err := os.Open("testdata/Bullseye-Sources.gz")
	if err != nil {
		return nil, err
	}
	// Deferred so the file is also closed when reading it fails.
	defer f.Close()

	b, err := ioutil.ReadAll(f)
	if err != nil {
		return nil, err
	}

	return &http.Client{
		Transport: TestClientFunc(
			func(req *http.Request) *http.Response {
				// A fresh reader per request lets the same bytes be served
				// any number of times.
				w := &http.Response{
					StatusCode: http.StatusOK,
					Header:     make(http.Header),
					Body:       io.NopCloser(bytes.NewReader(b)),
				}
				w.Header.Set("Content-Type", "application/gzip")
				return w
			},
		),
	}, nil
}

// TestCreateSourcesMap populates a SourcesMap for Bullseye through the canned
// test client and checks how many binaries are recorded for two source
// packages.
func TestCreateSourcesMap(t *testing.T) {
	ctx := zlog.Test(context.Background(), t)

	client, err := NewTestClient()
	if err != nil {
		t.Fatalf("got the error %v", err)
	}

	sm := NewSourcesMap(Bullseye, client)
	if err := sm.Update(ctx); err != nil {
		t.Fatalf("unexpected error %v", err)
	}

	if got := sm.Get("aalib"); len(got) != 3 {
		t.Fatalf("expected 3 binaries related to aalib found %d found %v", len(got), got)
	}

	if got := sm.Get("389-ds-base"); len(got) != 6 {
		t.Fatalf("expected 6 binaries related to 389-ds-base found %d found %v", len(got), got)
	}
}
Binary file added debian/testdata/Bullseye-Sources.gz
Binary file not shown.
13 changes: 11 additions & 2 deletions debian/updater.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ type Updater struct {
// the release name as described by os-release "VERSION_CODENAME"
release Release
c *http.Client
sm *SourcesMap
}

// UpdaterConfig is the configuration for the updater.
Expand All @@ -58,10 +59,12 @@ type UpdaterConfig struct {
func NewUpdater(release Release) *Updater {
url := fmt.Sprintf(OVALTemplate, release)

c := http.DefaultClient // TODO(hank) Remove DefaultClient
return &Updater{
url: url,
release: release,
c: http.DefaultClient, // TODO(hank) Remove DefaultClient
c: c,
sm: NewSourcesMap(release, c),
}
}

Expand Down Expand Up @@ -134,7 +137,13 @@ func (u *Updater) Fetch(ctx context.Context, fingerprint driver.Fingerprint) (io
f.Close()
return nil, "", fmt.Errorf("failed to seek body: %v", err)
}

zlog.Info(ctx).Msg("fetched latest oval database successfully")

err = u.sm.Update(ctx)
if err != nil {
return nil, "", fmt.Errorf("could not update source to binary map: %w", err)
}
zlog.Info(ctx).Msg("updated the debian source to binary map successfully")

return f, driver.Fingerprint(fp), err
}
Loading

0 comments on commit df34db7

Please sign in to comment.