diff --git a/README.md b/README.md index e0e91693..142e2afa 100644 --- a/README.md +++ b/README.md @@ -27,12 +27,12 @@ behavior data for anyone interested. A YAML configuration file can be provided with the following format: ``` -enabled_feeds: -- pypi -- npm -- goproxy -- rubygems -- crates +feeds: +- type: pypi +- type: npm +- type: goproxy +- type: rubygems +- type: crates publisher: type: 'gcp_pubsub' @@ -47,7 +47,21 @@ timer: false ``` `poll_rate` string formatted for [duration parser](https://golang.org/pkg/time/#ParseDuration).This is used as an initial value to generate a cutoff point for feed events relative to the given time at execution, with subsequent events using the previous time at execution as the cutoff point. -`timer` will configure interal polling of the `enabled_feeds` at the given `poll_rate` period. To specify this configuration file, define its path in your environment under the `PACKAGE_FEEDS_CONFIG_PATH` variable. +`timer` will configure internal polling of the `feeds` at the given `poll_rate` period. To specify this configuration file, define its path in your environment under the `PACKAGE_FEEDS_CONFIG_PATH` variable. + +## FeedOptions + +Feeds can be configured with additional options, though not all feeds support them. See the appropriate feed `README.md` for supported options. 
+Below is an example of such options with pypi being configured to poll a specific set of packages + +``` +feeds: +- type: pypi + options: + packages: + - fooPackage + - barPackage +``` ## Legacy Configuration diff --git a/cmd/scheduled-feed/main.go b/cmd/scheduled-feed/main.go index 87782f50..ad2cfe24 100644 --- a/cmd/scheduled-feed/main.go +++ b/cmd/scheduled-feed/main.go @@ -112,8 +112,12 @@ func main() { } log.Infof("using %q publisher", pub.Name()) - scheduledFeeds, err := appConfig.GetScheduledFeeds() - log.Infof("watching feeds: %v", strings.Join(appConfig.EnabledFeeds, ", ")) + feeds, err := appConfig.GetScheduledFeeds() + feedNames := []string{} + for k := range feeds { + feedNames = append(feedNames, k) + } + log.Infof("watching feeds: %v", strings.Join(feedNames, ", ")) if err != nil { log.Fatal(err) } diff --git a/config/config_test.go b/config/config_test.go index be2d881f..93a3f7ba 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -6,16 +6,18 @@ import ( "github.com/ossf/package-feeds/config" "github.com/ossf/package-feeds/events" + "github.com/ossf/package-feeds/feeds" + "github.com/ossf/package-feeds/feeds/pypi" "github.com/ossf/package-feeds/feeds/scheduler" "github.com/ossf/package-feeds/publisher/stdout" ) const ( TestConfigStr = ` -enabled_feeds: -- rubygems -- goproxy -- npm +feeds: +- type: rubygems +- type: goproxy +- type: npm publisher: type: "gcp" @@ -27,8 +29,8 @@ poll_rate: 5m timer: true ` TestConfigStrUnknownFeedType = ` -enabled_feeds: -- foo +feeds: +- type: foo ` TestConfigStrUnknownField = ` foo: @@ -52,11 +54,11 @@ func TestDefault(t *testing.T) { t.Parallel() c := config.Default() - feeds, err := c.GetScheduledFeeds() + scheduledFeeds, err := c.GetScheduledFeeds() if err != nil { t.Fatalf("failed to initialize feeds: %v", err) } - _ = scheduler.New(feeds) + _ = scheduler.New(scheduledFeeds) } func TestGetScheduledFeeds(t *testing.T) { @@ -66,16 +68,16 @@ func TestGetScheduledFeeds(t *testing.T) { if err != nil { 
t.Fatal(err) } - if len(c.EnabledFeeds) != 3 { - t.Fatalf("EnabledFeeds is expected to be 3 but was `%v`", len(c.EnabledFeeds)) + if len(c.Feeds) != 3 { + t.Fatalf("Feeds is expected to be 3 but was `%v`", len(c.Feeds)) } - feeds, err := c.GetScheduledFeeds() + scheduledFeeds, err := c.GetScheduledFeeds() if err != nil { t.Fatal(err) } - for _, val := range c.EnabledFeeds { - if _, ok := feeds[val]; !ok { - t.Errorf("expected `%v` feed was not found in scheduled feeds after GetScheduledFeeds()", val) + for _, feed := range c.Feeds { + if _, ok := scheduledFeeds[feed.Type]; !ok { + t.Errorf("expected `%v` feed was not found in scheduled feeds after GetScheduledFeeds()", feed.Type) } } } @@ -93,7 +95,7 @@ func TestLoadFeedConfigUnknownFeedType(t *testing.T) { } } -func TestPubConfigToPublisherStdout(t *testing.T) { +func TestPublisherConfigToPublisherStdout(t *testing.T) { t.Parallel() c := config.PublisherConfig{ @@ -109,6 +111,46 @@ func TestPubConfigToPublisherStdout(t *testing.T) { } } +func TestPublisherConfigToFeed(t *testing.T) { + t.Parallel() + + packages := []string{ + "foo", + "bar", + "baz", + } + + c := config.FeedConfig{ + Type: pypi.FeedName, + Options: feeds.FeedOptions{ + Packages: &packages, + }, + } + feed, err := c.ToFeed(events.NewNullHandler()) + if err != nil { + t.Fatalf("failed to create pypi feed from configuration: %v", err) + } + + pypiFeed, ok := feed.(*pypi.Feed) + if !ok { + t.Fatal("failed to cast feed as pypi feed") + } + + feedPackages := pypiFeed.GetPackageList() + if feedPackages == nil { + t.Fatalf("failed to initialize pypi feed package list to poll") + } + if feedPackages != nil && len(*feedPackages) != len(packages) { + t.Errorf("pypi package list does not match config provided package list") + } else { + for i := 0; i < len(packages); i++ { + if (*feedPackages)[i] != packages[i] { + t.Errorf("pypi package '%v' does not match configured package '%v'", (*feedPackages)[i], packages[i]) + } + } + } +} + func 
TestStrictConfigDecoding(t *testing.T) { t.Parallel() diff --git a/config/scheduledfeed.go b/config/scheduledfeed.go index e1b1f3ae..7d45713d 100644 --- a/config/scheduledfeed.go +++ b/config/scheduledfeed.go @@ -59,11 +59,11 @@ func NewConfigFromBytes(yamlBytes []byte) (*ScheduledFeedConfig, error) { } // Applies environment variables to the configuration. -func (config *ScheduledFeedConfig) applyEnvVars() { +func (sc *ScheduledFeedConfig) applyEnvVars() { // Support legacy env var definition for gcp pub sub. pubURL := os.Getenv("OSSMALWARE_TOPIC_URL") if pubURL != "" { - config.PubConfig = PublisherConfig{ + sc.PubConfig = PublisherConfig{ Type: gcppubsub.PublisherType, Config: map[string]interface{}{ "url": pubURL, @@ -75,7 +75,7 @@ func (config *ScheduledFeedConfig) applyEnvVars() { port, err := strconv.Atoi(portStr) if portProvided && err == nil { - config.HTTPPort = port + sc.HTTPPort = port } } @@ -83,53 +83,37 @@ func AddTo(ls *[]int, value int) { *ls = append(*ls, value) } -// Constructs a map of ScheduledFeeds to enable based on the EnabledFeeds provided -// from configuration, indexed by the feed type. -func (config *ScheduledFeedConfig) GetScheduledFeeds() (map[string]feeds.ScheduledFeed, error) { - var err error +// Constructs a map of ScheduledFeeds to enable based on the Feeds +// provided from configuration, indexed by the feed type. 
+func (sc *ScheduledFeedConfig) GetScheduledFeeds() (map[string]feeds.ScheduledFeed, error) { scheduledFeeds := map[string]feeds.ScheduledFeed{} - eventHandler, err := config.GetEventHandler() + eventHandler, err := sc.GetEventHandler() if err != nil { return nil, err } - for _, entry := range config.EnabledFeeds { - switch entry { - case crates.FeedName: - scheduledFeeds[entry] = crates.New(eventHandler) - case goproxy.FeedName: - scheduledFeeds[entry] = goproxy.Feed{} - case npm.FeedName: - scheduledFeeds[entry] = npm.New(eventHandler) - case nuget.FeedName: - scheduledFeeds[entry] = nuget.Feed{} - case pypi.FeedName: - scheduledFeeds[entry] = pypi.New(eventHandler) - case packagist.FeedName: - scheduledFeeds[entry] = packagist.Feed{} - case rubygems.FeedName: - scheduledFeeds[entry] = rubygems.New(eventHandler) - default: - err = fmt.Errorf("%w : %v", errUnknownFeed, entry) + + for _, entry := range sc.Feeds { + feed, err := entry.ToFeed(eventHandler) + if err != nil { + return nil, err } + scheduledFeeds[entry.Type] = feed } - if err != nil { - return nil, fmt.Errorf("failed to parse enabled_feeds entries: %w", err) - } return scheduledFeeds, nil } -func (config *ScheduledFeedConfig) GetEventHandler() (*events.Handler, error) { +func (sc *ScheduledFeedConfig) GetEventHandler() (*events.Handler, error) { var err error - if config.EventsConfig == nil { - config.eventHandler = events.NewNullHandler() - } else if config.eventHandler == nil { - config.eventHandler, err = config.EventsConfig.ToEventHandler() + if sc.EventsConfig == nil { + sc.eventHandler = events.NewNullHandler() + } else if sc.eventHandler == nil { + sc.eventHandler, err = sc.EventsConfig.ToEventHandler() if err != nil { return nil, err } } - return config.eventHandler, nil + return sc.eventHandler, nil } func (ec *EventsConfig) ToEventHandler() (*events.Handler, error) { @@ -145,7 +129,8 @@ func (ec *EventsConfig) ToEventHandler() (*events.Handler, error) { // Produces a Publisher object from the 
provided PublisherConfig // The PublisherConfig.Type value is evaluated and the appropriate Publisher is -// constructed from the Config field. +// constructed from the Config field. If the type is not a recognised Publisher type, +// an error is returned. func (pc PublisherConfig) ToPublisher(ctx context.Context) (publisher.Publisher, error) { var err error switch pc.Type { @@ -166,9 +151,31 @@ func (pc PublisherConfig) ToPublisher(ctx context.Context) (publisher.Publisher, case stdout.PublisherType: return stdout.New(), nil default: - err = fmt.Errorf("%w : %v", errUnknownPub, pc.Type) + return nil, fmt.Errorf("%w : %v", errUnknownPub, pc.Type) + } +} + +// Constructs the appropriate feed for the given type, providing the +// options to the feed. +func (fc FeedConfig) ToFeed(eventHandler *events.Handler) (feeds.ScheduledFeed, error) { + switch fc.Type { + case crates.FeedName: + return crates.New(fc.Options, eventHandler) + case goproxy.FeedName: + return goproxy.New(fc.Options) + case npm.FeedName: + return npm.New(fc.Options, eventHandler) + case nuget.FeedName: + return nuget.New(fc.Options) + case pypi.FeedName: + return pypi.New(fc.Options, eventHandler) + case packagist.FeedName: + return packagist.New(fc.Options) + case rubygems.FeedName: + return rubygems.New(fc.Options, eventHandler) + default: + return nil, fmt.Errorf("%w : %v", errUnknownFeed, fc.Type) } - return nil, err } // Decode an input using mapstruct decoder with strictness enabled, errors will be returned in @@ -186,14 +193,14 @@ func strictDecode(input, out interface{}) error { func Default() *ScheduledFeedConfig { config := &ScheduledFeedConfig{ - EnabledFeeds: []string{ - crates.FeedName, - goproxy.FeedName, - npm.FeedName, - nuget.FeedName, - packagist.FeedName, - pypi.FeedName, - rubygems.FeedName, + Feeds: []FeedConfig{ + {Type: crates.FeedName}, + {Type: goproxy.FeedName}, + {Type: npm.FeedName}, + {Type: nuget.FeedName}, + {Type: packagist.FeedName}, + {Type: pypi.FeedName}, + {Type: 
rubygems.FeedName}, }, PubConfig: PublisherConfig{ Type: stdout.PublisherType, diff --git a/config/structs.go b/config/structs.go index 7e224f8e..45f46063 100644 --- a/config/structs.go +++ b/config/structs.go @@ -1,19 +1,22 @@ package config -import "github.com/ossf/package-feeds/events" +import ( + "github.com/ossf/package-feeds/events" + "github.com/ossf/package-feeds/feeds" +) type ScheduledFeedConfig struct { - // Configures the publisher for pushing packages after polling + // Configures the publisher for pushing packages after polling. PubConfig PublisherConfig `yaml:"publisher"` - // Configures the feeds to be used for polling from package repositories - EnabledFeeds []string `yaml:"enabled_feeds"` + // Configures the feeds to be used for polling from package repositories. + Feeds []FeedConfig `yaml:"feeds"` HTTPPort int `yaml:"http_port,omitempty"` PollRate string `yaml:"poll_rate"` Timer bool `yaml:"timer"` - // Configures the EventHandler instance to be used throughout the package-feeds application + // Configures the EventHandler instance to be used throughout the package-feeds application. EventsConfig *EventsConfig `yaml:"events"` eventHandler *events.Handler @@ -24,6 +27,11 @@ type PublisherConfig struct { Config interface{} `mapstructure:"config"` } +type FeedConfig struct { + Type string `mapstructure:"type"` + Options feeds.FeedOptions `mapstructure:"options"` +} + type EventsConfig struct { Sink string `yaml:"sink"` EventFilter events.Filter `yaml:"filter"` diff --git a/feeds/crates/README.md b/feeds/crates/README.md new file mode 100644 index 00000000..975cc123 --- /dev/null +++ b/feeds/crates/README.md @@ -0,0 +1,13 @@ +# Crates Feed + +This feed allows polling of package updates from the crates package repository. + +## Configuration options + +The `packages` field is not supported by the crates feed. 
+ + +``` +feeds: +- type: crates +``` \ No newline at end of file diff --git a/feeds/crates/crates.go b/feeds/crates/crates.go index 0530513a..34eb9b32 100644 --- a/feeds/crates/crates.go +++ b/feeds/crates/crates.go @@ -55,10 +55,16 @@ type Feed struct { lossyFeedAlerter *feeds.LossyFeedAlerter } -func New(eventHandler *events.Handler) *Feed { +func New(feedOptions feeds.FeedOptions, eventHandler *events.Handler) (*Feed, error) { + if feedOptions.Packages != nil { + return nil, feeds.UnsupportedOptionError{ + Feed: FeedName, + Option: "packages", + } + } return &Feed{ lossyFeedAlerter: feeds.NewLossyFeedAlerter(eventHandler), - } + }, nil } func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, error) { diff --git a/feeds/crates/crates_test.go b/feeds/crates/crates_test.go index 86c940fd..401ca377 100644 --- a/feeds/crates/crates_test.go +++ b/feeds/crates/crates_test.go @@ -6,6 +6,7 @@ import ( "time" "github.com/ossf/package-feeds/events" + "github.com/ossf/package-feeds/feeds" "github.com/ossf/package-feeds/testutils" ) @@ -18,7 +19,10 @@ func TestCratesLatest(t *testing.T) { srv := testutils.HTTPServerMock(handlers) baseURL = srv.URL + "/api/v1/summary" - feed := New(events.NewNullHandler()) + feed, err := New(feeds.FeedOptions{}, events.NewNullHandler()) + if err != nil { + t.Fatalf("failed to create crates feed: %v", err) + } cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) pkgs, err := feed.Latest(cutoff) diff --git a/feeds/feed.go b/feeds/feed.go index bbe3ce2c..79923fb8 100644 --- a/feeds/feed.go +++ b/feeds/feed.go @@ -1,15 +1,28 @@ package feeds import ( + "fmt" "time" ) const schemaVer = "1.0" +type UnsupportedOptionError struct { + Option string + Feed string +} + type ScheduledFeed interface { Latest(cutoff time.Time) ([]*Package, error) } +// General configuration options for feeds. +type FeedOptions struct { + // A collection of package names to poll instead of standard firehose behaviour. + // Not supported by all feeds. 
+ Packages *[]string `yaml:"packages"` +} + // Marshalled json output validated against package.schema.json. type Package struct { Name string `json:"name"` @@ -38,3 +51,7 @@ func ApplyCutoff(pkgs []*Package, cutoff time.Time) []*Package { } return filteredPackages } + +func (err UnsupportedOptionError) Error() string { + return fmt.Sprintf("unsupported option `%v` supplied to %v feed", err.Option, err.Feed) +} diff --git a/feeds/goproxy/README.md b/feeds/goproxy/README.md new file mode 100644 index 00000000..4d5437f0 --- /dev/null +++ b/feeds/goproxy/README.md @@ -0,0 +1,13 @@ +# goproxy Feed + +This feed allows polling of package updates from the index.golang.org package repository. + +## Configuration options + +The `packages` field is not supported by the goproxy feed. + + +``` +feeds: +- type: goproxy +``` \ No newline at end of file diff --git a/feeds/goproxy/goproxy.go b/feeds/goproxy/goproxy.go index e1bb61a7..e9a010a8 100644 --- a/feeds/goproxy/goproxy.go +++ b/feeds/goproxy/goproxy.go @@ -70,6 +70,16 @@ func fetchPackages(since time.Time) ([]Package, error) { type Feed struct{} +func New(feedOptions feeds.FeedOptions) (*Feed, error) { + if feedOptions.Packages != nil { + return nil, feeds.UnsupportedOptionError{ + Feed: FeedName, + Option: "packages", + } + } + return &Feed{}, nil +} + func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, error) { pkgs := []*feeds.Package{} packages, err := fetchPackages(cutoff) diff --git a/feeds/npm/README.md b/feeds/npm/README.md new file mode 100644 index 00000000..813f3f8b --- /dev/null +++ b/feeds/npm/README.md @@ -0,0 +1,13 @@ +# npm Feed + +This feed allows polling of package updates from the registry.npmjs.org package repository. + +## Configuration options + +The `packages` field is not supported by the npm feed. 
+ + +``` +feeds: +- type: npm +``` \ No newline at end of file diff --git a/feeds/npm/npm.go b/feeds/npm/npm.go index 1dff9a31..2a24fa44 100644 --- a/feeds/npm/npm.go +++ b/feeds/npm/npm.go @@ -94,10 +94,16 @@ type Feed struct { lossyFeedAlerter *feeds.LossyFeedAlerter } -func New(eventHandler *events.Handler) *Feed { +func New(feedOptions feeds.FeedOptions, eventHandler *events.Handler) (*Feed, error) { + if feedOptions.Packages != nil { + return nil, feeds.UnsupportedOptionError{ + Feed: FeedName, + Option: "packages", + } + } return &Feed{ lossyFeedAlerter: feeds.NewLossyFeedAlerter(eventHandler), - } + }, nil } func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, error) { diff --git a/feeds/npm/npm_test.go b/feeds/npm/npm_test.go index 25add990..21dbfd7a 100644 --- a/feeds/npm/npm_test.go +++ b/feeds/npm/npm_test.go @@ -7,6 +7,7 @@ import ( "time" "github.com/ossf/package-feeds/events" + "github.com/ossf/package-feeds/feeds" "github.com/ossf/package-feeds/testutils" ) @@ -22,7 +23,10 @@ func TestNpmLatest(t *testing.T) { baseURL = srv.URL + "/-/rss/" versionURL = srv.URL + "/" - feed := New(events.NewNullHandler()) + feed, err := New(feeds.FeedOptions{}, events.NewNullHandler()) + if err != nil { + t.Fatalf("failed to create new npm feed: %v", err) + } cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) pkgs, err := feed.Latest(cutoff) diff --git a/feeds/nuget/README.md b/feeds/nuget/README.md new file mode 100644 index 00000000..cb40943a --- /dev/null +++ b/feeds/nuget/README.md @@ -0,0 +1,13 @@ +# nuget Feed + +This feed allows polling of package updates from the nuget package repository. + +## Configuration options + +The `packages` field is not supported by the nuget feed. 
+ + +``` +feeds: +- type: nuget +``` \ No newline at end of file diff --git a/feeds/nuget/nuget.go b/feeds/nuget/nuget.go index 80eb83b8..73d73130 100644 --- a/feeds/nuget/nuget.go +++ b/feeds/nuget/nuget.go @@ -131,6 +131,16 @@ func fetchPackageInfo(url string) (*nugetPackageDetails, error) { type Feed struct{} +func New(feedOptions feeds.FeedOptions) (*Feed, error) { + if feedOptions.Packages != nil { + return nil, feeds.UnsupportedOptionError{ + Feed: FeedName, + Option: "packages", + } + } + return &Feed{}, nil +} + // Latest will parse all creation events for packages in the nuget.org catalog feed // for packages that have been published since the cutoff // https://docs.microsoft.com/en-us/nuget/api/catalog-resource diff --git a/feeds/packagist/README.md b/feeds/packagist/README.md new file mode 100644 index 00000000..4ffcff43 --- /dev/null +++ b/feeds/packagist/README.md @@ -0,0 +1,13 @@ +# packagist Feed + +This feed allows polling of package updates from the packagist package repository. + +## Configuration options + +The `packages` field is not supported by the packagist feed. + + +``` +feeds: +- type: packagist +``` \ No newline at end of file diff --git a/feeds/packagist/packagist.go b/feeds/packagist/packagist.go index afe25139..a722ed3a 100644 --- a/feeds/packagist/packagist.go +++ b/feeds/packagist/packagist.go @@ -30,6 +30,16 @@ type actions struct { type Feed struct{} +func New(feedOptions feeds.FeedOptions) (*Feed, error) { + if feedOptions.Packages != nil { + return nil, feeds.UnsupportedOptionError{ + Feed: FeedName, + Option: "packages", + } + } + return &Feed{}, nil +} + func fetchPackages(since time.Time) ([]actions, error) { client := &http.Client{ Timeout: 10 * time.Second, diff --git a/feeds/pypi/README.md b/feeds/pypi/README.md new file mode 100644 index 00000000..6ffba5a6 --- /dev/null +++ b/feeds/pypi/README.md @@ -0,0 +1,18 @@ +# Pypi Feed + +This feed allows polling of package updates from the pypi package repository. 
+ +## Configuration options + +The `packages` field can be supplied to the pypi feed options to enable polling of package-specific APIs. This is less effective +with large lists of packages as it polls the RSS feed for each package individually, but it is much less likely to miss package updates between polls. + + +``` +feeds: +- type: pypi + options: + packages: + - numpy + - scipy +``` \ No newline at end of file diff --git a/feeds/pypi/pypi.go b/feeds/pypi/pypi.go index 0b17cf6b..b05c4de2 100644 --- a/feeds/pypi/pypi.go +++ b/feeds/pypi/pypi.go @@ -2,6 +2,8 @@ package pypi import ( "encoding/xml" + "errors" + "fmt" "net/http" "strings" "time" @@ -15,10 +17,12 @@ const ( ) var ( - baseURL = "https://pypi.org/rss/updates.xml" - httpClient = &http.Client{ + baseURL = "https://pypi.org/rss/updates.xml" + packageURLFormat = "https://pypi.org/rss/project/%s/releases.xml" + httpClient = &http.Client{ Timeout: 10 * time.Second, } + errInvalidLinkForPackage = errors.New("invalid link provided by pypi API") ) type Response struct { @@ -31,14 +35,22 @@ type Package struct { Link string `xml:"link"` } -func (p *Package) Name() string { - // The XML Feed has a "Title" element that contains the package and version in it. - return strings.Split(p.Title, " ")[0] +func (p *Package) Name() (string, error) { + // The XML Link splits to: []string{"https:", "", "pypi.org", "project", "foopy", "2.1", ""} + parts := strings.Split(p.Link, "/") + if len(parts) < 5 { + return "", errInvalidLinkForPackage + } + return parts[len(parts)-3], nil } -func (p *Package) Version() string { - // The XML Feed has a "Title" element that contains the package and version in it. 
- return strings.Split(p.Title, " ")[1] +func (p *Package) Version() (string, error) { + // The XML Link splits to: []string{"https:", "", "pypi.org", "project", "foopy", "2.1", ""} + parts := strings.Split(p.Link, "/") + if len(parts) < 5 { + return "", errInvalidLinkForPackage + } + return parts[len(parts)-2], nil } type rfc1123Time struct { @@ -73,28 +85,93 @@ func fetchPackages() ([]*Package, error) { return rssResponse.Packages, nil } +func fetchCriticalPackages(packageList []string) ([]*Package, error) { + responseChannel := make(chan *Response) + errChannel := make(chan error) + + for _, pkgName := range packageList { + go func(pkgName string) { + resp, err := httpClient.Get(fmt.Sprintf(packageURLFormat, pkgName)) + if err != nil { + errChannel <- err + return + } + defer resp.Body.Close() + rssResponse := &Response{} + err = xml.NewDecoder(resp.Body).Decode(rssResponse) + if err != nil { + errChannel <- err + return + } + + responseChannel <- rssResponse + }(pkgName) + } + + pkgs := []*Package{} + var lastErr error + for i := 0; i < len(packageList); i++ { + select { + case response := <-responseChannel: + pkgs = append(pkgs, response.Packages...) + case err := <-errChannel: + lastErr = err + } + } + return pkgs, lastErr +} + type Feed struct { + packages *[]string + lossyFeedAlerter *feeds.LossyFeedAlerter } -func New(eventHandler *events.Handler) *Feed { +func New(feedOptions feeds.FeedOptions, eventHandler *events.Handler) (*Feed, error) { return &Feed{ + packages: feedOptions.Packages, lossyFeedAlerter: feeds.NewLossyFeedAlerter(eventHandler), - } + }, nil } func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, error) { pkgs := []*feeds.Package{} - pypiPackages, err := fetchPackages() + var pypiPackages []*Package + var err error + + if feed.packages == nil { + // Firehose fetch all packages. + pypiPackages, err = fetchPackages() + } else { + // Fetch specific packages individually from configured packages list. 
+ pypiPackages, err = fetchCriticalPackages(*feed.packages) + } + if err != nil { - return pkgs, err + return nil, err } for _, pkg := range pypiPackages { - pkg := feeds.NewPackage(pkg.CreatedDate.Time, pkg.Name(), pkg.Version(), FeedName) + pkgName, err := pkg.Name() + if err != nil { + return nil, err + } + pkgVersion, err := pkg.Version() + if err != nil { + return nil, err + } + pkg := feeds.NewPackage(pkg.CreatedDate.Time, pkgName, pkgVersion, FeedName) pkgs = append(pkgs, pkg) } - feed.lossyFeedAlerter.ProcessPackages(FeedName, pkgs) + + // Lossy feed detection is only necessary for firehose fetching + if feed.packages == nil { + feed.lossyFeedAlerter.ProcessPackages(FeedName, pkgs) + } pkgs = feeds.ApplyCutoff(pkgs, cutoff) return pkgs, nil } + +func (feed Feed) GetPackageList() *[]string { + return feed.packages +} diff --git a/feeds/pypi/pypi_test.go b/feeds/pypi/pypi_test.go index 8c2863e1..580f9c3e 100644 --- a/feeds/pypi/pypi_test.go +++ b/feeds/pypi/pypi_test.go @@ -6,6 +6,7 @@ import ( "time" "github.com/ossf/package-feeds/events" + "github.com/ossf/package-feeds/feeds" "github.com/ossf/package-feeds/testutils" ) @@ -18,7 +19,10 @@ func TestPypiLatest(t *testing.T) { srv := testutils.HTTPServerMock(handlers) baseURL = srv.URL + "/rss/updates.xml" - feed := New(events.NewNullHandler()) + feed, err := New(feeds.FeedOptions{}, events.NewNullHandler()) + if err != nil { + t.Fatalf("failed to create new pypi feed: %v", err) + } cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) pkgs, err := feed.Latest(cutoff) @@ -46,6 +50,60 @@ func TestPypiLatest(t *testing.T) { } } +func TestPypiCriticalLatest(t *testing.T) { + t.Parallel() + + handlers := map[string]testutils.HTTPHandlerFunc{ + "/rss/project/foopy/releases.xml": foopyReleasesResponse, + "/rss/project/barpy/releases.xml": barpyReleasesResponse, + } + packages := []string{ + "foopy", + "barpy", + } + srv := testutils.HTTPServerMock(handlers) + + packageURLFormat = srv.URL + 
"/rss/project/%s/releases.xml" + feed, err := New(feeds.FeedOptions{ + Packages: &packages, + }, events.NewNullHandler()) + if err != nil { + t.Fatalf("Unexpected err: %v", err) + } + + cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) + pkgs, err := feed.Latest(cutoff) + if err != nil { + t.Fatalf("failed to call Latest() with err: %v", err) + } + + const expectedNumPackages = 4 + if len(pkgs) != expectedNumPackages { + t.Fatalf("Latest() produced %v packages instead of the expected %v", len(pkgs), expectedNumPackages) + } + pkgMap := map[string]map[string]*feeds.Package{} + pkgMap["foopy"] = map[string]*feeds.Package{} + pkgMap["barpy"] = map[string]*feeds.Package{} + + for _, pkg := range pkgs { + pkgMap[pkg.Name][pkg.Version] = pkg + } + + if _, ok := pkgMap["foopy"]["2.1"]; !ok { + t.Fatalf("missing foopy 2.1") + } + if _, ok := pkgMap["foopy"]["2.0"]; !ok { + t.Fatalf("missing foopy 2.0") + } + if _, ok := pkgMap["barpy"]["1.1"]; !ok { + t.Fatalf("missing barpy 1.1") + } + if _, ok := pkgMap["barpy"]["1.0"]; !ok { + t.Fatalf("missing barpy 1.0") + } +} + +// Mock data for pypi firehose with all packages. func updatesXMLHandle(w http.ResponseWriter, r *http.Request) { _, err := w.Write([]byte(` @@ -76,3 +134,61 @@ func updatesXMLHandle(w http.ResponseWriter, r *http.Request) { http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) } } + +// Mock data response for package specific api when pypi is configured with +// a package list in FeedOptions. 
+func foopyReleasesResponse(w http.ResponseWriter, r *http.Request) { + _, err := w.Write([]byte(` + + + + PyPI recent updates for foopy + https://pypi.org/project/foopy/ + Recent updates to the Python Package Index for foopy + en + + 2.1 + https://pypi.org/project/foopy/2.1/ + Sat, 27 Mar 2021 22:16:26 GMT + + + 2.0 + https://pypi.org/project/foopy/2.0/ + Sun, 23 Sep 2018 16:50:37 GMT + + + +`)) + if err != nil { + http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) + } +} + +// Mock data response for package specific api when pypi is configured with +// a package list in FeedOptions. +func barpyReleasesResponse(w http.ResponseWriter, r *http.Request) { + _, err := w.Write([]byte(` + + + + PyPI recent updates for barpy + https://pypi.org/project/barpy/ + Recent updates to the Python Package Index for barpy + en + + 1.1 + https://pypi.org/project/barpy/1.1/ + Sat, 27 Mar 2021 22:16:26 GMT + + + 1.0 + https://pypi.org/project/barpy/1.0/ + Sun, 23 Sep 2018 16:50:37 GMT + + + +`)) + if err != nil { + http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) + } +} diff --git a/feeds/rubygems/README.md b/feeds/rubygems/README.md new file mode 100644 index 00000000..13bf0408 --- /dev/null +++ b/feeds/rubygems/README.md @@ -0,0 +1,13 @@ +# rubygems Feed + +This feed allows polling of package updates from the rubygems package repository. + +## Configuration options + +The `packages` field is not supported by the rubygems feed. 
+ + +``` +feeds: +- type: rubygems +``` \ No newline at end of file diff --git a/feeds/rubygems/rubygems.go b/feeds/rubygems/rubygems.go index 0324e460..591feb4d 100644 --- a/feeds/rubygems/rubygems.go +++ b/feeds/rubygems/rubygems.go @@ -42,10 +42,16 @@ type Feed struct { lossyFeedAlerter *feeds.LossyFeedAlerter } -func New(eventHandler *events.Handler) *Feed { +func New(feedOptions feeds.FeedOptions, eventHandler *events.Handler) (*Feed, error) { + if feedOptions.Packages != nil { + return nil, feeds.UnsupportedOptionError{ + Feed: FeedName, + Option: "packages", + } + } return &Feed{ lossyFeedAlerter: feeds.NewLossyFeedAlerter(eventHandler), - } + }, nil } func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, error) { diff --git a/feeds/rubygems/rubygems_test.go b/feeds/rubygems/rubygems_test.go index c7a180aa..8f4ad0f4 100644 --- a/feeds/rubygems/rubygems_test.go +++ b/feeds/rubygems/rubygems_test.go @@ -20,7 +20,10 @@ func TestRubyLatest(t *testing.T) { srv := testutils.HTTPServerMock(handlers) baseURL = srv.URL + "/api/v1/activity" - feed := New(events.NewNullHandler()) + feed, err := New(feeds.FeedOptions{}, events.NewNullHandler()) + if err != nil { + t.Fatalf("failed to create new ruby feed: %v", err) + } cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) pkgs, err := feed.Latest(cutoff)