diff --git a/README.md b/README.md
index e0e91693..142e2afa 100644
--- a/README.md
+++ b/README.md
@@ -27,12 +27,12 @@ behavior data for anyone interested.
A YAML configuration file can be provided with the following format:
```
-enabled_feeds:
-- pypi
-- npm
-- goproxy
-- rubygems
-- crates
+feeds:
+- type: pypi
+- type: npm
+- type: goproxy
+- type: rubygems
+- type: crates
publisher:
type: 'gcp_pubsub'
@@ -47,7 +47,21 @@ timer: false
```
`poll_rate` string formatted for [duration parser](https://golang.org/pkg/time/#ParseDuration).This is used as an initial value to generate a cutoff point for feed events relative to the given time at execution, with subsequent events using the previous time at execution as the cutoff point.
-`timer` will configure interal polling of the `enabled_feeds` at the given `poll_rate` period. To specify this configuration file, define its path in your environment under the `PACKAGE_FEEDS_CONFIG_PATH` variable.
+`timer` will configure internal polling of the `feeds` at the given `poll_rate` period. To specify this configuration file, define its path in your environment under the `PACKAGE_FEEDS_CONFIG_PATH` variable.
+
+## FeedOptions
+
+Feeds can be configured with additional options; not all feeds support every option. See the appropriate feed `README.md` for supported options.
+Below is an example of such options, with pypi configured to poll a specific set of packages:
+
+```
+feeds:
+- type: pypi
+ options:
+ packages:
+ - fooPackage
+ - barPackage
+```
## Legacy Configuration
diff --git a/cmd/scheduled-feed/main.go b/cmd/scheduled-feed/main.go
index 87782f50..ad2cfe24 100644
--- a/cmd/scheduled-feed/main.go
+++ b/cmd/scheduled-feed/main.go
@@ -112,8 +112,12 @@ func main() {
}
log.Infof("using %q publisher", pub.Name())
- scheduledFeeds, err := appConfig.GetScheduledFeeds()
- log.Infof("watching feeds: %v", strings.Join(appConfig.EnabledFeeds, ", "))
+	feeds, err := appConfig.GetScheduledFeeds()
if err != nil {
log.Fatal(err)
}
+	// Only list feed names once the load is known to have succeeded, so a
+	// failure is never preceded by a misleading empty "watching feeds" line.
+	feedNames := make([]string, 0, len(feeds))
+	for k := range feeds {
+		feedNames = append(feedNames, k)
+	}
+	log.Infof("watching feeds: %v", strings.Join(feedNames, ", "))
diff --git a/config/config_test.go b/config/config_test.go
index be2d881f..93a3f7ba 100644
--- a/config/config_test.go
+++ b/config/config_test.go
@@ -6,16 +6,18 @@ import (
"github.com/ossf/package-feeds/config"
"github.com/ossf/package-feeds/events"
+ "github.com/ossf/package-feeds/feeds"
+ "github.com/ossf/package-feeds/feeds/pypi"
"github.com/ossf/package-feeds/feeds/scheduler"
"github.com/ossf/package-feeds/publisher/stdout"
)
const (
TestConfigStr = `
-enabled_feeds:
-- rubygems
-- goproxy
-- npm
+feeds:
+- type: rubygems
+- type: goproxy
+- type: npm
publisher:
type: "gcp"
@@ -27,8 +29,8 @@ poll_rate: 5m
timer: true
`
TestConfigStrUnknownFeedType = `
-enabled_feeds:
-- foo
+feeds:
+- type: foo
`
TestConfigStrUnknownField = `
foo:
@@ -52,11 +54,11 @@ func TestDefault(t *testing.T) {
t.Parallel()
c := config.Default()
- feeds, err := c.GetScheduledFeeds()
+ scheduledFeeds, err := c.GetScheduledFeeds()
if err != nil {
t.Fatalf("failed to initialize feeds: %v", err)
}
- _ = scheduler.New(feeds)
+ _ = scheduler.New(scheduledFeeds)
}
func TestGetScheduledFeeds(t *testing.T) {
@@ -66,16 +68,16 @@ func TestGetScheduledFeeds(t *testing.T) {
if err != nil {
t.Fatal(err)
}
- if len(c.EnabledFeeds) != 3 {
- t.Fatalf("EnabledFeeds is expected to be 3 but was `%v`", len(c.EnabledFeeds))
+ if len(c.Feeds) != 3 {
+ t.Fatalf("Feeds is expected to be 3 but was `%v`", len(c.Feeds))
}
- feeds, err := c.GetScheduledFeeds()
+ scheduledFeeds, err := c.GetScheduledFeeds()
if err != nil {
t.Fatal(err)
}
- for _, val := range c.EnabledFeeds {
- if _, ok := feeds[val]; !ok {
- t.Errorf("expected `%v` feed was not found in scheduled feeds after GetScheduledFeeds()", val)
+ for _, feed := range c.Feeds {
+ if _, ok := scheduledFeeds[feed.Type]; !ok {
+ t.Errorf("expected `%v` feed was not found in scheduled feeds after GetScheduledFeeds()", feed.Type)
}
}
}
@@ -93,7 +95,7 @@ func TestLoadFeedConfigUnknownFeedType(t *testing.T) {
}
}
-func TestPubConfigToPublisherStdout(t *testing.T) {
+func TestPublisherConfigToPublisherStdout(t *testing.T) {
t.Parallel()
c := config.PublisherConfig{
@@ -109,6 +111,46 @@ func TestPubConfigToPublisherStdout(t *testing.T) {
}
}
+// TestPublisherConfigToFeed checks that a FeedConfig carrying a package list
+// produces a pypi feed holding that same list, element for element.
+func TestPublisherConfigToFeed(t *testing.T) {
+	t.Parallel()
+
+	packages := []string{"foo", "bar", "baz"}
+	c := config.FeedConfig{
+		Type:    pypi.FeedName,
+		Options: feeds.FeedOptions{Packages: &packages},
+	}
+
+	feed, err := c.ToFeed(events.NewNullHandler())
+	if err != nil {
+		t.Fatalf("failed to create pypi feed from configuration: %v", err)
+	}
+	pypiFeed, ok := feed.(*pypi.Feed)
+	if !ok {
+		t.Fatal("failed to cast feed as pypi feed")
+	}
+
+	feedPackages := pypiFeed.GetPackageList()
+	if feedPackages == nil {
+		t.Fatalf("failed to initialize pypi feed package list to poll")
+	}
+
+	// Fatalf above stops the test on nil, so the list can be dereferenced here.
+	got := *feedPackages
+	if len(got) != len(packages) {
+		t.Errorf("pypi package list does not match config provided package list")
+		return
+	}
+	for i, want := range packages {
+		if got[i] != want {
+			t.Errorf("pypi package '%v' does not match configured package '%v'", got[i], want)
+		}
+	}
+}
+
func TestStrictConfigDecoding(t *testing.T) {
t.Parallel()
diff --git a/config/scheduledfeed.go b/config/scheduledfeed.go
index e1b1f3ae..7d45713d 100644
--- a/config/scheduledfeed.go
+++ b/config/scheduledfeed.go
@@ -59,11 +59,11 @@ func NewConfigFromBytes(yamlBytes []byte) (*ScheduledFeedConfig, error) {
}
// Applies environment variables to the configuration.
-func (config *ScheduledFeedConfig) applyEnvVars() {
+func (sc *ScheduledFeedConfig) applyEnvVars() {
// Support legacy env var definition for gcp pub sub.
pubURL := os.Getenv("OSSMALWARE_TOPIC_URL")
if pubURL != "" {
- config.PubConfig = PublisherConfig{
+ sc.PubConfig = PublisherConfig{
Type: gcppubsub.PublisherType,
Config: map[string]interface{}{
"url": pubURL,
@@ -75,7 +75,7 @@ func (config *ScheduledFeedConfig) applyEnvVars() {
port, err := strconv.Atoi(portStr)
if portProvided && err == nil {
- config.HTTPPort = port
+ sc.HTTPPort = port
}
}
@@ -83,53 +83,37 @@ func AddTo(ls *[]int, value int) {
*ls = append(*ls, value)
}
-// Constructs a map of ScheduledFeeds to enable based on the EnabledFeeds provided
-// from configuration, indexed by the feed type.
-func (config *ScheduledFeedConfig) GetScheduledFeeds() (map[string]feeds.ScheduledFeed, error) {
- var err error
+// Constructs a map of ScheduledFeeds to enable based on the Feeds
+// provided from configuration, indexed by the feed type.
+func (sc *ScheduledFeedConfig) GetScheduledFeeds() (map[string]feeds.ScheduledFeed, error) {
scheduledFeeds := map[string]feeds.ScheduledFeed{}
- eventHandler, err := config.GetEventHandler()
+ eventHandler, err := sc.GetEventHandler()
if err != nil {
return nil, err
}
- for _, entry := range config.EnabledFeeds {
- switch entry {
- case crates.FeedName:
- scheduledFeeds[entry] = crates.New(eventHandler)
- case goproxy.FeedName:
- scheduledFeeds[entry] = goproxy.Feed{}
- case npm.FeedName:
- scheduledFeeds[entry] = npm.New(eventHandler)
- case nuget.FeedName:
- scheduledFeeds[entry] = nuget.Feed{}
- case pypi.FeedName:
- scheduledFeeds[entry] = pypi.New(eventHandler)
- case packagist.FeedName:
- scheduledFeeds[entry] = packagist.Feed{}
- case rubygems.FeedName:
- scheduledFeeds[entry] = rubygems.New(eventHandler)
- default:
- err = fmt.Errorf("%w : %v", errUnknownFeed, entry)
+
+ for _, entry := range sc.Feeds {
+ feed, err := entry.ToFeed(eventHandler)
+ if err != nil {
+ return nil, err
}
+ scheduledFeeds[entry.Type] = feed
}
- if err != nil {
- return nil, fmt.Errorf("failed to parse enabled_feeds entries: %w", err)
- }
return scheduledFeeds, nil
}
-func (config *ScheduledFeedConfig) GetEventHandler() (*events.Handler, error) {
+func (sc *ScheduledFeedConfig) GetEventHandler() (*events.Handler, error) {
var err error
- if config.EventsConfig == nil {
- config.eventHandler = events.NewNullHandler()
- } else if config.eventHandler == nil {
- config.eventHandler, err = config.EventsConfig.ToEventHandler()
+ if sc.EventsConfig == nil {
+ sc.eventHandler = events.NewNullHandler()
+ } else if sc.eventHandler == nil {
+ sc.eventHandler, err = sc.EventsConfig.ToEventHandler()
if err != nil {
return nil, err
}
}
- return config.eventHandler, nil
+ return sc.eventHandler, nil
}
func (ec *EventsConfig) ToEventHandler() (*events.Handler, error) {
@@ -145,7 +129,8 @@ func (ec *EventsConfig) ToEventHandler() (*events.Handler, error) {
// Produces a Publisher object from the provided PublisherConfig
// The PublisherConfig.Type value is evaluated and the appropriate Publisher is
-// constructed from the Config field.
+// constructed from the Config field. If the type is not a recognised Publisher type,
+// an error is returned.
func (pc PublisherConfig) ToPublisher(ctx context.Context) (publisher.Publisher, error) {
var err error
switch pc.Type {
@@ -166,9 +151,31 @@ func (pc PublisherConfig) ToPublisher(ctx context.Context) (publisher.Publisher,
case stdout.PublisherType:
return stdout.New(), nil
default:
- err = fmt.Errorf("%w : %v", errUnknownPub, pc.Type)
+ return nil, fmt.Errorf("%w : %v", errUnknownPub, pc.Type)
+ }
+}
+
+// Constructs the appropriate feed for the given type, providing the
+// options to the feed.
+func (fc FeedConfig) ToFeed(eventHandler *events.Handler) (feeds.ScheduledFeed, error) {
+ switch fc.Type {
+ case crates.FeedName:
+ return crates.New(fc.Options, eventHandler)
+ case goproxy.FeedName:
+ return goproxy.New(fc.Options)
+ case npm.FeedName:
+ return npm.New(fc.Options, eventHandler)
+ case nuget.FeedName:
+ return nuget.New(fc.Options)
+ case pypi.FeedName:
+ return pypi.New(fc.Options, eventHandler)
+ case packagist.FeedName:
+ return packagist.New(fc.Options)
+ case rubygems.FeedName:
+ return rubygems.New(fc.Options, eventHandler)
+ default:
+ return nil, fmt.Errorf("%w : %v", errUnknownFeed, fc.Type)
}
- return nil, err
}
// Decode an input using mapstruct decoder with strictness enabled, errors will be returned in
@@ -186,14 +193,14 @@ func strictDecode(input, out interface{}) error {
func Default() *ScheduledFeedConfig {
config := &ScheduledFeedConfig{
- EnabledFeeds: []string{
- crates.FeedName,
- goproxy.FeedName,
- npm.FeedName,
- nuget.FeedName,
- packagist.FeedName,
- pypi.FeedName,
- rubygems.FeedName,
+ Feeds: []FeedConfig{
+ {Type: crates.FeedName},
+ {Type: goproxy.FeedName},
+ {Type: npm.FeedName},
+ {Type: nuget.FeedName},
+ {Type: packagist.FeedName},
+ {Type: pypi.FeedName},
+ {Type: rubygems.FeedName},
},
PubConfig: PublisherConfig{
Type: stdout.PublisherType,
diff --git a/config/structs.go b/config/structs.go
index 7e224f8e..45f46063 100644
--- a/config/structs.go
+++ b/config/structs.go
@@ -1,19 +1,22 @@
package config
-import "github.com/ossf/package-feeds/events"
+import (
+ "github.com/ossf/package-feeds/events"
+ "github.com/ossf/package-feeds/feeds"
+)
type ScheduledFeedConfig struct {
- // Configures the publisher for pushing packages after polling
+ // Configures the publisher for pushing packages after polling.
PubConfig PublisherConfig `yaml:"publisher"`
- // Configures the feeds to be used for polling from package repositories
- EnabledFeeds []string `yaml:"enabled_feeds"`
+ // Configures the feeds to be used for polling from package repositories.
+ Feeds []FeedConfig `yaml:"feeds"`
HTTPPort int `yaml:"http_port,omitempty"`
PollRate string `yaml:"poll_rate"`
Timer bool `yaml:"timer"`
- // Configures the EventHandler instance to be used throughout the package-feeds application
+ // Configures the EventHandler instance to be used throughout the package-feeds application.
EventsConfig *EventsConfig `yaml:"events"`
eventHandler *events.Handler
@@ -24,6 +27,11 @@ type PublisherConfig struct {
Config interface{} `mapstructure:"config"`
}
+// FeedConfig describes one entry of the `feeds` list in the configuration.
+// NOTE(review): the tags here are `mapstructure` while ScheduledFeedConfig uses
+// `yaml` tags — confirm the decoder in use honours them.
+type FeedConfig struct {
+ // Type is the feed name that ToFeed switches on to pick a constructor.
+ Type string `mapstructure:"type"`
+ // Options is passed through to the selected feed's constructor.
+ Options feeds.FeedOptions `mapstructure:"options"`
+}
+
type EventsConfig struct {
Sink string `yaml:"sink"`
EventFilter events.Filter `yaml:"filter"`
diff --git a/feeds/crates/README.md b/feeds/crates/README.md
new file mode 100644
index 00000000..975cc123
--- /dev/null
+++ b/feeds/crates/README.md
@@ -0,0 +1,13 @@
+# Crates Feed
+
+This feed allows polling of package updates from the crates package repository.
+
+## Configuration options
+
+The `packages` field is not supported by the crates feed.
+
+
+```
+feeds:
+- type: crates
+```
\ No newline at end of file
diff --git a/feeds/crates/crates.go b/feeds/crates/crates.go
index 0530513a..34eb9b32 100644
--- a/feeds/crates/crates.go
+++ b/feeds/crates/crates.go
@@ -55,10 +55,16 @@ type Feed struct {
lossyFeedAlerter *feeds.LossyFeedAlerter
}
-func New(eventHandler *events.Handler) *Feed {
+func New(feedOptions feeds.FeedOptions, eventHandler *events.Handler) (*Feed, error) {
+ if feedOptions.Packages != nil {
+ return nil, feeds.UnsupportedOptionError{
+ Feed: FeedName,
+ Option: "packages",
+ }
+ }
return &Feed{
lossyFeedAlerter: feeds.NewLossyFeedAlerter(eventHandler),
- }
+ }, nil
}
func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, error) {
diff --git a/feeds/crates/crates_test.go b/feeds/crates/crates_test.go
index 86c940fd..401ca377 100644
--- a/feeds/crates/crates_test.go
+++ b/feeds/crates/crates_test.go
@@ -6,6 +6,7 @@ import (
"time"
"github.com/ossf/package-feeds/events"
+ "github.com/ossf/package-feeds/feeds"
"github.com/ossf/package-feeds/testutils"
)
@@ -18,7 +19,10 @@ func TestCratesLatest(t *testing.T) {
srv := testutils.HTTPServerMock(handlers)
baseURL = srv.URL + "/api/v1/summary"
- feed := New(events.NewNullHandler())
+ feed, err := New(feeds.FeedOptions{}, events.NewNullHandler())
+ if err != nil {
+ t.Fatalf("failed to create crates feed: %v", err)
+ }
cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
pkgs, err := feed.Latest(cutoff)
diff --git a/feeds/feed.go b/feeds/feed.go
index bbe3ce2c..79923fb8 100644
--- a/feeds/feed.go
+++ b/feeds/feed.go
@@ -1,15 +1,28 @@
package feeds
import (
+ "fmt"
"time"
)
const schemaVer = "1.0"
+// UnsupportedOptionError reports that an option was supplied to a feed that
+// does not support it.
+type UnsupportedOptionError struct {
+ // Option is the name of the rejected option.
+ Option string
+ // Feed is the name of the feed the option was supplied to.
+ Feed string
+}
+
type ScheduledFeed interface {
Latest(cutoff time.Time) ([]*Package, error)
}
+// General configuration options for feeds.
+type FeedOptions struct {
+ // A collection of package names to poll instead of standard firehose behaviour.
+ // Not supported by all feeds.
+ Packages *[]string `yaml:"packages"`
+}
+
// Marshalled json output validated against package.schema.json.
type Package struct {
Name string `json:"name"`
@@ -38,3 +51,7 @@ func ApplyCutoff(pkgs []*Package, cutoff time.Time) []*Package {
}
return filteredPackages
}
+
+func (err UnsupportedOptionError) Error() string {
+ return fmt.Sprintf("unsupported option `%v` supplied to %v feed", err.Option, err.Feed)
+}
diff --git a/feeds/goproxy/README.md b/feeds/goproxy/README.md
new file mode 100644
index 00000000..4d5437f0
--- /dev/null
+++ b/feeds/goproxy/README.md
@@ -0,0 +1,13 @@
+# goproxy Feed
+
+This feed allows polling of package updates from the Go module index (index.golang.org).
+
+## Configuration options
+
+The `packages` field is not supported by the goproxy feed.
+
+
+```
+feeds:
+- type: goproxy
+```
\ No newline at end of file
diff --git a/feeds/goproxy/goproxy.go b/feeds/goproxy/goproxy.go
index e1bb61a7..e9a010a8 100644
--- a/feeds/goproxy/goproxy.go
+++ b/feeds/goproxy/goproxy.go
@@ -70,6 +70,16 @@ func fetchPackages(since time.Time) ([]Package, error) {
type Feed struct{}
+// New constructs a goproxy feed. The feed has no per-package polling mode, so
+// supplying the `packages` option yields a feeds.UnsupportedOptionError.
+func New(feedOptions feeds.FeedOptions) (*Feed, error) {
+	if feedOptions.Packages == nil {
+		return &Feed{}, nil
+	}
+	return nil, feeds.UnsupportedOptionError{
+		Option: "packages",
+		Feed:   FeedName,
+	}
+}
+
func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, error) {
pkgs := []*feeds.Package{}
packages, err := fetchPackages(cutoff)
diff --git a/feeds/npm/README.md b/feeds/npm/README.md
new file mode 100644
index 00000000..813f3f8b
--- /dev/null
+++ b/feeds/npm/README.md
@@ -0,0 +1,13 @@
+# npm Feed
+
+This feed allows polling of package updates from the registry.npmjs.org package registry.
+
+## Configuration options
+
+The `packages` field is not supported by the npm feed.
+
+
+```
+feeds:
+- type: npm
+```
\ No newline at end of file
diff --git a/feeds/npm/npm.go b/feeds/npm/npm.go
index 1dff9a31..2a24fa44 100644
--- a/feeds/npm/npm.go
+++ b/feeds/npm/npm.go
@@ -94,10 +94,16 @@ type Feed struct {
lossyFeedAlerter *feeds.LossyFeedAlerter
}
-func New(eventHandler *events.Handler) *Feed {
+func New(feedOptions feeds.FeedOptions, eventHandler *events.Handler) (*Feed, error) {
+ if feedOptions.Packages != nil {
+ return nil, feeds.UnsupportedOptionError{
+ Feed: FeedName,
+ Option: "packages",
+ }
+ }
return &Feed{
lossyFeedAlerter: feeds.NewLossyFeedAlerter(eventHandler),
- }
+ }, nil
}
func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, error) {
diff --git a/feeds/npm/npm_test.go b/feeds/npm/npm_test.go
index 25add990..21dbfd7a 100644
--- a/feeds/npm/npm_test.go
+++ b/feeds/npm/npm_test.go
@@ -7,6 +7,7 @@ import (
"time"
"github.com/ossf/package-feeds/events"
+ "github.com/ossf/package-feeds/feeds"
"github.com/ossf/package-feeds/testutils"
)
@@ -22,7 +23,10 @@ func TestNpmLatest(t *testing.T) {
baseURL = srv.URL + "/-/rss/"
versionURL = srv.URL + "/"
- feed := New(events.NewNullHandler())
+ feed, err := New(feeds.FeedOptions{}, events.NewNullHandler())
+ if err != nil {
+ t.Fatalf("failed to create new npm feed: %v", err)
+ }
cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
pkgs, err := feed.Latest(cutoff)
diff --git a/feeds/nuget/README.md b/feeds/nuget/README.md
new file mode 100644
index 00000000..cb40943a
--- /dev/null
+++ b/feeds/nuget/README.md
@@ -0,0 +1,13 @@
+# nuget Feed
+
+This feed allows polling of package updates from the nuget package repository.
+
+## Configuration options
+
+The `packages` field is not supported by the nuget feed.
+
+
+```
+feeds:
+- type: nuget
+```
\ No newline at end of file
diff --git a/feeds/nuget/nuget.go b/feeds/nuget/nuget.go
index 80eb83b8..73d73130 100644
--- a/feeds/nuget/nuget.go
+++ b/feeds/nuget/nuget.go
@@ -131,6 +131,16 @@ func fetchPackageInfo(url string) (*nugetPackageDetails, error) {
type Feed struct{}
+// New constructs a nuget feed. The feed has no per-package polling mode, so
+// supplying the `packages` option yields a feeds.UnsupportedOptionError.
+func New(feedOptions feeds.FeedOptions) (*Feed, error) {
+	if feedOptions.Packages == nil {
+		return &Feed{}, nil
+	}
+	return nil, feeds.UnsupportedOptionError{
+		Option: "packages",
+		Feed:   FeedName,
+	}
+}
+
// Latest will parse all creation events for packages in the nuget.org catalog feed
// for packages that have been published since the cutoff
// https://docs.microsoft.com/en-us/nuget/api/catalog-resource
diff --git a/feeds/packagist/README.md b/feeds/packagist/README.md
new file mode 100644
index 00000000..4ffcff43
--- /dev/null
+++ b/feeds/packagist/README.md
@@ -0,0 +1,13 @@
+# packagist Feed
+
+This feed allows polling of package updates from the packagist package repository.
+
+## Configuration options
+
+The `packages` field is not supported by the packagist feed.
+
+
+```
+feeds:
+- type: packagist
+```
\ No newline at end of file
diff --git a/feeds/packagist/packagist.go b/feeds/packagist/packagist.go
index afe25139..a722ed3a 100644
--- a/feeds/packagist/packagist.go
+++ b/feeds/packagist/packagist.go
@@ -30,6 +30,16 @@ type actions struct {
type Feed struct{}
+// New constructs a packagist feed. The feed has no per-package polling mode,
+// so supplying the `packages` option yields a feeds.UnsupportedOptionError.
+func New(feedOptions feeds.FeedOptions) (*Feed, error) {
+	if feedOptions.Packages == nil {
+		return &Feed{}, nil
+	}
+	return nil, feeds.UnsupportedOptionError{
+		Option: "packages",
+		Feed:   FeedName,
+	}
+}
+
func fetchPackages(since time.Time) ([]actions, error) {
client := &http.Client{
Timeout: 10 * time.Second,
diff --git a/feeds/pypi/README.md b/feeds/pypi/README.md
new file mode 100644
index 00000000..6ffba5a6
--- /dev/null
+++ b/feeds/pypi/README.md
@@ -0,0 +1,18 @@
+# Pypi Feed
+
+This feed allows polling of package updates from the pypi package repository.
+
+## Configuration options
+
+The `packages` field can be supplied to the pypi feed options to enable polling of package-specific APIs. This is less efficient
+with large lists of packages, as it polls the RSS feed for each package individually, but it is much less likely to miss package updates between polls.
+
+
+```
+feeds:
+- type: pypi
+ options:
+ packages:
+ - numpy
+ - scipy
+```
\ No newline at end of file
diff --git a/feeds/pypi/pypi.go b/feeds/pypi/pypi.go
index 0b17cf6b..b05c4de2 100644
--- a/feeds/pypi/pypi.go
+++ b/feeds/pypi/pypi.go
@@ -2,6 +2,8 @@ package pypi
import (
"encoding/xml"
+ "errors"
+ "fmt"
"net/http"
"strings"
"time"
@@ -15,10 +17,12 @@ const (
)
var (
- baseURL = "https://pypi.org/rss/updates.xml"
- httpClient = &http.Client{
+ baseURL = "https://pypi.org/rss/updates.xml"
+ packageURLFormat = "https://pypi.org/rss/project/%s/releases.xml"
+ httpClient = &http.Client{
Timeout: 10 * time.Second,
}
+ errInvalidLinkForPackage = errors.New("invalid link provided by pypi API")
)
type Response struct {
@@ -31,14 +35,22 @@ type Package struct {
Link string `xml:"link"`
}
-func (p *Package) Name() string {
- // The XML Feed has a "Title" element that contains the package and version in it.
- return strings.Split(p.Title, " ")[0]
+// Name extracts the package name from the entry's link, which is expected to
+// end in "/<name>/<version>/". A missing trailing slash is tolerated so the
+// name is not confused with the preceding path segment. It returns
+// errInvalidLinkForPackage when the link has too few path segments.
+func (p *Package) Name() (string, error) {
+	// With the trailing slash removed the link splits to:
+	// []string{"https:", "", "pypi.org", "project", "foopy", "2.1"}
+	parts := strings.Split(strings.TrimSuffix(p.Link, "/"), "/")
+	if len(parts) < 4 {
+		return "", errInvalidLinkForPackage
+	}
+	return parts[len(parts)-2], nil
}
-func (p *Package) Version() string {
- // The XML Feed has a "Title" element that contains the package and version in it.
- return strings.Split(p.Title, " ")[1]
+// Version extracts the release version from the entry's link, which is
+// expected to end in "/<name>/<version>/". A missing trailing slash is
+// tolerated so the package name is not returned as the version. It returns
+// errInvalidLinkForPackage when the link has too few path segments.
+func (p *Package) Version() (string, error) {
+	// With the trailing slash removed the link splits to:
+	// []string{"https:", "", "pypi.org", "project", "foopy", "2.1"}
+	parts := strings.Split(strings.TrimSuffix(p.Link, "/"), "/")
+	if len(parts) < 4 {
+		return "", errInvalidLinkForPackage
+	}
+	return parts[len(parts)-1], nil
}
type rfc1123Time struct {
@@ -73,28 +85,93 @@ func fetchPackages() ([]*Package, error) {
return rssResponse.Packages, nil
}
+// errUnexpectedStatus is reported when a package RSS request does not return
+// 200 OK.
+var errUnexpectedStatus = errors.New("unexpected HTTP status from pypi API")
+
+// fetchCriticalPackages concurrently requests the per-package release RSS feed
+// for every name in packageList and returns the combined entries.
+//
+// One goroutine is started per package. If any request fails, answers with a
+// non-200 status, or cannot be decoded, the last such error received is
+// returned (annotated with the package name) alongside whatever entries the
+// successful requests produced.
+func fetchCriticalPackages(packageList []string) ([]*Package, error) {
+	responseChannel := make(chan *Response)
+	errChannel := make(chan error)
+
+	for _, pkgName := range packageList {
+		go func(pkgName string) {
+			resp, err := httpClient.Get(fmt.Sprintf(packageURLFormat, pkgName))
+			if err != nil {
+				errChannel <- fmt.Errorf("fetching pypi package %q: %w", pkgName, err)
+				return
+			}
+			defer resp.Body.Close()
+
+			// Reject error responses (e.g. an unknown package name) instead of
+			// handing an error page to the XML decoder.
+			if resp.StatusCode != http.StatusOK {
+				errChannel <- fmt.Errorf("%w: %v for package %q", errUnexpectedStatus, resp.Status, pkgName)
+				return
+			}
+
+			rssResponse := &Response{}
+			if err := xml.NewDecoder(resp.Body).Decode(rssResponse); err != nil {
+				errChannel <- fmt.Errorf("decoding pypi package %q: %w", pkgName, err)
+				return
+			}
+
+			responseChannel <- rssResponse
+		}(pkgName)
+	}
+
+	// Each goroutine sends exactly one value on one of the two channels, so
+	// receiving len(packageList) times drains them all without leaking.
+	pkgs := []*Package{}
+	var lastErr error
+	for range packageList {
+		select {
+		case response := <-responseChannel:
+			pkgs = append(pkgs, response.Packages...)
+		case err := <-errChannel:
+			lastErr = err
+		}
+	}
+	return pkgs, lastErr
+}
+
type Feed struct {
+ packages *[]string
+
lossyFeedAlerter *feeds.LossyFeedAlerter
}
-func New(eventHandler *events.Handler) *Feed {
+func New(feedOptions feeds.FeedOptions, eventHandler *events.Handler) (*Feed, error) {
return &Feed{
+ packages: feedOptions.Packages,
lossyFeedAlerter: feeds.NewLossyFeedAlerter(eventHandler),
- }
+ }, nil
}
func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, error) {
pkgs := []*feeds.Package{}
- pypiPackages, err := fetchPackages()
+ var pypiPackages []*Package
+ var err error
+
+ if feed.packages == nil {
+ // Firehose fetch all packages.
+ pypiPackages, err = fetchPackages()
+ } else {
+ // Fetch specific packages individually from configured packages list.
+ pypiPackages, err = fetchCriticalPackages(*feed.packages)
+ }
+
if err != nil {
- return pkgs, err
+ return nil, err
}
for _, pkg := range pypiPackages {
- pkg := feeds.NewPackage(pkg.CreatedDate.Time, pkg.Name(), pkg.Version(), FeedName)
+ pkgName, err := pkg.Name()
+ if err != nil {
+ return nil, err
+ }
+ pkgVersion, err := pkg.Version()
+ if err != nil {
+ return nil, err
+ }
+ pkg := feeds.NewPackage(pkg.CreatedDate.Time, pkgName, pkgVersion, FeedName)
pkgs = append(pkgs, pkg)
}
- feed.lossyFeedAlerter.ProcessPackages(FeedName, pkgs)
+
+ // Lossy feed detection is only necessary for firehose fetching
+ if feed.packages == nil {
+ feed.lossyFeedAlerter.ProcessPackages(FeedName, pkgs)
+ }
pkgs = feeds.ApplyCutoff(pkgs, cutoff)
return pkgs, nil
}
+
+// GetPackageList returns the package list the feed was configured with. It is
+// nil when no list was supplied, in which case Latest polls the firehose feed.
+func (feed Feed) GetPackageList() *[]string {
+ return feed.packages
+}
diff --git a/feeds/pypi/pypi_test.go b/feeds/pypi/pypi_test.go
index 8c2863e1..580f9c3e 100644
--- a/feeds/pypi/pypi_test.go
+++ b/feeds/pypi/pypi_test.go
@@ -6,6 +6,7 @@ import (
"time"
"github.com/ossf/package-feeds/events"
+ "github.com/ossf/package-feeds/feeds"
"github.com/ossf/package-feeds/testutils"
)
@@ -18,7 +19,10 @@ func TestPypiLatest(t *testing.T) {
srv := testutils.HTTPServerMock(handlers)
baseURL = srv.URL + "/rss/updates.xml"
- feed := New(events.NewNullHandler())
+ feed, err := New(feeds.FeedOptions{}, events.NewNullHandler())
+ if err != nil {
+ t.Fatalf("failed to create new pypi feed: %v", err)
+ }
cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
pkgs, err := feed.Latest(cutoff)
@@ -46,6 +50,60 @@ func TestPypiLatest(t *testing.T) {
}
}
+func TestPypiCriticalLatest(t *testing.T) {
+ t.Parallel()
+
+ handlers := map[string]testutils.HTTPHandlerFunc{
+ "/rss/project/foopy/releases.xml": foopyReleasesResponse,
+ "/rss/project/barpy/releases.xml": barpyReleasesResponse,
+ }
+ packages := []string{
+ "foopy",
+ "barpy",
+ }
+ srv := testutils.HTTPServerMock(handlers)
+
+ packageURLFormat = srv.URL + "/rss/project/%s/releases.xml"
+ feed, err := New(feeds.FeedOptions{
+ Packages: &packages,
+ }, events.NewNullHandler())
+ if err != nil {
+ t.Fatalf("Unexpected err: %v", err)
+ }
+
+ cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
+ pkgs, err := feed.Latest(cutoff)
+ if err != nil {
+ t.Fatalf("failed to call Latest() with err: %v", err)
+ }
+
+ const expectedNumPackages = 4
+ if len(pkgs) != expectedNumPackages {
+ t.Fatalf("Latest() produced %v packages instead of the expected %v", len(pkgs), expectedNumPackages)
+ }
+ pkgMap := map[string]map[string]*feeds.Package{}
+ pkgMap["foopy"] = map[string]*feeds.Package{}
+ pkgMap["barpy"] = map[string]*feeds.Package{}
+
+ for _, pkg := range pkgs {
+ pkgMap[pkg.Name][pkg.Version] = pkg
+ }
+
+ if _, ok := pkgMap["foopy"]["2.1"]; !ok {
+ t.Fatalf("missing foopy 2.1")
+ }
+ if _, ok := pkgMap["foopy"]["2.0"]; !ok {
+ t.Fatalf("missing foopy 2.0")
+ }
+ if _, ok := pkgMap["barpy"]["1.1"]; !ok {
+ t.Fatalf("missing barpy 1.1")
+ }
+ if _, ok := pkgMap["barpy"]["1.0"]; !ok {
+ t.Fatalf("missing barpy 1.0")
+ }
+}
+
+// Mock data for pypi firehose with all packages.
func updatesXMLHandle(w http.ResponseWriter, r *http.Request) {
_, err := w.Write([]byte(`
@@ -76,3 +134,61 @@ func updatesXMLHandle(w http.ResponseWriter, r *http.Request) {
http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError)
}
}
+
+// Mock data response for package specific api when pypi is configured with
+// a package list in FeedOptions.
+func foopyReleasesResponse(w http.ResponseWriter, r *http.Request) {
+ _, err := w.Write([]byte(`
+
+
+
+ PyPI recent updates for foopy
+ https://pypi.org/project/foopy/
+ Recent updates to the Python Package Index for foopy
+ en
+ -
+ 2.1
+ https://pypi.org/project/foopy/2.1/
+ Sat, 27 Mar 2021 22:16:26 GMT
+
+ -
+ 2.0
+ https://pypi.org/project/foopy/2.0/
+ Sun, 23 Sep 2018 16:50:37 GMT
+
+
+
+`))
+ if err != nil {
+ http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError)
+ }
+}
+
+// Mock data response for package specific api when pypi is configured with
+// a package list in FeedOptions.
+func barpyReleasesResponse(w http.ResponseWriter, r *http.Request) {
+ _, err := w.Write([]byte(`
+
+
+
+ PyPI recent updates for barpy
+ https://pypi.org/project/barpy/
+ Recent updates to the Python Package Index for barpy
+ en
+ -
+ 1.1
+ https://pypi.org/project/barpy/1.1/
+ Sat, 27 Mar 2021 22:16:26 GMT
+
+ -
+ 1.0
+ https://pypi.org/project/barpy/1.0/
+ Sun, 23 Sep 2018 16:50:37 GMT
+
+
+
+`))
+ if err != nil {
+ http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError)
+ }
+}
diff --git a/feeds/rubygems/README.md b/feeds/rubygems/README.md
new file mode 100644
index 00000000..13bf0408
--- /dev/null
+++ b/feeds/rubygems/README.md
@@ -0,0 +1,13 @@
+# rubygems Feed
+
+This feed allows polling of package updates from the rubygems package repository.
+
+## Configuration options
+
+The `packages` field is not supported by the rubygems feed.
+
+
+```
+feeds:
+- type: rubygems
+```
\ No newline at end of file
diff --git a/feeds/rubygems/rubygems.go b/feeds/rubygems/rubygems.go
index 0324e460..591feb4d 100644
--- a/feeds/rubygems/rubygems.go
+++ b/feeds/rubygems/rubygems.go
@@ -42,10 +42,16 @@ type Feed struct {
lossyFeedAlerter *feeds.LossyFeedAlerter
}
-func New(eventHandler *events.Handler) *Feed {
+func New(feedOptions feeds.FeedOptions, eventHandler *events.Handler) (*Feed, error) {
+ if feedOptions.Packages != nil {
+ return nil, feeds.UnsupportedOptionError{
+ Feed: FeedName,
+ Option: "packages",
+ }
+ }
return &Feed{
lossyFeedAlerter: feeds.NewLossyFeedAlerter(eventHandler),
- }
+ }, nil
}
func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, error) {
diff --git a/feeds/rubygems/rubygems_test.go b/feeds/rubygems/rubygems_test.go
index c7a180aa..8f4ad0f4 100644
--- a/feeds/rubygems/rubygems_test.go
+++ b/feeds/rubygems/rubygems_test.go
@@ -20,7 +20,10 @@ func TestRubyLatest(t *testing.T) {
srv := testutils.HTTPServerMock(handlers)
baseURL = srv.URL + "/api/v1/activity"
- feed := New(events.NewNullHandler())
+ feed, err := New(feeds.FeedOptions{}, events.NewNullHandler())
+ if err != nil {
+ t.Fatalf("failed to create new ruby feed: %v", err)
+ }
cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
pkgs, err := feed.Latest(cutoff)