diff --git a/README.md b/README.md index 65207018..14097603 100644 --- a/README.md +++ b/README.md @@ -55,10 +55,11 @@ youtube: # youtube configuration, optional rss_location: ./var/rss # location for generated youtube channel's RSS channels: # list of youtube channels to download and process # id: channel or playlist id, name: channel or playlist name, type: "channel" or "playlist", - # lang: language of the channel, keep: override default keep value + # lang: language of the channel, keep: override default keep value + # filter: include and exclude criteria matched against video titles, both are regular expressions - {id: UCWAIvx2yYLK_xTYD4F2mUNw, name: "Живой Гвоздь", lang: "ru-ru"} - {id: UCuIE7-5QzeAR6EdZXwDRwuQ, name: "Дилетант", type: "channel", lang: "ru-ru", "keep": 10} - - {id: PLZVQqcKxEn_6YaOniJmxATjODSVUbbMkd, name: "Точка", type: "playlist", lang: "ru-ru"} + - {id: PLZVQqcKxEn_6YaOniJmxATjODSVUbbMkd, name: "Точка", type: "playlist", lang: "ru-ru", filter: {include: "ТОЧКА", exclude: "STAR'цы Live"}} system: # system configuration update: 1m # update interval for checking source feeds diff --git a/_example/etc/fm-yt.yml b/_example/etc/fm-yt.yml index cfa62064..65a1e584 100644 --- a/_example/etc/fm-yt.yml +++ b/_example/etc/fm-yt.yml @@ -23,7 +23,7 @@ youtube: channels: - {id: UCWAIvx2yYLK_xTYD4F2mUNw, name: "Живой Гвоздь", lang: "ru-ru"} - {id: UCuIE7-5QzeAR6EdZXwDRwuQ, name: "Дилетант", type: "channel", lang: "ru-ru"} - - {id: PLZVQqcKxEn_6YaOniJmxATjODSVUbbMkd, name: "Точка", type: "playlist", lang: "ru-ru"} + - {id: PLZVQqcKxEn_6YaOniJmxATjODSVUbbMkd, name: "Точка", type: "playlist", lang: "ru-ru", filter: {include: "ТОЧКА", exclude: "STAR'цы Live"}} system: update: 1m diff --git a/app/youtube/service.go b/app/youtube/service.go index 270f2382..6cbcd35e 100644 --- a/app/youtube/service.go +++ b/app/youtube/service.go @@ -8,6 +8,7 @@ import ( "fmt" "os" "path" + "regexp" "sort" "strings" "time" @@ -46,6 +47,13 @@ type FeedInfo struct { Type ytfeed.Type `yaml:"type"` Keep int 
`yaml:"keep"` Language string `yaml:"lang"` + Filter FeedFilter `yaml:"filter"` +} + +// FeedFilter contains filter criteria for the feed +type FeedFilter struct { + Include string `yaml:"include"` + Exclude string `yaml:"exclude"` } // DownloaderService is an interface for downloading audio from youtube @@ -211,6 +219,15 @@ func (s *Service) procChannels(ctx context.Context) error { break } + isAllowed, err := s.isAllowed(entry, feedInfo) + if err != nil { + return errors.Wrapf(err, "failed to check if entry %s is relevant", entry.VideoID) + } + if !isAllowed { + allStats.ignored++ + continue + } + ok, err := s.isNew(entry, feedInfo) if err != nil { return errors.Wrapf(err, "failed to check if entry %s exists", entry.VideoID) @@ -346,6 +363,30 @@ func (s *Service) isNew(entry ytfeed.Entry, fi FeedInfo) (ok bool, err error) { return true, nil } +// isAllowed checks if entry matches all filters for the channel feed +func (s *Service) isAllowed(entry ytfeed.Entry, fi FeedInfo) (ok bool, err error) { + matchedIncludeFilter := true + if fi.Filter.Include != "" { + matchedIncludeFilter, err = regexp.MatchString(fi.Filter.Include, entry.Title) + if err != nil { + return false, errors.Wrapf(err, "failed to check if entry %s matches include filter", entry.VideoID) + } + } + + matchedExcludeFilter := false + if fi.Filter.Exclude != "" { + matchedExcludeFilter, err = regexp.MatchString(fi.Filter.Exclude, entry.Title) + if err != nil { + return false, errors.Wrapf(err, "failed to check if entry %s matches exclude filter", entry.VideoID) + } + } + + if matchedIncludeFilter && !matchedExcludeFilter { + return true, nil + } + return false, nil +} + // update sets entry file name and reset published ts func (s *Service) update(entry ytfeed.Entry, file string, fi FeedInfo) ytfeed.Entry { entry.File = file diff --git a/app/youtube/service_test.go b/app/youtube/service_test.go index f71e3918..f2c949e4 100644 --- a/app/youtube/service_test.go +++ b/app/youtube/service_test.go @@ 
-111,6 +111,100 @@ func TestService_Do(t *testing.T) { assert.Equal(t, "/tmp/648f79b3a05ececb8a37600aa0aee332f0374e01.mp3", duration.FileCalls()[3].Fname) } +// nolint:dupl // test if very similar to TestService_RSSFeed +func TestService_DoIsAllowedFilter(t *testing.T) { + + chans := &mocks.ChannelServiceMock{ + GetFunc: func(ctx context.Context, chanID string, feedType ytfeed.Type) ([]ytfeed.Entry, error) { + return []ytfeed.Entry{ + {ChannelID: chanID, VideoID: "vid1", Title: "Prefix1: title1", Published: time.Now()}, + {ChannelID: chanID, VideoID: "vid2", Title: "Prefix2: title2", Published: time.Now()}, + {ChannelID: chanID, VideoID: "vid3", Title: "Prefix2: title3", Published: time.Now()}, + }, nil + }, + } + downloader := &mocks.DownloaderServiceMock{ + GetFunc: func(ctx context.Context, id string, fname string) (string, error) { + return "/tmp/" + fname + ".mp3", nil + }, + } + + duration := &mocks.DurationServiceMock{ + FileFunc: func(fname string) int { + return 1234 + }, + } + + tmpfile := filepath.Join(os.TempDir(), "test.db") + defer os.Remove(tmpfile) + + db, err := bolt.Open(tmpfile, 0o600, &bolt.Options{Timeout: 1 * time.Second}) + require.NoError(t, err) + boltStore := &store.BoltDB{DB: db} + svc := Service{ + Feeds: []FeedInfo{ + {ID: "channel1", Name: "name1", Type: ytfeed.FTChannel, Filter: FeedFilter{Include: "Prefix2", Exclude: "title3"}}, + {ID: "channel2", Name: "name2", Type: ytfeed.FTChannel, Filter: FeedFilter{Include: "^\\w{7}:", Exclude: "\\w+3$"}}, + }, + Downloader: downloader, + ChannelService: chans, + Store: boltStore, + CheckDuration: time.Millisecond * 500, + KeepPerChannel: 10, + RSSFileStore: RSSFileStore{Enabled: true, Location: "/tmp"}, + DurationService: duration, + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*900) + defer cancel() + + err = svc.Do(ctx) + assert.EqualError(t, err, "context deadline exceeded") + + require.Equal(t, 4, len(chans.GetCalls())) + assert.Equal(t, "channel1", 
chans.GetCalls()[0].ChanID) + assert.Equal(t, ytfeed.FTChannel, chans.GetCalls()[0].FeedType) + assert.Equal(t, "channel2", chans.GetCalls()[1].ChanID) + assert.Equal(t, ytfeed.FTChannel, chans.GetCalls()[1].FeedType) + assert.Equal(t, "channel1", chans.GetCalls()[2].ChanID) + assert.Equal(t, "channel2", chans.GetCalls()[3].ChanID) + + res, err := boltStore.Load("channel1", 10) + require.NoError(t, err) + assert.Equal(t, 1, len(res), "one entry for channel1, skipped irrelevant ones") + assert.Equal(t, "vid2", res[0].VideoID) + + res, err = boltStore.Load("channel2", 10) + require.NoError(t, err) + assert.Equal(t, 2, len(res), "two entries for channel2, skipped irrelevant one") + assert.Equal(t, "vid2", res[0].VideoID) + assert.Equal(t, "vid1", res[1].VideoID) + + require.Equal(t, 3, len(downloader.GetCalls())) + require.Equal(t, "vid2", downloader.GetCalls()[0].ID) + require.Equal(t, "vid1", downloader.GetCalls()[1].ID) + require.Equal(t, "vid2", downloader.GetCalls()[2].ID) + require.True(t, downloader.GetCalls()[0].Fname != "") + + rssData, err := os.ReadFile("/tmp/channel1.xml") + require.NoError(t, err) + t.Logf("%s", string(rssData)) + assert.Contains(t, string(rssData), "channel1::vid2") + assert.Contains(t, string(rssData), "1234") + + rssData, err = os.ReadFile("/tmp/channel2.xml") + require.NoError(t, err) + t.Logf("%s", string(rssData)) + assert.Contains(t, string(rssData), "channel2::vid2") + assert.Contains(t, string(rssData), "channel2::vid1") + assert.Contains(t, string(rssData), "1234") + + require.Equal(t, 3, len(duration.FileCalls())) + assert.Equal(t, "/tmp/4308c33c7ddb107c2d0c13a905e4c6962001bab4.mp3", duration.FileCalls()[0].Fname) + assert.Equal(t, "/tmp/3be877c750abb87daee80c005fe87e7a3f824fed.mp3", duration.FileCalls()[1].Fname) + assert.Equal(t, "/tmp/648f79b3a05ececb8a37600aa0aee332f0374e01.mp3", duration.FileCalls()[2].Fname) +} + // nolint:dupl // test if very similar to TestService_RSSFeed func TestService_RSSFeed(t *testing.T) { 
storeSvc := &mocks.StoreServiceMock{