From 26f7a1227e12d4f061869248f16c375bdc433509 Mon Sep 17 00:00:00 2001 From: romangr Date: Sun, 24 Apr 2022 14:51:50 +0400 Subject: [PATCH 1/3] add include and exclude filters for youtube feed --- app/youtube/service.go | 41 +++++++++++++++++++++ app/youtube/service_test.go | 73 +++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/app/youtube/service.go b/app/youtube/service.go index 270f2382..83d57784 100644 --- a/app/youtube/service.go +++ b/app/youtube/service.go @@ -8,6 +8,7 @@ import ( "fmt" "os" "path" + "regexp" "sort" "strings" "time" @@ -46,6 +47,13 @@ type FeedInfo struct { Type ytfeed.Type `yaml:"type"` Keep int `yaml:"keep"` Language string `yaml:"lang"` + Filter FeedFilter `yaml:"filter"` +} + +// FeedFilter contains filter criteria for the feed +type FeedFilter struct { + Include string `yaml:"include"` + Exclude string `yaml:"exclude"` } // DownloaderService is an interface for downloading audio from youtube @@ -211,6 +219,15 @@ func (s *Service) procChannels(ctx context.Context) error { break } + isRelevant, err := s.isRelevant(entry, feedInfo) + if err != nil { + return errors.Wrapf(err, "failed to check if entry %s is relevant", entry.VideoID) + } + if !isRelevant { + allStats.ignored++ + continue + } + ok, err := s.isNew(entry, feedInfo) if err != nil { return errors.Wrapf(err, "failed to check if entry %s exists", entry.VideoID) @@ -346,6 +363,30 @@ func (s *Service) isNew(entry ytfeed.Entry, fi FeedInfo) (ok bool, err error) { return true, nil } +// isRelevant checks if entry matches all filters for the channel feed +func (s *Service) isRelevant(entry ytfeed.Entry, fi FeedInfo) (ok bool, err error) { + matchedIncludeFilter := true + if fi.Filter.Include != "" { + matchedIncludeFilter, err = regexp.MatchString(fi.Filter.Include, entry.Title) + if err != nil { + return false, errors.Wrapf(err, "failed to check if entry %s matches include filter", entry.VideoID) + } + } + + matchedExcludeFilter := false + if fi.Filter.Exclude != "" { + matchedExcludeFilter, err = regexp.MatchString(fi.Filter.Exclude, entry.Title) + if err != nil { + return false, errors.Wrapf(err, "failed to check if entry %s matches exclude filter", entry.VideoID) + } + } + + if matchedIncludeFilter && !matchedExcludeFilter { + return true, nil + } + return false, nil +} + // update sets entry file name and reset published ts func (s *Service) update(entry ytfeed.Entry, file string, fi FeedInfo) ytfeed.Entry { entry.File = file diff --git a/app/youtube/service_test.go b/app/youtube/service_test.go index f71e3918..5bbc52cd 100644 --- a/app/youtube/service_test.go +++ b/app/youtube/service_test.go @@ -111,6 +111,79 @@ func TestService_Do(t *testing.T) { assert.Equal(t, "/tmp/648f79b3a05ececb8a37600aa0aee332f0374e01.mp3", duration.FileCalls()[3].Fname) } +// nolint:dupl // test if very similar to TestService_RSSFeed +func TestService_DoIsRelevantFilter(t *testing.T) { + + chans := &mocks.ChannelServiceMock{ + GetFunc: func(ctx context.Context, chanID string, feedType ytfeed.Type) ([]ytfeed.Entry, error) { + return []ytfeed.Entry{ + {ChannelID: chanID, VideoID: "vid1", Title: "Prefix1: title1", Published: time.Now()}, + {ChannelID: chanID, VideoID: "vid2", Title: "Prefix2: title2", Published: time.Now()}, + {ChannelID: chanID, VideoID: "vid3", Title: "Prefix2: title3", Published: time.Now()}, + }, nil + }, + } + downloader := &mocks.DownloaderServiceMock{ + GetFunc: func(ctx context.Context, id string, fname string) (string, error) { + return "/tmp/" + fname + ".mp3", nil + }, + } + + duration := &mocks.DurationServiceMock{ + FileFunc: func(fname string) int { + return 1234 + }, + } + + tmpfile := filepath.Join(os.TempDir(), "test.db") + defer os.Remove(tmpfile) + + db, err := bolt.Open(tmpfile, 0o600, &bolt.Options{Timeout: 1 * time.Second}) + require.NoError(t, err) + boltStore := &store.BoltDB{DB: db} + svc := Service{ + Feeds: []FeedInfo{ + {ID: "channel1", Name: "name1", Type: ytfeed.FTChannel, Filter: FeedFilter{Include: "Prefix2", Exclude: "title3"}}, + }, + Downloader: downloader, + ChannelService: chans, + Store: boltStore, + CheckDuration: time.Millisecond * 500, + KeepPerChannel: 10, + RSSFileStore: RSSFileStore{Enabled: true, Location: "/tmp"}, + DurationService: duration, + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*900) + defer cancel() + + err = svc.Do(ctx) + assert.EqualError(t, err, "context deadline exceeded") + + require.Equal(t, 2, len(chans.GetCalls())) + assert.Equal(t, "channel1", chans.GetCalls()[0].ChanID) + assert.Equal(t, ytfeed.FTChannel, chans.GetCalls()[0].FeedType) + assert.Equal(t, "channel1", chans.GetCalls()[1].ChanID) + + res, err := boltStore.Load("channel1", 10) + require.NoError(t, err) + assert.Equal(t, 1, len(res), "one entry for channel1, skipped irrelevant ones") + assert.Equal(t, "vid2", res[0].VideoID) + + require.Equal(t, 1, len(downloader.GetCalls())) + require.Equal(t, "vid2", downloader.GetCalls()[0].ID) + require.True(t, downloader.GetCalls()[0].Fname != "") + + rssData, err := os.ReadFile("/tmp/channel1.xml") + require.NoError(t, err) + t.Logf("%s", string(rssData)) + assert.Contains(t, string(rssData), "channel1::vid2") + assert.Contains(t, string(rssData), "1234") + + require.Equal(t, 1, len(duration.FileCalls())) + assert.Equal(t, "/tmp/4308c33c7ddb107c2d0c13a905e4c6962001bab4.mp3", duration.FileCalls()[0].Fname) +} + // nolint:dupl // test if very similar to TestService_RSSFeed func TestService_RSSFeed(t *testing.T) { storeSvc := &mocks.StoreServiceMock{ From bbd61701bb07f9c4b97a3874b83608976f88d494 Mon Sep 17 00:00:00 2001 From: romangr Date: Sun, 24 Apr 2022 15:57:29 +0400 Subject: [PATCH 2/3] add youtube filters to examples --- README.md | 5 +++-- _example/etc/fm-yt.yml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 65207018..4368f349 100644 --- a/README.md +++ b/README.md @@ -55,10 +55,11 @@ youtube: # youtube configuration, optional rss_location: ./var/rss # location for generated youtube channel's RSS channels: # list of youtube channels to download and process # id: channel or playlist id, name: channel or playlist name, type: "channel" or "playlist", - # lang: language of the channel, keep: override default keep value + # lang: language of the channel, keep: override default keep value + # filter: criteria to include and exclude videos - {id: UCWAIvx2yYLK_xTYD4F2mUNw, name: "Живой Гвоздь", lang: "ru-ru"} - {id: UCuIE7-5QzeAR6EdZXwDRwuQ, name: "Дилетант", type: "channel", lang: "ru-ru", "keep": 10} - - {id: PLZVQqcKxEn_6YaOniJmxATjODSVUbbMkd, name: "Точка", type: "playlist", lang: "ru-ru"} + - {id: PLZVQqcKxEn_6YaOniJmxATjODSVUbbMkd, name: "Точка", type: "playlist", lang: "ru-ru", filter: {include: "ТОЧКА", exclude: "STAR'цы Live"}} system: # system configuration update: 1m # update interval for checking source feeds diff --git a/_example/etc/fm-yt.yml b/_example/etc/fm-yt.yml index cfa62064..65a1e584 100644 --- a/_example/etc/fm-yt.yml +++ b/_example/etc/fm-yt.yml @@ -23,7 +23,7 @@ youtube: channels: - {id: UCWAIvx2yYLK_xTYD4F2mUNw, name: "Живой Гвоздь", lang: "ru-ru"} - {id: UCuIE7-5QzeAR6EdZXwDRwuQ, name: "Дилетант", type: "channel", lang: "ru-ru"} - - {id: PLZVQqcKxEn_6YaOniJmxATjODSVUbbMkd, name: "Точка", type: "playlist", lang: "ru-ru"} + - {id: PLZVQqcKxEn_6YaOniJmxATjODSVUbbMkd, name: "Точка", type: "playlist", lang: "ru-ru", filter: {include: "ТОЧКА", exclude: "STAR'цы Live"}} system: update: 1m From 0e188c30e1a756f5bf813412f30086c02e138df5 Mon Sep 17 00:00:00 2001 From: romangr Date: Sun, 24 Apr 2022 22:10:45 +0400 Subject: [PATCH 3/3] rename filter function, add test case with regex --- README.md | 2 +- app/youtube/service.go | 8 ++++---- app/youtube/service_test.go | 31 ++++++++++++++++++++++++++----- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 4368f349..14097603 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ youtube: # youtube configuration, optional channels: # list of youtube channels to download and process # id: channel or playlist id, name: channel or playlist name, type: "channel" or "playlist", # lang: language of the channel, keep: override default keep value - # filter: criteria to include and exclude videos + # filter: criteria to include and exclude videos, can be regex - {id: UCWAIvx2yYLK_xTYD4F2mUNw, name: "Живой Гвоздь", lang: "ru-ru"} - {id: UCuIE7-5QzeAR6EdZXwDRwuQ, name: "Дилетант", type: "channel", lang: "ru-ru", "keep": 10} - {id: PLZVQqcKxEn_6YaOniJmxATjODSVUbbMkd, name: "Точка", type: "playlist", lang: "ru-ru", filter: {include: "ТОЧКА", exclude: "STAR'цы Live"}} diff --git a/app/youtube/service.go b/app/youtube/service.go index 83d57784..6cbcd35e 100644 --- a/app/youtube/service.go +++ b/app/youtube/service.go @@ -219,11 +219,11 @@ func (s *Service) procChannels(ctx context.Context) error { break } - isRelevant, err := s.isRelevant(entry, feedInfo) + isAllowed, err := s.isAllowed(entry, feedInfo) if err != nil { return errors.Wrapf(err, "failed to check if entry %s is relevant", entry.VideoID) } - if !isRelevant { + if !isAllowed { allStats.ignored++ continue } @@ -363,8 +363,8 @@ func (s *Service) isNew(entry ytfeed.Entry, fi FeedInfo) (ok bool, err error) { return true, nil } -// isRelevant checks if entry matches all filters for the channel feed -func (s *Service) isRelevant(entry ytfeed.Entry, fi FeedInfo) (ok bool, err error) { +// isAllowed checks if entry matches all filters for the channel feed +func (s *Service) isAllowed(entry ytfeed.Entry, fi FeedInfo) (ok bool, err error) { matchedIncludeFilter := true if fi.Filter.Include != "" { matchedIncludeFilter, err = regexp.MatchString(fi.Filter.Include, entry.Title) diff --git a/app/youtube/service_test.go b/app/youtube/service_test.go index 5bbc52cd..f2c949e4 100644 --- a/app/youtube/service_test.go +++ b/app/youtube/service_test.go @@ -112,7 +112,7 @@ func TestService_Do(t *testing.T) { } // nolint:dupl // test if very similar to TestService_RSSFeed -func TestService_DoIsRelevantFilter(t *testing.T) { +func TestService_DoIsAllowedFilter(t *testing.T) { chans := &mocks.ChannelServiceMock{ GetFunc: func(ctx context.Context, chanID string, feedType ytfeed.Type) ([]ytfeed.Entry, error) { @@ -144,6 +144,7 @@ func TestService_DoIsRelevantFilter(t *testing.T) { svc := Service{ Feeds: []FeedInfo{ {ID: "channel1", Name: "name1", Type: ytfeed.FTChannel, Filter: FeedFilter{Include: "Prefix2", Exclude: "title3"}}, + {ID: "channel2", Name: "name2", Type: ytfeed.FTChannel, Filter: FeedFilter{Include: "^\\w{7}:", Exclude: "\\w+3$"}}, }, Downloader: downloader, ChannelService: chans, @@ -160,18 +161,29 @@ func TestService_DoIsRelevantFilter(t *testing.T) { err = svc.Do(ctx) assert.EqualError(t, err, "context deadline exceeded") - require.Equal(t, 2, len(chans.GetCalls())) + require.Equal(t, 4, len(chans.GetCalls())) assert.Equal(t, "channel1", chans.GetCalls()[0].ChanID) assert.Equal(t, ytfeed.FTChannel, chans.GetCalls()[0].FeedType) - assert.Equal(t, "channel1", chans.GetCalls()[1].ChanID) + assert.Equal(t, "channel2", chans.GetCalls()[1].ChanID) + assert.Equal(t, ytfeed.FTChannel, chans.GetCalls()[1].FeedType) + assert.Equal(t, "channel1", chans.GetCalls()[2].ChanID) + assert.Equal(t, "channel2", chans.GetCalls()[3].ChanID) res, err := boltStore.Load("channel1", 10) require.NoError(t, err) assert.Equal(t, 1, len(res), "one entry for channel1, skipped irrelevant ones") assert.Equal(t, "vid2", res[0].VideoID) - require.Equal(t, 1, len(downloader.GetCalls())) + res, err = boltStore.Load("channel2", 10) + require.NoError(t, err) + assert.Equal(t, 2, len(res), "two entries for channel2, skipped irrelevant one") + assert.Equal(t, "vid2", res[0].VideoID) + assert.Equal(t, "vid1", res[1].VideoID) + + require.Equal(t, 3, len(downloader.GetCalls())) require.Equal(t, "vid2", downloader.GetCalls()[0].ID) + require.Equal(t, "vid1", downloader.GetCalls()[1].ID) + require.Equal(t, "vid2", downloader.GetCalls()[2].ID) require.True(t, downloader.GetCalls()[0].Fname != "") rssData, err := os.ReadFile("/tmp/channel1.xml") @@ -180,8 +192,17 @@ func TestService_DoIsRelevantFilter(t *testing.T) { assert.Contains(t, string(rssData), "channel1::vid2") assert.Contains(t, string(rssData), "1234") - require.Equal(t, 1, len(duration.FileCalls())) + rssData, err = os.ReadFile("/tmp/channel2.xml") + require.NoError(t, err) + t.Logf("%s", string(rssData)) + assert.Contains(t, string(rssData), "channel2::vid2") + assert.Contains(t, string(rssData), "channel2::vid1") + assert.Contains(t, string(rssData), "1234") + + require.Equal(t, 3, len(duration.FileCalls())) assert.Equal(t, "/tmp/4308c33c7ddb107c2d0c13a905e4c6962001bab4.mp3", duration.FileCalls()[0].Fname) + assert.Equal(t, "/tmp/3be877c750abb87daee80c005fe87e7a3f824fed.mp3", duration.FileCalls()[1].Fname) + assert.Equal(t, "/tmp/648f79b3a05ececb8a37600aa0aee332f0374e01.mp3", duration.FileCalls()[2].Fname) } // nolint:dupl // test if very similar to TestService_RSSFeed