Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add include and exclude filters for youtube feed #92

Merged
merged 3 commits into from
Apr 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,11 @@ youtube: # youtube configuration, optional
rss_location: ./var/rss # location for generated youtube channel's RSS
channels: # list of youtube channels to download and process
# id: channel or playlist id, name: channel or playlist name, type: "channel" or "playlist",
# lang: language of the channel, keep: override default keep value
# lang: language of the channel, keep: override default keep value
# filter: include/exclude criteria matched against the video title, each is a regular expression; an empty value disables that check
- {id: UCWAIvx2yYLK_xTYD4F2mUNw, name: "Живой Гвоздь", lang: "ru-ru"}
- {id: UCuIE7-5QzeAR6EdZXwDRwuQ, name: "Дилетант", type: "channel", lang: "ru-ru", "keep": 10}
- {id: PLZVQqcKxEn_6YaOniJmxATjODSVUbbMkd, name: "Точка", type: "playlist", lang: "ru-ru"}
- {id: PLZVQqcKxEn_6YaOniJmxATjODSVUbbMkd, name: "Точка", type: "playlist", lang: "ru-ru", filter: {include: "ТОЧКА", exclude: "STAR'цы Live"}}

system: # system configuration
update: 1m # update interval for checking source feeds
Expand Down
2 changes: 1 addition & 1 deletion _example/etc/fm-yt.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ youtube:
channels:
- {id: UCWAIvx2yYLK_xTYD4F2mUNw, name: "Живой Гвоздь", lang: "ru-ru"}
- {id: UCuIE7-5QzeAR6EdZXwDRwuQ, name: "Дилетант", type: "channel", lang: "ru-ru"}
- {id: PLZVQqcKxEn_6YaOniJmxATjODSVUbbMkd, name: "Точка", type: "playlist", lang: "ru-ru"}
- {id: PLZVQqcKxEn_6YaOniJmxATjODSVUbbMkd, name: "Точка", type: "playlist", lang: "ru-ru", filter: {include: "ТОЧКА", exclude: "STAR'цы Live"}}

system:
update: 1m
Expand Down
41 changes: 41 additions & 0 deletions app/youtube/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"os"
"path"
"regexp"
"sort"
"strings"
"time"
Expand Down Expand Up @@ -46,6 +47,13 @@ type FeedInfo struct {
Type ytfeed.Type `yaml:"type"`
Keep int `yaml:"keep"`
Language string `yaml:"lang"`
Filter FeedFilter `yaml:"filter"`
}

// FeedFilter contains filter criteria for the feed.
// Both patterns are matched against the entry title with regexp.MatchString (see isAllowed);
// an empty pattern disables the corresponding check.
type FeedFilter struct {
// Include, when non-empty, is a regex the entry title has to match for the entry to be allowed
Include string `yaml:"include"`
// Exclude, when non-empty, is a regex which rejects every entry whose title matches it
Exclude string `yaml:"exclude"`
}

// DownloaderService is an interface for downloading audio from youtube
Expand Down Expand Up @@ -211,6 +219,15 @@ func (s *Service) procChannels(ctx context.Context) error {
break
}

isAllowed, err := s.isAllowed(entry, feedInfo)
if err != nil {
return errors.Wrapf(err, "failed to check if entry %s is relevant", entry.VideoID)
}
if !isAllowed {
allStats.ignored++
continue
}

ok, err := s.isNew(entry, feedInfo)
if err != nil {
return errors.Wrapf(err, "failed to check if entry %s exists", entry.VideoID)
Expand Down Expand Up @@ -346,6 +363,30 @@ func (s *Service) isNew(entry ytfeed.Entry, fi FeedInfo) (ok bool, err error) {
return true, nil
}

// isAllowed reports whether the entry title passes the include and exclude filters of the feed.
// An empty include pattern admits every title and an empty exclude pattern rejects none;
// non-empty patterns are evaluated as regular expressions against entry.Title.
// The returned error is non-nil when regexp.MatchString fails for either pattern.
func (s *Service) isAllowed(entry ytfeed.Entry, fi FeedInfo) (ok bool, err error) {
	included, excluded := true, false

	if pattern := fi.Filter.Include; pattern != "" {
		if included, err = regexp.MatchString(pattern, entry.Title); err != nil {
			return false, errors.Wrapf(err, "failed to check if entry %s matches include filter", entry.VideoID)
		}
	}

	// the exclude pattern is evaluated even if the include filter did not match,
	// so a broken exclude pattern is reported regardless of the include result
	if pattern := fi.Filter.Exclude; pattern != "" {
		if excluded, err = regexp.MatchString(pattern, entry.Title); err != nil {
			return false, errors.Wrapf(err, "failed to check if entry %s matches exclude filter", entry.VideoID)
		}
	}

	return included && !excluded, nil
}

// update sets entry file name and reset published ts
func (s *Service) update(entry ytfeed.Entry, file string, fi FeedInfo) ytfeed.Entry {
entry.File = file
Expand Down
94 changes: 94 additions & 0 deletions app/youtube/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,100 @@ func TestService_Do(t *testing.T) {
assert.Equal(t, "/tmp/648f79b3a05ececb8a37600aa0aee332f0374e01.mp3", duration.FileCalls()[3].Fname)
}

// TestService_DoIsAllowedFilter runs Service.Do with two filtered feeds and checks that only
// entries passing the include/exclude filters are downloaded, stored and written to RSS.
// nolint:dupl // test is very similar to TestService_RSSFeed
func TestService_DoIsAllowedFilter(t *testing.T) {

// channel mock returns the same three entries for whatever channel is requested
chans := &mocks.ChannelServiceMock{
GetFunc: func(ctx context.Context, chanID string, feedType ytfeed.Type) ([]ytfeed.Entry, error) {
return []ytfeed.Entry{
{ChannelID: chanID, VideoID: "vid1", Title: "Prefix1: title1", Published: time.Now()},
{ChannelID: chanID, VideoID: "vid2", Title: "Prefix2: title2", Published: time.Now()},
{ChannelID: chanID, VideoID: "vid3", Title: "Prefix2: title3", Published: time.Now()},
}, nil
},
}
// downloader mock does no real work, it only derives a file path from the requested name
downloader := &mocks.DownloaderServiceMock{
GetFunc: func(ctx context.Context, id string, fname string) (string, error) {
return "/tmp/" + fname + ".mp3", nil
},
}

// duration mock reports a fixed value, asserted in the generated RSS below
duration := &mocks.DurationServiceMock{
FileFunc: func(fname string) int {
return 1234
},
}

tmpfile := filepath.Join(os.TempDir(), "test.db")
defer os.Remove(tmpfile)

db, err := bolt.Open(tmpfile, 0o600, &bolt.Options{Timeout: 1 * time.Second})
require.NoError(t, err)
boltStore := &store.BoltDB{DB: db}
svc := Service{
Feeds: []FeedInfo{
// channel1 uses literal patterns: only "Prefix2: title2" (vid2) matches include and misses exclude
{ID: "channel1", Name: "name1", Type: ytfeed.FTChannel, Filter: FeedFilter{Include: "Prefix2", Exclude: "title3"}},
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest adding a case with some regex filters. Maybe for channel2 in addition to the current one

// channel2 uses regex patterns: titles starting with seven word characters and ":" are included,
// titles ending with word characters followed by "3" are excluded, leaving vid1 and vid2
{ID: "channel2", Name: "name2", Type: ytfeed.FTChannel, Filter: FeedFilter{Include: "^\\w{7}:", Exclude: "\\w+3$"}},
},
Downloader: downloader,
ChannelService: chans,
Store: boltStore,
CheckDuration: time.Millisecond * 500,
KeepPerChannel: 10,
RSSFileStore: RSSFileStore{Enabled: true, Location: "/tmp"},
DurationService: duration,
}

// the context deadline is what stops Do, so the deadline error is the expected result
ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*900)
defer cancel()

err = svc.Do(ctx)
assert.EqualError(t, err, "context deadline exceeded")

// both feeds are expected to be requested twice, in feed order, before the deadline
require.Equal(t, 4, len(chans.GetCalls()))
assert.Equal(t, "channel1", chans.GetCalls()[0].ChanID)
assert.Equal(t, ytfeed.FTChannel, chans.GetCalls()[0].FeedType)
assert.Equal(t, "channel2", chans.GetCalls()[1].ChanID)
assert.Equal(t, ytfeed.FTChannel, chans.GetCalls()[1].FeedType)
assert.Equal(t, "channel1", chans.GetCalls()[2].ChanID)
assert.Equal(t, "channel2", chans.GetCalls()[3].ChanID)

// stored entries must contain only the ones which passed the filters
res, err := boltStore.Load("channel1", 10)
require.NoError(t, err)
assert.Equal(t, 1, len(res), "one entry for channel1, skipped irrelevant ones")
assert.Equal(t, "vid2", res[0].VideoID)

res, err = boltStore.Load("channel2", 10)
require.NoError(t, err)
assert.Equal(t, 2, len(res), "two entries for channel2, skipped irrelevant one")
assert.Equal(t, "vid2", res[0].VideoID)
assert.Equal(t, "vid1", res[1].VideoID)

// three downloads in total: one for channel1 and two for channel2, none for filtered-out entries
require.Equal(t, 3, len(downloader.GetCalls()))
require.Equal(t, "vid2", downloader.GetCalls()[0].ID)
require.Equal(t, "vid1", downloader.GetCalls()[1].ID)
require.Equal(t, "vid2", downloader.GetCalls()[2].ID)
require.True(t, downloader.GetCalls()[0].Fname != "")

// generated RSS files must list the allowed entries and the mocked duration
rssData, err := os.ReadFile("/tmp/channel1.xml")
require.NoError(t, err)
t.Logf("%s", string(rssData))
assert.Contains(t, string(rssData), "<guid>channel1::vid2</guid>")
assert.Contains(t, string(rssData), "<itunes:duration>1234</itunes:duration>")

rssData, err = os.ReadFile("/tmp/channel2.xml")
require.NoError(t, err)
t.Logf("%s", string(rssData))
assert.Contains(t, string(rssData), "<guid>channel2::vid2</guid>")
assert.Contains(t, string(rssData), "<guid>channel2::vid1</guid>")
assert.Contains(t, string(rssData), "<itunes:duration>1234</itunes:duration>")

// duration is expected to be requested once per downloaded file
require.Equal(t, 3, len(duration.FileCalls()))
assert.Equal(t, "/tmp/4308c33c7ddb107c2d0c13a905e4c6962001bab4.mp3", duration.FileCalls()[0].Fname)
assert.Equal(t, "/tmp/3be877c750abb87daee80c005fe87e7a3f824fed.mp3", duration.FileCalls()[1].Fname)
assert.Equal(t, "/tmp/648f79b3a05ececb8a37600aa0aee332f0374e01.mp3", duration.FileCalls()[2].Fname)
}

// nolint:dupl // test if very similar to TestService_RSSFeed
func TestService_RSSFeed(t *testing.T) {
storeSvc := &mocks.StoreServiceMock{
Expand Down