Skip to content

Commit

Permalink
Move tag exclusion to scrape query resolver (#2391)
Browse files · Browse the repository at this point in the history
  • Loading branch information
WithoutPants authored Mar 20, 2022
1 parent e4ad42c commit dd0fa48
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 46 deletions.
111 changes: 98 additions & 13 deletions internal/api/resolver_query_scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,17 @@ import (
"context"
"errors"
"fmt"
"regexp"
"strconv"
"strings"

"github.com/stashapp/stash/internal/manager"
"github.com/stashapp/stash/internal/manager/config"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/scraper"
"github.com/stashapp/stash/pkg/scraper/stashbox"
"github.com/stashapp/stash/pkg/sliceutil/stringslice"
)

func (r *queryResolver) ScrapeURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
Expand Down Expand Up @@ -99,7 +104,13 @@ func (r *queryResolver) ScrapeSceneQuery(ctx context.Context, scraperID string,
return nil, err
}

return marshalScrapedScenes(content)
ret, err := marshalScrapedScenes(content)
if err != nil {
return nil, err
}

filterSceneTags(ret)
return ret, nil
}

func (r *queryResolver) ScrapeScene(ctx context.Context, scraperID string, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
Expand All @@ -113,7 +124,59 @@ func (r *queryResolver) ScrapeScene(ctx context.Context, scraperID string, scene
return nil, err
}

return marshalScrapedScene(content)
ret, err := marshalScrapedScene(content)
if err != nil {
return nil, err
}

filterSceneTags([]*models.ScrapedScene{ret})

return ret, nil
}

// filterSceneTags removes tags matching the configured exclusion patterns
// from the provided scraped scenes, mutating each scene's Tags slice in
// place. Matching is case-insensitive: both the pattern and the tag name
// are lower-cased before matching. Invalid patterns are logged and skipped
// so one bad pattern does not disable the rest. Nil scenes are ignored.
func filterSceneTags(scenes []*models.ScrapedScene) {
	excludePatterns := manager.GetInstance().Config.GetScraperExcludeTagPatterns()

	// Compile all patterns up front, once for the whole batch.
	var excludeRegexps []*regexp.Regexp
	for _, excludePattern := range excludePatterns {
		reg, err := regexp.Compile(strings.ToLower(excludePattern))
		if err != nil {
			logger.Errorf("Invalid tag exclusion pattern: %v", err)
			continue
		}
		excludeRegexps = append(excludeRegexps, reg)
	}

	// Nothing to filter against — leave all scenes untouched.
	if len(excludeRegexps) == 0 {
		return
	}

	// Collect unique excluded tag names for a single summary log line.
	var ignoredTags []string

	for _, s := range scenes {
		if s == nil {
			// Callers wrap single marshalled scenes in a slice; defensively
			// skip a nil entry rather than panic on s.Tags.
			continue
		}

		var newTags []*models.ScrapedTag
		for _, t := range s.Tags {
			ignore := false
			for _, reg := range excludeRegexps {
				if reg.MatchString(strings.ToLower(t.Name)) {
					ignore = true
					ignoredTags = stringslice.StrAppendUnique(ignoredTags, t.Name)
					break
				}
			}

			if !ignore {
				newTags = append(newTags, t)
			}
		}

		s.Tags = newTags
	}

	if len(ignoredTags) > 0 {
		logger.Debugf("Scraping ignored tags: %s", strings.Join(ignoredTags, ", "))
	}
}

func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
Expand All @@ -122,7 +185,14 @@ func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*models
return nil, err
}

return marshalScrapedScene(content)
ret, err := marshalScrapedScene(content)
if err != nil {
return nil, err
}

filterSceneTags([]*models.ScrapedScene{ret})

return ret, nil
}

func (r *queryResolver) ScrapeGallery(ctx context.Context, scraperID string, gallery models.GalleryUpdateInput) (*models.ScrapedGallery, error) {
Expand Down Expand Up @@ -208,10 +278,13 @@ func (r *queryResolver) getStashBoxClient(index int) (*stashbox.Client, error) {
}

func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeSingleSceneInput) ([]*models.ScrapedScene, error) {
if source.ScraperID != nil {
var ret []*models.ScrapedScene

switch {
case source.ScraperID != nil:
var err error
var c models.ScrapedContent
var content []models.ScrapedContent
var err error

switch {
case input.SceneID != nil:
Expand Down Expand Up @@ -239,23 +312,35 @@ func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.Scr
return nil, err
}

return marshalScrapedScenes(content)
} else if source.StashBoxIndex != nil {
ret, err = marshalScrapedScenes(content)
if err != nil {
return nil, err
}
case source.StashBoxIndex != nil:
client, err := r.getStashBoxClient(*source.StashBoxIndex)
if err != nil {
return nil, err
}

if input.SceneID != nil {
return client.FindStashBoxScenesByFingerprintsFlat(ctx, []string{*input.SceneID})
} else if input.Query != nil {
return client.QueryStashBoxScene(ctx, *input.Query)
switch {
case input.SceneID != nil:
ret, err = client.FindStashBoxScenesByFingerprintsFlat(ctx, []string{*input.SceneID})
case input.Query != nil:
ret, err = client.QueryStashBoxScene(ctx, *input.Query)
default:
return nil, fmt.Errorf("%w: scene_id or query must be set", ErrInput)
}

return nil, fmt.Errorf("%w: scene_id or query must be set", ErrInput)
if err != nil {
return nil, err
}
default:
return nil, fmt.Errorf("%w: scraper_id or stash_box_index must be set", ErrInput)
}

return nil, fmt.Errorf("%w: scraper_id or stash_box_index must be set", ErrInput)
filterSceneTags(ret)

return ret, nil
}

func (r *queryResolver) ScrapeMultiScenes(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeMultiScenesInput) ([][]*models.ScrapedScene, error) {
Expand Down
1 change: 0 additions & 1 deletion pkg/scraper/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ type GlobalConfig interface {
GetScrapersPath() string
GetScraperCDPPath() string
GetScraperCertCheck() bool
GetScraperExcludeTagPatterns() []string
}

func isCDPPathHTTP(c GlobalConfig) bool {
Expand Down
37 changes: 5 additions & 32 deletions pkg/scraper/postprocessing.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ package scraper

import (
"context"
"regexp"
"strings"

"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/match"
Expand Down Expand Up @@ -50,7 +48,7 @@ func (c Cache) postScrapePerformer(ctx context.Context, p models.ScrapedPerforme
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
tqb := r.Tag()

tags, err := postProcessTags(c.globalConfig, tqb, p.Tags)
tags, err := postProcessTags(tqb, p.Tags)
if err != nil {
return err
}
Expand Down Expand Up @@ -93,7 +91,7 @@ func (c Cache) postScrapeScenePerformer(ctx context.Context, p models.ScrapedPer
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
tqb := r.Tag()

tags, err := postProcessTags(c.globalConfig, tqb, p.Tags)
tags, err := postProcessTags(tqb, p.Tags)
if err != nil {
return err
}
Expand Down Expand Up @@ -135,7 +133,7 @@ func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene) (
}
}

tags, err := postProcessTags(c.globalConfig, tqb, scene.Tags)
tags, err := postProcessTags(tqb, scene.Tags)
if err != nil {
return err
}
Expand Down Expand Up @@ -174,7 +172,7 @@ func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery) (
}
}

tags, err := postProcessTags(c.globalConfig, tqb, g.Tags)
tags, err := postProcessTags(tqb, g.Tags)
if err != nil {
return err
}
Expand All @@ -195,41 +193,16 @@ func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery) (
return g, nil
}

func postProcessTags(globalConfig GlobalConfig, tqb models.TagReader, scrapedTags []*models.ScrapedTag) ([]*models.ScrapedTag, error) {
func postProcessTags(tqb models.TagReader, scrapedTags []*models.ScrapedTag) ([]*models.ScrapedTag, error) {
var ret []*models.ScrapedTag

excludePatterns := globalConfig.GetScraperExcludeTagPatterns()
var excludeRegexps []*regexp.Regexp

for _, excludePattern := range excludePatterns {
reg, err := regexp.Compile(strings.ToLower(excludePattern))
if err != nil {
logger.Errorf("Invalid tag exclusion pattern :%v", err)
} else {
excludeRegexps = append(excludeRegexps, reg)
}
}

var ignoredTags []string
ScrapeTag:
for _, t := range scrapedTags {
for _, reg := range excludeRegexps {
if reg.MatchString(strings.ToLower(t.Name)) {
ignoredTags = append(ignoredTags, t.Name)
continue ScrapeTag
}
}

err := match.ScrapedTag(tqb, t)
if err != nil {
return nil, err
}
ret = append(ret, t)
}

if len(ignoredTags) > 0 {
logger.Infof("Scraping ignored tags: %s", strings.Join(ignoredTags, ", "))
}

return ret, nil
}
1 change: 1 addition & 0 deletions ui/v2.5/src/components/Changelog/versions/v0140.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* Improved autotag performance. ([#2368](https://github.com/stashapp/stash/pull/2368))

### 🐛 Bug fixes
* Perform tag pattern exclusion on stash-box sources. ([#2391](https://github.com/stashapp/stash/pull/2391))
* Don't generate jpg thumbnails for animated webp files. ([#2388](https://github.com/stashapp/stash/pull/2388))
* Removed warnings and incorrect error message in json scrapers. ([#2375](https://github.com/stashapp/stash/pull/2375))
* Ensure identify continues using other scrapers if a scrape returns no results. ([#2375](https://github.com/stashapp/stash/pull/2375))
Expand Down

0 comments on commit dd0fa48

Please sign in to comment.