Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix identify and script scraper bugs #2375

Merged
merged 9 commits into from
Mar 14, 2022
8 changes: 6 additions & 2 deletions pkg/api/resolver_query_scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,14 @@ func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.Scr
return nil, fmt.Errorf("%w: sceneID is not an integer: '%s'", ErrInput, *input.SceneID)
}
c, err = r.scraperCache().ScrapeID(ctx, *source.ScraperID, sceneID, models.ScrapeContentTypeScene)
content = []models.ScrapedContent{c}
if c != nil {
content = []models.ScrapedContent{c}
}
case input.SceneInput != nil:
c, err = r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Scene: input.SceneInput})
content = []models.ScrapedContent{c}
if c != nil {
content = []models.ScrapedContent{c}
}
case input.Query != nil:
content, err = r.scraperCache().ScrapeName(ctx, *source.ScraperID, *input.Query, models.ScrapeContentTypeScene)
default:
Expand Down
8 changes: 4 additions & 4 deletions pkg/api/scraped_content.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ func marshalScrapedScenes(content []models.ScrapedContent) ([]*models.ScrapedSce
var ret []*models.ScrapedScene
for _, c := range content {
if c == nil {
ret = append(ret, nil)
// graphql schema requires scenes to be non-nil
continue
}

Expand All @@ -35,7 +35,7 @@ func marshalScrapedPerformers(content []models.ScrapedContent) ([]*models.Scrape
var ret []*models.ScrapedPerformer
for _, c := range content {
if c == nil {
ret = append(ret, nil)
// graphql schema requires performers to be non-nil
continue
}

Expand All @@ -58,7 +58,7 @@ func marshalScrapedGalleries(content []models.ScrapedContent) ([]*models.Scraped
var ret []*models.ScrapedGallery
for _, c := range content {
if c == nil {
ret = append(ret, nil)
// graphql schema requires galleries to be non-nil
continue
}

Expand All @@ -81,7 +81,7 @@ func marshalScrapedMovies(content []models.ScrapedContent) ([]*models.ScrapedMov
var ret []*models.ScrapedMovie
for _, c := range content {
if c == nil {
ret = append(ret, nil)
// graphql schema requires movies to be non-nil
continue
}

Expand Down
3 changes: 2 additions & 1 deletion pkg/identify/identify.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ func (t *SceneIdentifier) scrapeScene(ctx context.Context, scene *models.Scene)
// scrape using the source
scraped, err := source.Scraper.ScrapeScene(ctx, scene.ID)
if err != nil {
return nil, fmt.Errorf("error scraping from %v: %v", source.Scraper, err)
logger.Errorf("error scraping from %v: %v", source.Scraper, err)
continue
}

// if results were found then return
Expand Down
4 changes: 2 additions & 2 deletions pkg/identify/identify_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,12 @@ func TestSceneIdentifier_Identify(t *testing.T) {
{
"error scraping",
errID1,
true,
false,
},
{
"error scraping from second",
errID2,
true,
false,
},
{
"found in first scraper",
Expand Down
5 changes: 5 additions & 0 deletions pkg/manager/task_identify.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,11 @@ func (s scraperSource) ScrapeScene(ctx context.Context, sceneID int) (*models.Sc
return nil, err
}

// don't try to convert nil return value
if content == nil {
return nil, nil
}

if scene, ok := content.(models.ScrapedScene); ok {
return &scene, nil
}
Expand Down
16 changes: 14 additions & 2 deletions pkg/scraper/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,10 +273,16 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty models
return nil, fmt.Errorf("scraper %s: unable to load scene id %v: %w", scraperID, id, err)
}

ret, err = ss.viaScene(ctx, c.client, scene)
// don't assign nil concrete pointer to ret interface, otherwise nil
// detection is harder
scraped, err := ss.viaScene(ctx, c.client, scene)
if err != nil {
return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
}

if scraped != nil {
ret = scraped
}
case models.ScrapeContentTypeGallery:
gs, ok := s.(galleryScraper)
if !ok {
Expand All @@ -288,10 +294,16 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty models
return nil, fmt.Errorf("scraper %s: unable to load gallery id %v: %w", scraperID, id, err)
}

ret, err = gs.viaGallery(ctx, c.client, gallery)
// don't assign nil concrete pointer to ret interface, otherwise nil
// detection is harder
scraped, err := gs.viaGallery(ctx, c.client, gallery)
if err != nil {
return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
}

if scraped != nil {
ret = scraped
}
}

return c.postScrape(ctx, ret)
Expand Down
5 changes: 4 additions & 1 deletion pkg/scraper/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,10 @@ func (q *jsonQuery) runQuery(selector string) ([]string, error) {
value := gjson.Get(q.doc, selector)

if !value.Exists() {
return nil, fmt.Errorf("could not find json path '%s' in json object", selector)
// many possible reasons why the selector may not be in the json object
// and not all are errors.
// Just return nil
return nil, nil
}

var ret []string
Expand Down
18 changes: 18 additions & 0 deletions pkg/scraper/json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,22 @@ jsonScrapers:
verifyField(t, "Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and that’s sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the ... arrow_drop_down Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and that’s sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the VR Porn movies – trust us. Ankles behind her neck and feet over her back so she can kiss her toes, turned, twisted and gyrating, she can fuck any which way she wants (and that ass!), will surely make you fall in love with this hot Virtual Reality Porn slut, as she is one of the finest of them all. Talking about perfection, maybe it’s all the acrobatic work that keeps it in such gorgeous shape? Who cares really, because you just want to take a big bite out of it and never let go. But it’s not all about the body. Mia’s also got a great smile, which might not sound kinky, but believe us, it is a smile that will heat up your innards and drop your pants. Is it her golden skin, her innocent pink lips or that heart-shaped face? There is just too much good stuff going on with Mia Malkova, which is maybe why these past few years have heaped awards upon awards on this Southern California native. Mia came to VR Bangers for her first VR Porn video, so you know she’s only going for top-notch scenes with top-game performers, men, and women. 
Better hit up that yoga studio if you ever dream of being able to bang a flexible and talented chick like lady Malkova. arrow_drop_up", scrapedPerformer.Details, "Details")
verifyField(t, "Blonde", scrapedPerformer.HairColor, "HairColor")
verifyField(t, "57", scrapedPerformer.Weight, "Weight")

notFoundJson := `
{
"data": null
}`

q = &jsonQuery{
doc: notFoundJson,
}

scrapedPerformer, err = performerScraper.scrapePerformer(context.Background(), q)
if err != nil {
t.Fatalf("Error scraping performer: %s", err.Error())
}

if scrapedPerformer != nil {
t.Errorf("expected nil scraped performer when not found, got %v", scrapedPerformer)
}
}
29 changes: 16 additions & 13 deletions pkg/scraper/mapped.go
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ func (r mappedResults) setKey(index int, key string, value string) mappedResults
}

func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*models.ScrapedPerformer, error) {
var ret models.ScrapedPerformer
var ret *models.ScrapedPerformer

performerMap := s.Performer
if performerMap == nil {
Expand All @@ -772,7 +772,8 @@ func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*mod

results := performerMap.process(ctx, q, s.Common)
if len(results) > 0 {
results[0].apply(&ret)
ret = &models.ScrapedPerformer{}
results[0].apply(ret)

// now apply the tags
if performerTagsMap != nil {
Expand All @@ -787,7 +788,7 @@ func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*mod
}
}

return &ret, nil
return ret, nil
}

func (s mappedScraper) scrapePerformers(ctx context.Context, q mappedQuery) ([]*models.ScrapedPerformer, error) {
Expand Down Expand Up @@ -903,7 +904,7 @@ func (s mappedScraper) scrapeScenes(ctx context.Context, q mappedQuery) ([]*mode
}

func (s mappedScraper) scrapeScene(ctx context.Context, q mappedQuery) (*models.ScrapedScene, error) {
var ret models.ScrapedScene
var ret *models.ScrapedScene

sceneScraperConfig := s.Scene
sceneMap := sceneScraperConfig.mappedConfig
Expand All @@ -914,15 +915,14 @@ func (s mappedScraper) scrapeScene(ctx context.Context, q mappedQuery) (*models.
logger.Debug(`Processing scene:`)
results := sceneMap.process(ctx, q, s.Common)
if len(results) > 0 {
ss := s.processScene(ctx, q, results[0])
ret = *ss
ret = s.processScene(ctx, q, results[0])
}

return &ret, nil
return ret, nil
}

func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*models.ScrapedGallery, error) {
var ret models.ScrapedGallery
var ret *models.ScrapedGallery

galleryScraperConfig := s.Gallery
galleryMap := galleryScraperConfig.mappedConfig
Expand All @@ -937,7 +937,9 @@ func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*model
logger.Debug(`Processing gallery:`)
results := galleryMap.process(ctx, q, s.Common)
if len(results) > 0 {
results[0].apply(&ret)
ret = &models.ScrapedGallery{}

results[0].apply(ret)

// now apply the performers and tags
if galleryPerformersMap != nil {
Expand Down Expand Up @@ -974,11 +976,11 @@ func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*model
}
}

return &ret, nil
return ret, nil
}

func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.ScrapedMovie, error) {
var ret models.ScrapedMovie
var ret *models.ScrapedMovie

movieScraperConfig := s.Movie
movieMap := movieScraperConfig.mappedConfig
Expand All @@ -990,7 +992,8 @@ func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.

results := movieMap.process(ctx, q, s.Common)
if len(results) > 0 {
results[0].apply(&ret)
ret = &models.ScrapedMovie{}
results[0].apply(ret)

if movieStudioMap != nil {
logger.Debug(`Processing movie studio:`)
Expand All @@ -1004,5 +1007,5 @@ func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.
}
}

return &ret, nil
return ret, nil
}
24 changes: 12 additions & 12 deletions pkg/scraper/script.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,21 +173,21 @@ func (s *scriptScraper) scrapeByURL(ctx context.Context, url string, ty models.S
func (s *scriptScraper) scrape(ctx context.Context, input string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
switch ty {
case models.ScrapeContentTypePerformer:
var performer models.ScrapedPerformer
var performer *models.ScrapedPerformer
err := s.runScraperScript(input, &performer)
return &performer, err
return performer, err
case models.ScrapeContentTypeGallery:
var gallery models.ScrapedGallery
var gallery *models.ScrapedGallery
err := s.runScraperScript(input, &gallery)
return &gallery, err
return gallery, err
case models.ScrapeContentTypeScene:
var scene models.ScrapedScene
var scene *models.ScrapedScene
err := s.runScraperScript(input, &scene)
return &scene, err
return scene, err
case models.ScrapeContentTypeMovie:
var movie models.ScrapedMovie
var movie *models.ScrapedMovie
err := s.runScraperScript(input, &movie)
return &movie, err
return movie, err
}

return nil, ErrNotSupported
Expand All @@ -200,11 +200,11 @@ func (s *scriptScraper) scrapeSceneByScene(ctx context.Context, scene *models.Sc
return nil, err
}

var ret models.ScrapedScene
var ret *models.ScrapedScene

err = s.runScraperScript(string(inString), &ret)

return &ret, err
return ret, err
}

func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
Expand All @@ -214,11 +214,11 @@ func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mod
return nil, err
}

var ret models.ScrapedGallery
var ret *models.ScrapedGallery

err = s.runScraperScript(string(inString), &ret)

return &ret, err
return ret, err
}

func findPythonExecutable() (string, error) {
Expand Down
3 changes: 3 additions & 0 deletions ui/v2.5/src/components/Changelog/versions/v0140.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,7 @@
* Improved autotag performance. ([#2368](https://github.com/stashapp/stash/pull/2368))

### 🐛 Bug fixes
* Removed warnings and an incorrect error message in JSON scrapers. ([#2375](https://github.com/stashapp/stash/pull/2375))
* Ensure the identify task continues with other scrapers if a scrape returns no results. ([#2375](https://github.com/stashapp/stash/pull/2375))
* Continue trying to identify a scene if a scraper fails. ([#2375](https://github.com/stashapp/stash/pull/2375))
* Fix auto-tag not using case-insensitive matching. ([#2378](https://github.com/stashapp/stash/pull/2378))