Skip to content

Commit

Permalink
Add image scraping support (#370)
Browse files Browse the repository at this point in the history
* Add sub-scraper functionality
* Add scraping of performer image
* Add scene cover image scraping
* Port UI changes to v2.5
* Fix v2.5 dialog suggest color
* Don't convert eol of UI to support pretty
  • Loading branch information
WithoutPants authored Mar 11, 2020
1 parent 5fb8bbf commit 34d8293
Show file tree
Hide file tree
Showing 21 changed files with 665 additions and 15 deletions.
3 changes: 2 additions & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
go.mod text eol=lf
go.sum text eol=lf
go.sum text eol=lf
ui/v2.5/** -text
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ require (
github.com/h2non/filetype v1.0.8
// this is required for generate
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/jinzhu/copier v0.0.0-20190924061706-b57f9002281a
github.com/jmoiron/sqlx v1.2.0
github.com/mattn/go-sqlite3 v1.10.0
github.com/rs/cors v1.6.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,8 @@ github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANyt
github.com/jackc/fake v0.0.0-20150926172116-812a484cc733/go.mod h1:WrMFNQdiFJ80sQsxDoMokWK1W5TQtxBFNpzWTD84ibQ=
github.com/jackc/pgx v3.2.0+incompatible/go.mod h1:0ZGrqGqkRlliWnWB4zKnWtjbSWbGkVEFm4TeybAXq+I=
github.com/jellevandenhooff/dkim v0.0.0-20150330215556-f50fe3d243e1/go.mod h1:E0B/fFc00Y+Rasa88328GlI/XbtyysCtTHZS8h7IrBU=
github.com/jinzhu/copier v0.0.0-20190924061706-b57f9002281a h1:zPPuIq2jAWWPTrGt70eK/BSch+gFAGrNzecsoENgu2o=
github.com/jinzhu/copier v0.0.0-20190924061706-b57f9002281a/go.mod h1:yL958EeXv8Ylng6IfnvG4oflryUi3vgA3xPs9hmII1s=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/jmoiron/sqlx v0.0.0-20180614180643-0dae4fefe7c0/go.mod h1:IiEW3SEiiErVyFdH8NTuWjSifiEQKUoyK3LNqr2kCHU=
github.com/jmoiron/sqlx v1.2.0 h1:41Ip0zITnmWNR/vHV+S4m+VoUivnWY5E4OJfLZjCJMA=
Expand Down
2 changes: 2 additions & 0 deletions graphql/documents/data/scrapers.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ fragment ScrapedPerformerData on ScrapedPerformer {
tattoos
piercings
aliases
image
}

fragment ScrapedScenePerformerData on ScrapedScenePerformer {
Expand Down Expand Up @@ -75,6 +76,7 @@ fragment ScrapedSceneData on ScrapedScene {
details
url
date
image

file {
size
Expand Down
5 changes: 5 additions & 0 deletions graphql/schema/types/scraped-performer.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ type ScrapedPerformer {
tattoos: String
piercings: String
aliases: String

"""This should be base64 encoded"""
image: String
}

input ScrapedPerformerInput {
Expand All @@ -33,4 +36,6 @@ input ScrapedPerformerInput {
tattoos: String
piercings: String
aliases: String

# not including image for the input
}
3 changes: 3 additions & 0 deletions graphql/schema/types/scraper.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ type ScrapedScene {
url: String
date: String

"""This should be base64 encoded"""
image: String

file: SceneFileType # Resolver

studio: ScrapedSceneStudio
Expand Down
34 changes: 34 additions & 0 deletions pkg/models/model_scraped_item.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,54 @@ type ScrapedPerformer struct {
Tattoos *string `graphql:"tattoos" json:"tattoos"`
Piercings *string `graphql:"piercings" json:"piercings"`
Aliases *string `graphql:"aliases" json:"aliases"`
Image *string `graphql:"image" json:"image"`
}

// ScrapedPerformerStash carries the same fields as ScrapedPerformer except
// Image. It is the result type of the findPerformer query issued against a
// remote stash server; the result is then copied into a ScrapedPerformer and
// the image is fetched separately.
type ScrapedPerformerStash struct {
Name *string `graphql:"name" json:"name"`
URL *string `graphql:"url" json:"url"`
Twitter *string `graphql:"twitter" json:"twitter"`
Instagram *string `graphql:"instagram" json:"instagram"`
Birthdate *string `graphql:"birthdate" json:"birthdate"`
Ethnicity *string `graphql:"ethnicity" json:"ethnicity"`
Country *string `graphql:"country" json:"country"`
EyeColor *string `graphql:"eye_color" json:"eye_color"`
Height *string `graphql:"height" json:"height"`
Measurements *string `graphql:"measurements" json:"measurements"`
FakeTits *string `graphql:"fake_tits" json:"fake_tits"`
CareerLength *string `graphql:"career_length" json:"career_length"`
Tattoos *string `graphql:"tattoos" json:"tattoos"`
Piercings *string `graphql:"piercings" json:"piercings"`
Aliases *string `graphql:"aliases" json:"aliases"`
}

// ScrapedScene holds the scene data produced by a scraper. All scalar fields
// are optional and therefore pointers.
type ScrapedScene struct {
Title *string `graphql:"title" json:"title"`
Details *string `graphql:"details" json:"details"`
URL *string `graphql:"url" json:"url"`
Date *string `graphql:"date" json:"date"`
// Image is the scene cover. The GraphQL schema expects it to be base64
// encoded; a scraper may initially store an http(s) URL here, which
// setSceneImage replaces with the downloaded image.
Image *string `graphql:"image" json:"image"`
File *SceneFileType `graphql:"file" json:"file"`
Studio *ScrapedSceneStudio `graphql:"studio" json:"studio"`
Movies []*ScrapedSceneMovie `graphql:"movies" json:"movies"`
Tags []*ScrapedSceneTag `graphql:"tags" json:"tags"`
Performers []*ScrapedScenePerformer `graphql:"performers" json:"performers"`
}

// ScrapedSceneStash is the result type of the findScene query issued against
// a remote stash server. Compared to ScrapedScene it has no Image or Movies
// field, because stash does not return the image in the query, and it adds
// the scene ID, which is needed to fetch the screenshot separately.
type ScrapedSceneStash struct {
ID string `graphql:"id" json:"id"`
Title *string `graphql:"title" json:"title"`
Details *string `graphql:"details" json:"details"`
URL *string `graphql:"url" json:"url"`
Date *string `graphql:"date" json:"date"`
File *SceneFileType `graphql:"file" json:"file"`
Studio *ScrapedSceneStudio `graphql:"studio" json:"studio"`
Tags []*ScrapedSceneTag `graphql:"tags" json:"tags"`
Performers []*ScrapedScenePerformer `graphql:"performers" json:"performers"`
}

type ScrapedScenePerformer struct {
// Set if performer matched
ID *string `graphql:"id" json:"id"`
Expand Down
84 changes: 84 additions & 0 deletions pkg/scraper/image.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package scraper

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"strings"
	"time"

	"github.com/stashapp/stash/pkg/models"
	"github.com/stashapp/stash/pkg/utils"
)

// imageGetTimeout bounds a whole image download, transfer time included.
// It is hard-coded for now; it may become configurable later.
const imageGetTimeout = 30 * time.Second

// setPerformerImage replaces an http(s) image reference on the scraped
// performer with the downloaded image returned by getImage.
//
// A nil performer, a nil Image, or an Image that does not start with "http"
// is left untouched and nil is returned. If the download fails, the error is
// returned and Image keeps its original value.
func setPerformerImage(p *models.ScrapedPerformer) error {
	if p == nil || p.Image == nil {
		return nil
	}

	// only values that look like a URL need fetching
	source := *p.Image
	if !strings.HasPrefix(source, "http") {
		return nil
	}

	fetched, err := getImage(source)
	if err != nil {
		return err
	}

	p.Image = fetched
	return nil
}

// setSceneImage replaces an http(s) image reference on the scraped scene
// with the downloaded image returned by getImage.
//
// A nil scene, a nil Image, or an Image that does not appear to be a URL
// (no "http" prefix) is left untouched and nil is returned. If the download
// fails, the error is returned and Image keeps its original value.
func setSceneImage(s *models.ScrapedScene) error {
	if s == nil || s.Image == nil {
		// no scene or no image reference: nothing to do
		return nil
	}

	source := *s.Image
	if !strings.HasPrefix(source, "http") {
		// does not look like a URL, so leave it as it is
		return nil
	}

	fetched, err := getImage(source)
	if err != nil {
		return err
	}

	s.Image = fetched
	return nil
}

// getImage downloads the resource at url and returns it as a data URI of the
// form "data:<content type>;base64,<encoded body>".
//
// The request is bounded by imageGetTimeout. An error is returned when the
// request fails, when the server answers with a non-2xx status, or when the
// body cannot be read. The content type is taken from the response's
// Content-Type header; when the header is absent it is sniffed from the body.
func getImage(url string) (*string, error) {
	client := &http.Client{
		Timeout: imageGetTimeout,
	}

	// assume is a URL for now
	resp, err := client.Get(url)
	if err != nil {
		return nil, err
	}

	defer resp.Body.Close()

	// client.Get only reports transport errors. Without this check a 404 or
	// 500 error page would be encoded and handed back as if it were the image.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("getting image from %s: unexpected status %s", url, resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	// determine the image type and set the base64 type
	contentType := resp.Header.Get("Content-Type")
	if contentType == "" {
		contentType = http.DetectContentType(body)
	}

	img := "data:" + contentType + ";base64," + utils.GetBase64StringFromData(body)
	return &img, nil
}

// getStashPerformerImage fetches the image of the performer with the given
// ID from the stash server at stashURL, using the
// "<stashURL>/performer/<performerID>/image" endpoint, and returns whatever
// getImage returns for that address.
func getStashPerformerImage(stashURL string, performerID string) (*string, error) {
	imageURL := stashURL + "/performer/" + performerID + "/image"
	return getImage(imageURL)
}

// getStashSceneImage fetches the screenshot of the scene with the given ID
// from the stash server at stashURL, using the
// "<stashURL>/scene/<sceneID>/screenshot" endpoint, and returns whatever
// getImage returns for that address.
func getStashSceneImage(stashURL string, sceneID string) (*string, error) {
	screenshotURL := stashURL + "/scene/" + sceneID + "/screenshot"
	return getImage(screenshotURL)
}
29 changes: 27 additions & 2 deletions pkg/scraper/scrapers.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,17 @@ func ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerI
// find scraper with the provided id
s := findScraper(scraperID)
if s != nil {
return s.ScrapePerformer(scrapedPerformer)
ret, err := s.ScrapePerformer(scrapedPerformer)
if err != nil {
return nil, err
}

// post-process - set the image if applicable
if err := setPerformerImage(ret); err != nil {
logger.Warnf("Could not set image using URL %s: %s", *ret.Image, err.Error())
}

return ret, nil
}

return nil, errors.New("Scraper with ID " + scraperID + " not found")
Expand All @@ -117,7 +127,17 @@ func ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerI
func ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
for _, s := range scrapers {
if s.matchesPerformerURL(url) {
return s.ScrapePerformerURL(url)
ret, err := s.ScrapePerformerURL(url)
if err != nil {
return nil, err
}

// post-process - set the image if applicable
if err := setPerformerImage(ret); err != nil {
logger.Warnf("Could not set image using URL %s: %s", *ret.Image, err.Error())
}

return ret, nil
}
}

Expand Down Expand Up @@ -228,6 +248,11 @@ func postScrapeScene(ret *models.ScrapedScene) error {
}
}

// post-process - set the image if applicable
if err := setSceneImage(ret); err != nil {
logger.Warnf("Could not set image using URL %s: %s", *ret.Image, err.Error())
}

return nil
}

Expand Down
39 changes: 34 additions & 5 deletions pkg/scraper/stash.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"strconv"

"github.com/jinzhu/copier"
"github.com/shurcooL/graphql"

"github.com/stashapp/stash/pkg/models"
Expand Down Expand Up @@ -67,20 +68,35 @@ func scrapePerformerFragmentStash(c scraperTypeConfig, scrapedPerformer models.S
client := getStashClient(c)

var q struct {
FindPerformer *models.ScrapedPerformer `graphql:"findPerformer(id: $f)"`
FindPerformer *models.ScrapedPerformerStash `graphql:"findPerformer(id: $f)"`
}

performerID := *scrapedPerformer.URL

// get the id from the URL field
vars := map[string]interface{}{
"f": *scrapedPerformer.URL,
"f": performerID,
}

err := client.Query(context.Background(), &q, vars)
if err != nil {
return nil, err
}

return q.FindPerformer, nil
// need to copy back to a scraped performer
ret := models.ScrapedPerformer{}
err = copier.Copy(&ret, q.FindPerformer)
if err != nil {
return nil, err
}

// get the performer image directly
ret.Image, err = getStashPerformerImage(c.scraperConfig.StashServer.URL, performerID)
if err != nil {
return nil, err
}

return &ret, nil
}

func scrapeSceneFragmentStash(c scraperTypeConfig, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
Expand All @@ -99,7 +115,7 @@ func scrapeSceneFragmentStash(c scraperTypeConfig, scene models.SceneUpdateInput
}

var q struct {
FindScene *models.ScrapedScene `graphql:"findScene(checksum: $c)"`
FindScene *models.ScrapedSceneStash `graphql:"findScene(checksum: $c)"`
}

checksum := graphql.String(storedScene.Checksum)
Expand Down Expand Up @@ -128,5 +144,18 @@ func scrapeSceneFragmentStash(c scraperTypeConfig, scene models.SceneUpdateInput
}
}

return q.FindScene, nil
// need to copy back to a scraped scene
ret := models.ScrapedScene{}
err = copier.Copy(&ret, q.FindScene)
if err != nil {
return nil, err
}

// get the scene image directly
ret.Image, err = getStashSceneImage(c.scraperConfig.StashServer.URL, q.FindScene.ID)
if err != nil {
return nil, err
}

return &ret, nil
}
Loading

0 comments on commit 34d8293

Please sign in to comment.