-
-
Notifications
You must be signed in to change notification settings - Fork 803
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add support for scene fragment scrape in xpath
- Loading branch information
1 parent
470a2b5
commit 7158e83
Showing
28 changed files
with
5,005 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
package scraper | ||
|
||
import ( | ||
"errors" | ||
"io/ioutil" | ||
"net/url" | ||
"strings" | ||
|
||
"github.com/stashapp/stash/pkg/logger" | ||
"github.com/stashapp/stash/pkg/models" | ||
"github.com/tidwall/gjson" | ||
) | ||
|
||
type jsonScraper struct { | ||
scraper scraperTypeConfig | ||
config config | ||
globalConfig GlobalConfig | ||
} | ||
|
||
func newJsonScraper(scraper scraperTypeConfig, config config, globalConfig GlobalConfig) *jsonScraper { | ||
return &jsonScraper{ | ||
scraper: scraper, | ||
config: config, | ||
globalConfig: globalConfig, | ||
} | ||
} | ||
|
||
func (s *jsonScraper) getJsonScraper() *mappedScraper { | ||
return s.config.JsonScrapers[s.scraper.Scraper] | ||
} | ||
|
||
func (s *jsonScraper) scrapeURL(url string) (string, *mappedScraper, error) { | ||
scraper := s.getJsonScraper() | ||
|
||
if scraper == nil { | ||
return "", nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config") | ||
} | ||
|
||
doc, err := s.loadURL(url) | ||
|
||
if err != nil { | ||
return "", nil, err | ||
} | ||
|
||
return doc, scraper, nil | ||
} | ||
|
||
func (s *jsonScraper) loadURL(url string) (string, error) { | ||
r, err := loadURL(url, s.config, s.globalConfig) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
doc, err := ioutil.ReadAll(r) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
docStr := string(doc) | ||
if !gjson.Valid(docStr) { | ||
return "", errors.New("not valid json") | ||
} | ||
|
||
if err == nil && s.config.DebugOptions != nil && s.config.DebugOptions.PrintHTML { | ||
logger.Infof("loadURL (%s) response: \n%s", url, docStr) | ||
} | ||
|
||
return docStr, err | ||
} | ||
|
||
func (s *jsonScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) { | ||
doc, scraper, err := s.scrapeURL(url) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
q := s.getJsonQuery(doc) | ||
return scraper.scrapePerformer(q) | ||
} | ||
|
||
func (s *jsonScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) { | ||
doc, scraper, err := s.scrapeURL(url) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
q := s.getJsonQuery(doc) | ||
return scraper.scrapeScene(q) | ||
} | ||
|
||
func (s *jsonScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) { | ||
scraper := s.getJsonScraper() | ||
|
||
if scraper == nil { | ||
return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config") | ||
} | ||
|
||
const placeholder = "{}" | ||
|
||
// replace the placeholder string with the URL-escaped name | ||
escapedName := url.QueryEscape(name) | ||
|
||
url := s.scraper.QueryURL | ||
url = strings.Replace(url, placeholder, escapedName, -1) | ||
|
||
doc, err := s.loadURL(url) | ||
|
||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
q := s.getJsonQuery(doc) | ||
return scraper.scrapePerformers(q) | ||
} | ||
|
||
func (s *jsonScraper) scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) { | ||
return nil, errors.New("scrapePerformerByFragment not supported for json scraper") | ||
} | ||
|
||
func (s *jsonScraper) scrapeSceneByFragment(scene models.SceneUpdateInput) (*models.ScrapedScene, error) { | ||
storedScene, err := sceneFromUpdateFragment(scene) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
if storedScene == nil { | ||
return nil, errors.New("no scene found") | ||
} | ||
|
||
// construct the URL | ||
url := constructSceneURL(s.scraper.QueryURL, storedScene) | ||
|
||
scraper := s.getJsonScraper() | ||
|
||
if scraper == nil { | ||
return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config") | ||
} | ||
|
||
doc, err := s.loadURL(url) | ||
|
||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
q := s.getJsonQuery(doc) | ||
return scraper.scrapeScene(q) | ||
} | ||
|
||
func (s *jsonScraper) getJsonQuery(doc string) *jsonQuery { | ||
return &jsonQuery{ | ||
doc: doc, | ||
scraper: s, | ||
} | ||
} | ||
|
||
type jsonQuery struct { | ||
doc string | ||
scraper *jsonScraper | ||
} | ||
|
||
func (q *jsonQuery) runQuery(selector string) []string { | ||
value := gjson.Get(q.doc, selector) | ||
|
||
if !value.Exists() { | ||
logger.Warnf("Could not find json path '%s' in json object", selector) | ||
return nil | ||
} | ||
|
||
var ret []string | ||
if value.IsArray() { | ||
value.ForEach(func(k, v gjson.Result) bool { | ||
ret = append(ret, v.String()) | ||
return true | ||
}) | ||
} else { | ||
ret = append(ret, value.String()) | ||
} | ||
|
||
return ret | ||
} | ||
|
||
func (q *jsonQuery) subScrape(value string) mappedQuery { | ||
doc, err := q.scraper.loadURL(value) | ||
|
||
if err != nil { | ||
logger.Warnf("Error getting URL '%s' for sub-scraper: %s", value, err.Error()) | ||
return nil | ||
} | ||
|
||
return q.scraper.getJsonQuery(doc) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
package scraper | ||
|
||
import ( | ||
"testing" | ||
|
||
"gopkg.in/yaml.v2" | ||
) | ||
|
||
func TestJsonPerformerScraper(t *testing.T) { | ||
const yamlStr = `name: Test | ||
jsonScrapers: | ||
performerScraper: | ||
common: | ||
$extras: data.extras | ||
performer: | ||
Name: data.name | ||
Gender: $extras.gender | ||
Birthdate: $extras.birthday | ||
Ethnicity: $extras.ethnicity | ||
Height: $extras.height | ||
Measurements: $extras.measurements | ||
Tattoos: $extras.tattoos | ||
Piercings: $extras.piercings | ||
Aliases: data.aliases | ||
Image: data.image | ||
` | ||
|
||
const json = ` | ||
{ | ||
"data": { | ||
"id": "2cd4146b-637d-49b1-8ff9-19d4a06947bb", | ||
"name": "Mia Malkova", | ||
"bio": "Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and that’s sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the ... arrow_drop_down Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and that’s sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the VR Porn movies – trust us. Ankles behind her neck and feet over her back so she can kiss her toes, turned, twisted and gyrating, she can fuck any which way she wants (and that ass!), will surely make you fall in love with this hot Virtual Reality Porn slut, as she is one of the finest of them all. Talking about perfection, maybe it’s all the acrobatic work that keeps it in such gorgeous shape? Who cares really, because you just want to take a big bite out of it and never let go. But it’s not all about the body. Mia’s also got a great smile, which might not sound kinky, but believe us, it is a smile that will heat up your innards and drop your pants. Is it her golden skin, her innocent pink lips or that heart-shaped face? There is just too much good stuff going on with Mia Malkova, which is maybe why these past few years have heaped awards upon awards on this Southern California native. Mia came to VR Bangers for her first VR Porn video, so you know she’s only going for top-notch scenes with top-game performers, men, and women. Better hit up that yoga studio if you ever dream of being able to bang a flexible and talented chick like lady Malkova. arrow_drop_up", | ||
"extras": { | ||
"gender": "Female", | ||
"birthday": "1992-07-01", | ||
"birthday_timestamp": 709948800, | ||
"birthplace": "Palm Springs, California, United States", | ||
"active": 1, | ||
"astrology": "Cancer (Jun 21 - Jul 22)", | ||
"ethnicity": "Caucasian", | ||
"nationality": "United States", | ||
"hair_colour": "Blonde", | ||
"weight": "126 lbs (or 57 kg)", | ||
"height": "5'6\" (or 167 cm)", | ||
"measurements": "34-26-36", | ||
"cupsize": "34C (75C)", | ||
"tattoos": "None", | ||
"piercings": "Navel", | ||
"first_seen": null | ||
}, | ||
"aliases": [ | ||
"Mia Bliss", | ||
"Madison Clover", | ||
"Madison Swan", | ||
"Mia Mountain", | ||
"Mia M.", | ||
"Mia Malvoka", | ||
"Mia Molkova", | ||
"Mia Thomas" | ||
], | ||
"image": "https:\/\/thumb.metadataapi.net\/unsafe\/1000x1500\/smart\/filters:sharpen():upscale()\/https%3A%2F%2Fcdn.metadataapi.net%2Fperformer%2F49%2F05%2F30%2Fade2255dc065032a89ebb23f0e038fa%2Fposter%2Fmia-malkova.jpg%3Fid1582610531" | ||
} | ||
} | ||
` | ||
|
||
c := &config{} | ||
err := yaml.Unmarshal([]byte(yamlStr), &c) | ||
|
||
if err != nil { | ||
t.Fatalf("Error loading yaml: %s", err.Error()) | ||
} | ||
|
||
// perform scrape using json string | ||
performerScraper := c.JsonScrapers["performerScraper"] | ||
|
||
q := &jsonQuery{ | ||
doc: json, | ||
} | ||
|
||
scrapedPerformer, err := performerScraper.scrapePerformer(q) | ||
if err != nil { | ||
t.Fatalf("Error scraping performer: %s", err.Error()) | ||
} | ||
|
||
verifyField(t, "Mia Malkova", scrapedPerformer.Name, "Name") | ||
verifyField(t, "Female", scrapedPerformer.Gender, "Gender") | ||
verifyField(t, "1992-07-01", scrapedPerformer.Birthdate, "Birthdate") | ||
verifyField(t, "Caucasian", scrapedPerformer.Ethnicity, "Ethnicity") | ||
verifyField(t, "5'6\" (or 167 cm)", scrapedPerformer.Height, "Height") | ||
verifyField(t, "None", scrapedPerformer.Tattoos, "Tattoos") | ||
verifyField(t, "Navel", scrapedPerformer.Piercings, "Piercings") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.