Skip to content

Commit

Permalink
Query url parameters (#878)
Browse files Browse the repository at this point in the history
  • Loading branch information
WithoutPants authored Oct 22, 2020
1 parent 228a5c5 commit 109e55a
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 29 deletions.
3 changes: 2 additions & 1 deletion pkg/scraper/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ type scraperTypeConfig struct {
Scraper string `yaml:"scraper"`

// for xpath name scraper only
QueryURL string `yaml:"queryURL"`
QueryURL string `yaml:"queryURL"`
QueryURLReplacements queryURLReplacements `yaml:"queryURLReplace"`
}

func (c scraperTypeConfig) validate() error {
Expand Down
13 changes: 11 additions & 2 deletions pkg/scraper/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,11 @@ func (s *jsonScraper) scrapeSceneByFragment(scene models.SceneUpdateInput) (*mod
}

// construct the URL
url := constructSceneURL(s.scraper.QueryURL, storedScene)
queryURL := queryURLParametersFromScene(storedScene)
if s.scraper.QueryURLReplacements != nil {
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
}
url := queryURL.constructURL(s.scraper.QueryURL)

scraper := s.getJsonScraper()

Expand Down Expand Up @@ -176,7 +180,12 @@ func (s *jsonScraper) scrapeGalleryByFragment(gallery models.GalleryUpdateInput)
return nil, errors.New("no scene found")
}

url := constructGalleryURL(s.scraper.QueryURL, storedGallery)
// construct the URL
queryURL := queryURLParametersFromGallery(storedGallery)
if s.scraper.QueryURLReplacements != nil {
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
}
url := queryURL.constructURL(s.scraper.QueryURL)

scraper := s.getJsonScraper()

Expand Down
51 changes: 51 additions & 0 deletions pkg/scraper/query_url.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package scraper

import (
"path/filepath"
"strings"

"github.com/stashapp/stash/pkg/models"
)

// queryURLReplacements maps a query URL placeholder name (the key used in
// queryURLParameters) to the regex replacement configuration that is applied
// to that placeholder's value.
type queryURLReplacements map[string]mappedRegexConfigs

// queryURLParameters maps a placeholder name to the value that is substituted
// for "{name}" when a query URL is constructed.
type queryURLParameters map[string]string

// queryURLParametersFromScene builds the placeholder values available to a
// query URL from the given scene: its checksum, oshash, base filename and
// title. scene must not be nil.
func queryURLParametersFromScene(scene *models.Scene) queryURLParameters {
	return queryURLParameters{
		"checksum": scene.Checksum.String,
		"oshash":   scene.OSHash.String,
		"filename": filepath.Base(scene.Path),
		"title":    scene.Title.String,
	}
}

// queryURLParametersFromGallery builds the placeholder values available to a
// query URL from the given gallery: its checksum and title, plus the base
// filename when the gallery has a valid path. gallery must not be nil.
func queryURLParametersFromGallery(gallery *models.Gallery) queryURLParameters {
	params := queryURLParameters{
		"checksum": gallery.Checksum,
		"title":    gallery.Title.String,
	}

	// The "filename" entry is only present when the gallery path is set.
	if gallery.Path.Valid {
		params["filename"] = filepath.Base(gallery.Path.String)
	}

	return params
}

// applyReplacements rewrites, in place, the value of every parameter in p
// that has an entry in r by passing it through that entry's apply method.
// Parameters without a matching entry are left untouched.
func (p queryURLParameters) applyReplacements(r queryURLReplacements) {
	for name, value := range p {
		configs, ok := r[name]
		if !ok {
			continue
		}

		// Overwriting an existing key while ranging over the map is safe.
		p[name] = configs.apply(value)
	}
}

// constructURL returns url with every "{name}" placeholder replaced by the
// value stored in p under name. Placeholders with no entry in p are left
// unchanged.
//
// All substitutions happen in a single pass over url. Substituting one
// placeholder at a time while ranging over the map would make the result
// depend on Go's random map iteration order whenever a value itself contains
// placeholder text (for example a title of "{filename}"): it would sometimes
// be expanded a second time and sometimes not. With a single pass, substituted
// values are always inserted verbatim.
func (p queryURLParameters) constructURL(url string) string {
	pairs := make([]string, 0, 2*len(p))
	for k, v := range p {
		pairs = append(pairs, "{"+k+"}", v)
	}

	// Placeholder names are not expected to contain braces, so no pattern is
	// a prefix of another and the argument order does not affect the result.
	return strings.NewReplacer(pairs...).Replace(url)
}
23 changes: 0 additions & 23 deletions pkg/scraper/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"net/http"
"net/http/cookiejar"
"os"
"path/filepath"
"strings"
"time"

Expand All @@ -19,7 +18,6 @@ import (
"github.com/chromedp/chromedp"
jsoniter "github.com/json-iterator/go"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/models"
"golang.org/x/net/html/charset"
"golang.org/x/net/publicsuffix"
)
Expand All @@ -28,27 +26,6 @@ import (
// configurable at some point.
const scrapeGetTimeout = time.Second * 30

// constructSceneURL returns url with the {checksum}, {oshash}, {filename} and
// {title} placeholders replaced by the corresponding values of scene.
// {filename} is the base name of the scene's path. scene must not be nil.
func constructSceneURL(url string, scene *models.Scene) string {
	// Each replacement must operate on the result of the previous one;
	// restarting from url would discard the earlier substitutions.
	ret := strings.Replace(url, "{checksum}", scene.Checksum.String, -1)
	ret = strings.Replace(ret, "{oshash}", scene.OSHash.String, -1)
	ret = strings.Replace(ret, "{filename}", filepath.Base(scene.Path), -1)
	ret = strings.Replace(ret, "{title}", scene.Title.String, -1)

	return ret
}

// constructGalleryURL returns url with the {checksum}, {filename} and {title}
// placeholders replaced by the corresponding values of gallery. {filename} is
// the base name of the gallery's path and is only substituted when the path
// is valid; otherwise the placeholder is left in place. gallery must not be
// nil.
func constructGalleryURL(url string, gallery *models.Gallery) string {
	// Each replacement must operate on the result of the previous one;
	// restarting from url would discard the earlier substitutions.
	ret := strings.Replace(url, "{checksum}", gallery.Checksum, -1)
	if gallery.Path.Valid {
		ret = strings.Replace(ret, "{filename}", filepath.Base(gallery.Path.String), -1)
	}
	ret = strings.Replace(ret, "{title}", gallery.Title.String, -1)

	return ret
}

func loadURL(url string, scraperConfig config, globalConfig GlobalConfig) (io.Reader, error) {
driverOptions := scraperConfig.DriverOptions
if driverOptions != nil && driverOptions.UseCDP {
Expand Down
12 changes: 10 additions & 2 deletions pkg/scraper/xpath.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,11 @@ func (s *xpathScraper) scrapeSceneByFragment(scene models.SceneUpdateInput) (*mo
}

// construct the URL
url := constructSceneURL(s.scraper.QueryURL, storedScene)
queryURL := queryURLParametersFromScene(storedScene)
if s.scraper.QueryURLReplacements != nil {
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
}
url := queryURL.constructURL(s.scraper.QueryURL)

scraper := s.getXpathScraper()

Expand Down Expand Up @@ -158,7 +162,11 @@ func (s *xpathScraper) scrapeGalleryByFragment(gallery models.GalleryUpdateInput
}

// construct the URL
url := constructGalleryURL(s.scraper.QueryURL, storedGallery)
queryURL := queryURLParametersFromGallery(storedGallery)
if s.scraper.QueryURLReplacements != nil {
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
}
url := queryURL.constructURL(s.scraper.QueryURL)

scraper := s.getXpathScraper()

Expand Down
1 change: 1 addition & 0 deletions ui/v2.5/src/components/Changelog/versions/v040.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* Add selective scene export.

### 🎨 Improvements
* Add support for query URL parameter regex replacement when scraping by query URL.
* Include empty fields in isMissing filter
* Show static image on scene wall if preview video is missing.
* Add path filter to scene and gallery query.
Expand Down
10 changes: 9 additions & 1 deletion ui/v2.5/src/docs/en/Scraping.md
Original file line number Diff line number Diff line change
Expand Up @@ -216,15 +216,23 @@ For `sceneByFragment`, the `queryURL` field must also be present. This field is
* `{filename}` - the base filename of the scene
* `{title}` - the title of the scene

These placeholder field values may be manipulated with regex replacements by adding a `queryURLReplace` section. This section maps each placeholder field name to a list of regex configurations, using the same format as the `replace` post-process action covered below.

For example:

```
sceneByFragment:
action: scrapeJson
queryURL: https://metadataapi.net/api/scenes?parse={filename}&limit=1
scraper: sceneQueryScraper
queryURL: https://metadataapi.net/api/scenes?parse={filename}&limit=1
queryURLReplace:
filename:
- regex: <some regex>
with: <replacement>
```

The above configuration would scrape from the value of `queryURL`, replacing `{filename}` with the base filename of the scene, after it has been manipulated by the regex replacements.

### Xpath and JSON scrapers configuration

The top-level `xPathScrapers` field contains xpath scraping configurations, freely named. These are referenced in the `scraper` field for `scrapeXPath` scrapers.
Expand Down

0 comments on commit 109e55a

Please sign in to comment.