From 109e55a25aeaeb4d41f6c60291fd727c30045ba9 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Thu, 22 Oct 2020 11:56:04 +1100 Subject: [PATCH] Query url parameters (#878) --- pkg/scraper/config.go | 3 +- pkg/scraper/json.go | 13 ++++- pkg/scraper/query_url.go | 51 +++++++++++++++++++ pkg/scraper/url.go | 23 --------- pkg/scraper/xpath.go | 12 ++++- .../src/components/Changelog/versions/v040.md | 1 + ui/v2.5/src/docs/en/Scraping.md | 10 +++- 7 files changed, 84 insertions(+), 29 deletions(-) create mode 100644 pkg/scraper/query_url.go diff --git a/pkg/scraper/config.go b/pkg/scraper/config.go index a9a25c249d1..4dca0f58e87 100644 --- a/pkg/scraper/config.go +++ b/pkg/scraper/config.go @@ -114,7 +114,8 @@ type scraperTypeConfig struct { Scraper string `yaml:"scraper"` // for xpath name scraper only - QueryURL string `yaml:"queryURL"` + QueryURL string `yaml:"queryURL"` + QueryURLReplacements queryURLReplacements `yaml:"queryURLReplace"` } func (c scraperTypeConfig) validate() error { diff --git a/pkg/scraper/json.go b/pkg/scraper/json.go index 1d5d4db1ea4..00590f5fe9b 100644 --- a/pkg/scraper/json.go +++ b/pkg/scraper/json.go @@ -148,7 +148,11 @@ func (s *jsonScraper) scrapeSceneByFragment(scene models.SceneUpdateInput) (*mod } // construct the URL - url := constructSceneURL(s.scraper.QueryURL, storedScene) + queryURL := queryURLParametersFromScene(storedScene) + if s.scraper.QueryURLReplacements != nil { + queryURL.applyReplacements(s.scraper.QueryURLReplacements) + } + url := queryURL.constructURL(s.scraper.QueryURL) scraper := s.getJsonScraper() @@ -176,7 +180,12 @@ func (s *jsonScraper) scrapeGalleryByFragment(gallery models.GalleryUpdateInput) return nil, errors.New("no scene found") } - url := constructGalleryURL(s.scraper.QueryURL, storedGallery) + // construct the URL + queryURL := queryURLParametersFromGallery(storedGallery) + if s.scraper.QueryURLReplacements != nil { + 
queryURL.applyReplacements(s.scraper.QueryURLReplacements) + } + url := queryURL.constructURL(s.scraper.QueryURL) scraper := s.getJsonScraper() diff --git a/pkg/scraper/query_url.go b/pkg/scraper/query_url.go new file mode 100644 index 00000000000..517df5ac21b --- /dev/null +++ b/pkg/scraper/query_url.go @@ -0,0 +1,51 @@ +package scraper + +import ( + "path/filepath" + "strings" + + "github.com/stashapp/stash/pkg/models" +) + +type queryURLReplacements map[string]mappedRegexConfigs + +type queryURLParameters map[string]string + +func queryURLParametersFromScene(scene *models.Scene) queryURLParameters { + ret := make(queryURLParameters) + ret["checksum"] = scene.Checksum.String + ret["oshash"] = scene.OSHash.String + ret["filename"] = filepath.Base(scene.Path) + ret["title"] = scene.Title.String + return ret +} + +func queryURLParametersFromGallery(gallery *models.Gallery) queryURLParameters { + ret := make(queryURLParameters) + ret["checksum"] = gallery.Checksum + + if gallery.Path.Valid { + ret["filename"] = filepath.Base(gallery.Path.String) + } + ret["title"] = gallery.Title.String + + return ret +} + +func (p queryURLParameters) applyReplacements(r queryURLReplacements) { + for k, v := range p { + rpl, found := r[k] + if found { + p[k] = rpl.apply(v) + } + } +} + +func (p queryURLParameters) constructURL(url string) string { + ret := url + for k, v := range p { + ret = strings.Replace(ret, "{"+k+"}", v, -1) + } + + return ret +} diff --git a/pkg/scraper/url.go b/pkg/scraper/url.go index 52a57a55b2d..fa4ae44c4fe 100644 --- a/pkg/scraper/url.go +++ b/pkg/scraper/url.go @@ -10,7 +10,6 @@ import ( "net/http" "net/http/cookiejar" "os" - "path/filepath" "strings" "time" @@ -19,7 +18,6 @@ import ( "github.com/chromedp/chromedp" jsoniter "github.com/json-iterator/go" "github.com/stashapp/stash/pkg/logger" - "github.com/stashapp/stash/pkg/models" "golang.org/x/net/html/charset" "golang.org/x/net/publicsuffix" ) @@ -28,27 +26,6 @@ import ( // configurable at some point. 
const scrapeGetTimeout = time.Second * 30 -func constructSceneURL(url string, scene *models.Scene) string { - // support checksum, title and filename - ret := strings.Replace(url, "{checksum}", scene.Checksum.String, -1) - ret = strings.Replace(url, "{oshash}", scene.OSHash.String, -1) - ret = strings.Replace(ret, "{filename}", filepath.Base(scene.Path), -1) - ret = strings.Replace(ret, "{title}", scene.Title.String, -1) - - return ret -} - -func constructGalleryURL(url string, gallery *models.Gallery) string { - // support checksum, title and filename - ret := strings.Replace(url, "{checksum}", gallery.Checksum, -1) - if gallery.Path.Valid { - ret = strings.Replace(url, "{filename}", filepath.Base(gallery.Path.String), -1) - } - ret = strings.Replace(url, "{title}", gallery.Title.String, -1) - - return ret -} - func loadURL(url string, scraperConfig config, globalConfig GlobalConfig) (io.Reader, error) { driverOptions := scraperConfig.DriverOptions if driverOptions != nil && driverOptions.UseCDP { diff --git a/pkg/scraper/xpath.go b/pkg/scraper/xpath.go index 12f0750ba29..787187bd0c8 100644 --- a/pkg/scraper/xpath.go +++ b/pkg/scraper/xpath.go @@ -129,7 +129,11 @@ func (s *xpathScraper) scrapeSceneByFragment(scene models.SceneUpdateInput) (*mo } // construct the URL - url := constructSceneURL(s.scraper.QueryURL, storedScene) + queryURL := queryURLParametersFromScene(storedScene) + if s.scraper.QueryURLReplacements != nil { + queryURL.applyReplacements(s.scraper.QueryURLReplacements) + } + url := queryURL.constructURL(s.scraper.QueryURL) scraper := s.getXpathScraper() @@ -158,7 +162,11 @@ func (s *xpathScraper) scrapeGalleryByFragment(gallery models.GalleryUpdateInput } // construct the URL - url := constructGalleryURL(s.scraper.QueryURL, storedGallery) + queryURL := queryURLParametersFromGallery(storedGallery) + if s.scraper.QueryURLReplacements != nil { + queryURL.applyReplacements(s.scraper.QueryURLReplacements) + } + url := 
queryURL.constructURL(s.scraper.QueryURL) scraper := s.getXpathScraper() diff --git a/ui/v2.5/src/components/Changelog/versions/v040.md b/ui/v2.5/src/components/Changelog/versions/v040.md index 79972ad1f7c..cb17a460025 100644 --- a/ui/v2.5/src/components/Changelog/versions/v040.md +++ b/ui/v2.5/src/components/Changelog/versions/v040.md @@ -6,6 +6,7 @@ * Add selective scene export. ### 🎨 Improvements +* Add support for query URL parameter regex replacement when scraping by query URL. * Include empty fields in isMissing filter * Show static image on scene wall if preview video is missing. * Add path filter to scene and gallery query. diff --git a/ui/v2.5/src/docs/en/Scraping.md b/ui/v2.5/src/docs/en/Scraping.md index 230cb66c365..ef438234d20 100644 --- a/ui/v2.5/src/docs/en/Scraping.md +++ b/ui/v2.5/src/docs/en/Scraping.md @@ -216,15 +216,23 @@ For `sceneByFragment`, the `queryURL` field must also be present. This field is * `{filename}` - the base filename of the scene * `{title}` - the title of the scene +These placeholder field values may be manipulated with regex replacements by adding a `queryURLReplace` section, containing a map of placeholder field to regex configuration which uses the same format as the `replace` post-process action covered below. + For example: ``` sceneByFragment: action: scrapeJson - queryURL: https://metadataapi.net/api/scenes?parse={filename}&limit=1 scraper: sceneQueryScraper + queryURL: https://metadataapi.net/api/scenes?parse={filename}&limit=1 + queryURLReplace: + filename: + - regex: + with: ``` +The above configuration would scrape from the value of `queryURL`, replacing `{filename}` with the base filename of the scene, after it has been manipulated by the regex replacements. + ### Xpath and JSON scrapers configuration The top-level `xPathScrapers` field contains xpath scraping configurations, freely named. These are referenced in the `scraper` field for `scrapeXPath` scrapers.