Skip to content

Commit

Permalink
feat(api): improve image proxy usage
Browse files Browse the repository at this point in the history
- use main downloader cache
- use image proxy for thumbhash
  • Loading branch information
ncarlier committed Oct 5, 2024
1 parent ab0f6b5 commit 73018bd
Show file tree
Hide file tree
Showing 14 changed files with 156 additions and 109 deletions.
24 changes: 0 additions & 24 deletions internal/api/helper.go
Original file line number Diff line number Diff line change
@@ -1,36 +1,12 @@
package api

import (
"encoding/base64"
"errors"
"fmt"
"net/http"
"regexp"
"strings"
"time"
)

// proxyPathRe splits an image proxy path into three capture groups:
// "/<signature>/<options>/<rest>", where <rest> may itself contain slashes.
var proxyPathRe = regexp.MustCompile(`^/([^/]+)/([^/]+)/(.+)`)

// decodeImageProxyPath extracts the signature, the options and the source
// image URL from an image proxy path.
//
// The last path segment is expected to be the source URL encoded with standard
// (padded) base64. When the path does not have the expected three-segment
// shape, an "invalid image proxy path" error is returned and every other
// result is empty. When the path shape is valid but the base64 payload is not,
// the decoding error is returned together with the already-parsed signature
// and options; url is left empty.
func decodeImageProxyPath(path string) (signature, options, url string, err error) {
	segments := proxyPathRe.FindStringSubmatch(path)
	if len(segments) != 4 {
		return "", "", "", errors.New("invalid image proxy path")
	}

	signature, options = segments[1], segments[2]

	raw, decodeErr := base64.StdEncoding.DecodeString(segments[3])
	if decodeErr != nil {
		return signature, options, "", decodeErr
	}
	return signature, options, string(raw), nil
}

// addXForwardHeader add X-Forwarded-For header
func addXForwardHeader(header *http.Header, host string) {
if prior := header.Values("X-Forwarded-For"); len(prior) > 0 {
Expand Down
20 changes: 5 additions & 15 deletions internal/api/image-proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,25 @@ import (
"time"

"github.com/ncarlier/readflow/internal/config"
"github.com/ncarlier/readflow/pkg/cache"
"github.com/ncarlier/readflow/internal/service"
"github.com/ncarlier/readflow/pkg/defaults"
"github.com/ncarlier/readflow/pkg/downloader"
imageproxy "github.com/ncarlier/readflow/pkg/image-proxy"
"github.com/ncarlier/readflow/pkg/logger"
)

// imgProxyHandler is the handler for proxying images.
func imgProxyHandler(conf *config.Config) http.Handler {
if conf.Image.ProxyURL == "" {
if conf.ImageProxy.URL == "" {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
http.Error(w, http.StatusText(http.StatusNoContent), http.StatusNotFound)
})
}
c, err := cache.New(conf.Image.Cache)
if err != nil {
logger.Fatal().Err(err).Msg("unable to setup Image Proxy cache")
}
// TODO add image proxy to the service registry
down := downloader.NewInternalDownloader(&downloader.InternalDownloaderConfig{
Timeout: conf.Downloader.Timeout.Duration,
MaxConcurrentDownload: conf.Downloader.MaxConcurentDownloads,
Cache: c,
})

return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
start := time.Now()

img := strings.TrimPrefix(r.URL.Path, "/img")
_, opts, src, err := decodeImageProxyPath(img)
_, opts, src, err := imageproxy.Decode(img)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
Expand All @@ -46,7 +36,7 @@ func imgProxyHandler(conf *config.Config) http.Handler {
addXForwardHeader(&r.Header, host)
}
logger.Debug().Msg("getting image via proxy")
asset, resp, err := down.Get(r.Context(), conf.Image.ProxyURL+img, &r.Header)
asset, resp, err := service.Lookup().Download(r.Context(), conf.ImageProxy.URL+img, &r.Header)
if err != nil {
logger.Info().Err(err).Dur("took", time.Since(start)).Msg("unable to get image via proxy")
// Redirect if image proxy failed
Expand Down
5 changes: 2 additions & 3 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,8 @@ func NewConfig() *Config {
Avatar: AvatarConfig{
ServiceProvider: "https://robohash.org/{seed}?set=set4&size=48x48",
},
Image: ImageConfig{
ProxySizes: "320,768",
Cache: "boltdb:///tmp/readflow-images.cache?maxSize=256,maxEntries=5000,maxEntrySize=1",
ImageProxy: ImageProxyConfig{
Sizes: "320,768",
},
RateLimiting: RateLimitingConfig{
Notification: ratelimiter.RateLimiterConfig{
Expand Down
11 changes: 4 additions & 7 deletions internal/config/defaults.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ level = "${READFLOW_LOG_LEVEL}"
## Log format
# Values: "json" or "text"
# Default: "json"
format = "${REAFLOW_LOG_FORMAT}"
format = "${READFLOW_LOG_FORMAT}"

[database]
## Database connection string
Expand Down Expand Up @@ -141,17 +141,14 @@ hosts = [ "medium.com" ]
# - https://seccdn.libravatar.org/avatar/{seed}?d=mp&s=48
service_provider = "${READFLOW_AVATAR_SERVICE_PROVIDER}"

[image]
[image_proxy]
## Image proxy URL, disabled if empty
# Example: "http://imagor:8080"
proxy_url = "${READFLOW_IMAGE_PROXY_URL}"
url = "${READFLOW_IMAGE_PROXY_URL}"
## Image proxy supported sizes
# Comma separated list of image size
# Default: "320,768"
proxy_sizes = "${READFLOW_IMAGE_PROXY_SIZES}"
## Cache parameters
# Default: "boltdb:///tmp/readflow-images.cache?maxSize=256,maxEntries=5000,maxEntrySize=1"
cache = "${READFLOW_IMAGE_CACHE}"
sizes = "${READFLOW_IMAGE_PROXY_SIZES}"

[pdf]
## PDF generator service provider, disabled if empty
Expand Down
2 changes: 1 addition & 1 deletion internal/config/expvars.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@ func ExportVars(conf *Config) {
exportConfigVar("metrics-listen-addr", conf.Metrics.ListenAddr)
exportConfigVar("http-public-url", conf.HTTP.PublicURL)
exportConfigVar("ui-public-url", conf.UI.PublicURL)
exportConfigVar("image-proxy-url", conf.Image.ProxyURL)
exportConfigVar("image-proxy-url", conf.ImageProxy.URL)
}
2 changes: 1 addition & 1 deletion internal/config/test/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
func TestDefaultConfig(t *testing.T) {
conf := config.NewConfig()
assert.Equal(t, ":8080", conf.HTTP.ListenAddr)
assert.Empty(t, conf.Image.ProxyURL)
assert.Empty(t, conf.ImageProxy.URL)
assert.Nil(t, conf.GetUserPlan("test"), "plan should not be found")
}

Expand Down
11 changes: 5 additions & 6 deletions internal/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ type Config struct {
Downloader DownloaderConfig `toml:"downloader"`
Scraping ScrapingConfig `toml:"scraping"`
Avatar AvatarConfig `toml:"avatar"`
Image ImageConfig `toml:"image"`
ImageProxy ImageProxyConfig `toml:"image_proxy"`
PDF PDFConfig `toml:"pdf"`
Secrets SecretsConfig `toml:"secrets"`
Event EventConfig `toml:"event"`
Expand Down Expand Up @@ -121,11 +121,10 @@ type AvatarConfig struct {
ServiceProvider string `toml:"service_provider"`
}

// ImageConfig for image configuration section
type ImageConfig struct {
// ProxyURL is the image proxy URL; the image proxy is disabled if empty.
ProxyURL string `toml:"proxy_url"`
// ProxySizes is a comma separated list of image sizes supported by the
// image proxy (default: "320,768").
ProxySizes string `toml:"proxy_sizes"`
// Cache holds the image cache parameters as a connection-string-like value
// (default: "boltdb:///tmp/readflow-images.cache?maxSize=256,maxEntries=5000,maxEntrySize=1").
Cache string `toml:"cache"`
}
// ImageProxyConfig for image proxy configuration section
type ImageProxyConfig struct {
// URL is the image proxy URL; the image proxy is disabled if empty.
URL string `toml:"url"`
// Sizes is a comma separated list of image sizes supported by the image
// proxy (default: "320,768").
Sizes string `toml:"sizes"`
}

// PDFConfig for PDF configuration section
Expand Down
24 changes: 1 addition & 23 deletions internal/schema/article/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package article

import (
"errors"
"strings"

"github.com/graphql-go/graphql"

Expand Down Expand Up @@ -94,28 +93,7 @@ func thumbnailsResolver(p graphql.ResolveParams) (interface{}, error) {
if !ok {
return nil, errors.New("thumbnails resolver is expecting an article")
}
if article.Image == nil || *article.Image == "" {
return nil, nil
}
if service.Lookup().GetConfig().Image.ProxyURL == "" {
return nil, nil
}
sizes := strings.Split(service.Lookup().GetConfig().Image.ProxySizes, ",")
result := make([]struct {
Size string
Hash string
}, len(sizes))
for i, size := range sizes {
result[i] = struct {
Size string
Hash string
}{
Size: size,
Hash: service.Lookup().GetArticleThumbnailHash(article, size),
}
}

return result, nil
return service.Lookup().GetArticleThumbnailHashSet(article), nil
}

func init() {
Expand Down
25 changes: 6 additions & 19 deletions internal/service/articles_thumbnail.go
Original file line number Diff line number Diff line change
@@ -1,27 +1,14 @@
package service

import (
"crypto/hmac"
"crypto/sha256"
"encoding/base64"

"github.com/ncarlier/readflow/internal/model"
imageproxy "github.com/ncarlier/readflow/pkg/image-proxy"
)

// encodeImageProxyPath builds the image proxy path for the given source URL
// and size: "/resize:fit:<size>/<standard base64 of url>".
func encodeImageProxyPath(url, size string) string {
	options := "resize:fit:" + size
	encodedURL := base64.StdEncoding.EncodeToString([]byte(url))
	return "/" + options + "/" + encodedURL
}

// GetArticleThumbnail return article thumbnail URL
func (reg *Registry) GetArticleThumbnailHash(article *model.Article, size string) string {
if article.Image == nil || *article.Image == "" {
return ""
// GetArticleThumbnailHashSet returns the article thumbnail hash set, or nil
// when the image proxy URL is empty or the article has no image
func (reg *Registry) GetArticleThumbnailHashSet(article *model.Article) *[]imageproxy.ImageProxyHashSet {
if reg.imageProxy.URL() == "" || article.Image == nil || *article.Image == "" {
return nil
}
path := encodeImageProxyPath(*article.Image, size)

mac := hmac.New(sha256.New, reg.conf.Hash.SecretKey.Value)
mac.Write(reg.conf.Hash.SecretSalt.Value)
mac.Write([]byte(path))
return base64.RawURLEncoding.EncodeToString(mac.Sum(nil))
return reg.imageProxy.GetHashSet(*article.Image)
}
6 changes: 6 additions & 0 deletions internal/service/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package service
import (
"context"
"errors"
"net/http"
"time"

"github.com/ncarlier/readflow/internal/exporter"
Expand Down Expand Up @@ -91,3 +92,8 @@ func (reg *Registry) DownloadArticle(ctx context.Context, idArticle uint, format

return result, nil
}

// Download fetches the web asset located at url by delegating to the
// registry's downloader, forwarding ctx and header unchanged. The asset,
// the HTTP response and the error are returned exactly as the downloader
// produced them.
func (reg *Registry) Download(ctx context.Context, url string, header *http.Header) (*downloader.WebAsset, *http.Response, error) {
	asset, resp, err := reg.dl.Get(ctx, url, header)
	return asset, resp, err
}
13 changes: 10 additions & 3 deletions internal/service/event-thumbhash.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,30 @@ import (
"github.com/ncarlier/readflow/pkg/event"
"github.com/ncarlier/readflow/pkg/logger"
"github.com/ncarlier/readflow/pkg/thumbhash"
"github.com/ncarlier/readflow/pkg/utils"
)

const thumbhashErrorMessage = "unable to create thumbhash"

func newThumbhashEventHandler(srv *Registry) event.EventHandler {
return func(evt event.Event) {
article, ok := evt.Payload.(model.Article)
if !ok || article.Image == nil || article.ThumbHash != nil {
if !ok || article.Status == "read" || utils.IsNilOrEmpty(article.Image) || !utils.IsNilOrEmpty(article.ThumbHash) {
// Ignore if not an article event
// OR if the article is marked as read
// OR if the article has no image
// OR if the article already has a thumbhash
return
}
logger := logger.With().Uint("id", article.ID).Logger()

// download article image
// TODO use image proxy service (in order to reduce image size and therefore the memory)
asset, res, err := srv.dl.Get(context.Background(), *article.Image, nil)
src := *article.Image
if srv.imageProxy.URL() != "" {
src = srv.imageProxy.Encode(src, "")
}
logger = logger.With().Str("src", src).Logger()
asset, res, err := srv.dl.Get(context.Background(), src, nil)
if err != nil {
logger.Info().Err(err).Msg(thumbhashErrorMessage)
return
Expand Down
24 changes: 17 additions & 7 deletions internal/service/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/ncarlier/readflow/pkg/event"
"github.com/ncarlier/readflow/pkg/event/dispatcher"
"github.com/ncarlier/readflow/pkg/hashid"
imageproxy "github.com/ncarlier/readflow/pkg/image-proxy"
"github.com/ncarlier/readflow/pkg/job"
"github.com/ncarlier/readflow/pkg/logger"
ratelimiter "github.com/ncarlier/readflow/pkg/rate-limiter"
Expand All @@ -35,6 +36,7 @@ type Registry struct {
properties *model.Properties
webScraper *scraper.WebScraper
dl downloader.Downloader
imageProxy *imageproxy.ImageProxy
hashid *hashid.HashIDHandler
notificationRateLimiter ratelimiter.RateLimiter
scriptEngine *scripting.ScriptEngine
Expand All @@ -52,6 +54,20 @@ func Configure(conf config.Config, database db.DB) error {
if err != nil {
return err
}
// configure Downloader
dl := downloader.NewInternalDownloader(&downloader.InternalDownloaderConfig{
UserAgent: conf.Downloader.UserAgent,
Cache: downloadCache,
MaxConcurrentDownload: conf.Downloader.MaxConcurentDownloads,
Timeout: conf.Downloader.Timeout.Duration,
})
// configure Image Proxy
imageProxy := imageproxy.NewImageProxy(&imageproxy.ImageProxyConfiguration{
URL: conf.ImageProxy.URL,
Sizes: conf.ImageProxy.Sizes,
SecretKey: conf.Hash.SecretKey.Value,
SecretSalt: conf.Hash.SecretSalt.Value,
})
// configure web scraper
webScraper := scraper.NewWebScraper(&scraper.WebScraperConfiguration{
HttpClient: &http.Client{Timeout: conf.Scraping.Timeout.Duration},
Expand Down Expand Up @@ -85,20 +101,14 @@ func Configure(conf config.Config, database db.DB) error {
db.NewCleanupDatabaseJob(database),
)

dl := downloader.NewInternalDownloader(&downloader.InternalDownloaderConfig{
UserAgent: conf.Downloader.UserAgent,
Cache: downloadCache,
MaxConcurrentDownload: conf.Downloader.MaxConcurentDownloads,
Timeout: conf.Downloader.Timeout.Duration,
})

instance = &Registry{
conf: conf,
db: database,
logger: logger.With().Str("component", "service").Logger(),
downloadCache: downloadCache,
webScraper: webScraper,
dl: dl,
imageProxy: imageProxy,
hashid: hid,
notificationRateLimiter: notificationRateLimiter,
sanitizer: sanitizer.NewSanitizer(blockList),
Expand Down
Loading

0 comments on commit 73018bd

Please sign in to comment.