diff --git a/.travis.yml b/.travis.yml index b82ac19..8d53597 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,6 +29,10 @@ before_install: # - sudo apt-get update -qq # - sudo apt-get install libcv-dev libopencv-dev libopencv-contrib-dev libhighgui-dev libopencv-photo-dev libopencv-imgproc-dev libopencv-stitching-dev libopencv-superres-dev libopencv-ts-dev libopencv-videostab-dev +install: +# It's complicated to get OpenCV built on each platform, so exclude the OpenCV-based detector and its dependencies this way. + - go get -t $(go list ./... | grep -v gocv | xargs) + script: - go test -v -tags ci ./... - if [[ $TRAVIS_GO_VERSION == 1.9* ]]; then $GOPATH/bin/goveralls -service=travis-ci; fi diff --git a/debug.go b/debug.go index d3395fd..dc5968b 100644 --- a/debug.go +++ b/debug.go @@ -41,12 +41,94 @@ import ( "path/filepath" ) -func debugOutput(debug bool, img *image.RGBA, debugType string) { - if debug { - writeImage("png", img, "./smartcrop_"+debugType+".png") +// debugImage carries debug output image and has methods for updating and writing it +type DebugImage struct { + img *image.RGBA + colors []color.RGBA + nextColorIdx int +} + +func NewDebugImage(bounds image.Rectangle) *DebugImage { + di := DebugImage{} + + // Set up the actual image + di.img = image.NewRGBA(bounds) + for x := bounds.Min.X; x < bounds.Max.X; x++ { + for y := bounds.Min.Y; y < bounds.Max.Y; y++ { + di.img.Set(x, y, color.Black) + } + } + + // Set up an array of colors used for debug outputs + di.colors = []color.RGBA{ + {0, 255, 0, 255}, // default edges + {255, 0, 0, 255}, // default skin + {0, 0, 255, 255}, // default saturation + {255, 128, 0, 255}, // a few extra... + {128, 0, 128, 255}, + {64, 255, 255, 255}, + {255, 64, 255, 255}, + {255, 255, 64, 255}, + {255, 255, 255, 255}, + } + di.nextColorIdx = 0 + return &di +} + +func (di *DebugImage) popNextColor() color.RGBA { + c := di.colors[di.nextColorIdx] + di.nextColorIdx++ + + // Wrap around if necessary (if someone ever implements and sets a tenth detector) + if di.nextColorIdx >= len(di.colors) { + di.nextColorIdx = 0 + } + return c +} + +func scaledColorComponent(factor uint8, oldComponent uint8, newComponent uint8) uint8 { + if factor < 1 { + return oldComponent + } + + return uint8(bounds(((float64(factor)/255.0*float64(newComponent))+float64(oldComponent))/2.0) * 2.0) +} + +func (di *DebugImage) AddDetected(d [][]uint8) { + baseColor := di.popNextColor() + + minX := di.img.Bounds().Min.X + minY := di.img.Bounds().Min.Y + + maxX := di.img.Bounds().Max.X + maxY := di.img.Bounds().Max.Y + if maxX > len(d) { + maxX = len(d) + } + if maxY > len(d[0]) { + maxY = len(d[0]) + } + + for x := minX; x < maxX; x++ { + for y := minY; y < maxY; y++ { + if d[x][y] > 0 { + c := di.img.RGBAAt(x, y) + nc := color.RGBA{} + nc.R = scaledColorComponent(d[x][y], c.R, baseColor.R) + nc.G = scaledColorComponent(d[x][y], c.G, baseColor.G) + nc.B = scaledColorComponent(d[x][y], c.B, baseColor.B) + nc.A = 255 + + di.img.SetRGBA(x, y, nc) + } + } } } +func (di *DebugImage) DebugOutput(debugType string) { + writeImage("png", di.img, "./smartcrop_"+debugType+".png") +} + func writeImage(imgtype string, img image.Image, name string) error { if err := os.MkdirAll(filepath.Dir(name), 0755); err != nil { panic(err) @@ -82,7 +164,9 @@ func writeImageToPng(img image.Image, name string) error { return png.Encode(fso, img) } -func drawDebugCrop(topCrop Crop, o *image.RGBA) { +func (di *DebugImage) DrawDebugCrop(topCrop Crop) { + o := di.img + width := o.Bounds().Dx() height := o.Bounds().Dy() diff --git a/gocv/face.go b/gocv/face.go new file mode 100644 index 0000000..5a715d1 --- /dev/null +++ b/gocv/face.go @@ -0,0 +1,125 @@ +// +build !ci + +package gocv + +import ( + "fmt" + "image" + "os" + + "gocv.io/x/gocv" + + sclogger "github.com/muesli/smartcrop/logger" +) + +type FaceDetector struct { + FaceDetectionHaarCascadeFilepath string + Logger *sclogger.Logger +} + +func (d *FaceDetector) Name() string { + return "face" +} + +func (d *FaceDetector) Bias() float64 { + return 0.9 +} + +func (d *FaceDetector) Weight() float64 { + return 1.8 +} + +func (d *FaceDetector) Detect(img *image.RGBA) ([][]uint8, error) { + res := make([][]uint8, img.Bounds().Dx()) + for x := range res { + res[x] = make([]uint8, img.Bounds().Dy()) + } + + if img == nil { + return res, fmt.Errorf("img can't be nil") + } + if d.FaceDetectionHaarCascadeFilepath == "" { + return res, fmt.Errorf("FaceDetector's FaceDetectionHaarCascadeFilepath not specified") + } + + _, err := os.Stat(d.FaceDetectionHaarCascadeFilepath) + if err != nil { + return res, err + } + + classifier := gocv.NewCascadeClassifier() + defer classifier.Close() + if !classifier.Load(d.FaceDetectionHaarCascadeFilepath) { + return res, fmt.Errorf("FaceDetector failed loading cascade file") + } + + // image.NRGBA-compatible params + cvMat, err := gocv.NewMatFromBytes(img.Rect.Dy(), img.Rect.Dx(), gocv.MatTypeCV8UC4, img.Pix) + defer cvMat.Close() + if err != nil { + return res, err + } + + faces := classifier.DetectMultiScale(cvMat) + + if d.Logger.DebugMode == true { + d.Logger.Log.Printf("Number of faces detected: %d\n", len(faces)) + } + + for _, face := range faces { + // Upper left corner of detected face-rectangle + x := face.Min.X + y := face.Min.Y + + width := face.Dx() + height := face.Dy() + + if d.Logger.DebugMode == true { + d.Logger.Log.Printf("Face: x: %d y: %d w: %d h: %d\n", x, y, width, height) + } + + drawAFilledCircle(res, x+(width/2), y+(height/2), width/2) + } + return res, nil +} + +func drawAFilledCircle(pix [][]uint8, x0, y0, r int) { + x := r - 1 + y := 0 + dx := 1 + dy := 1 + err := dx - (r << 1) + + for { + if x < y { + return + } + + for i := -x; i <= x; i++ { + putPixel(pix, x0+i, y0+y) + putPixel(pix, x0+i, y0-y) + putPixel(pix, x0+y, y0+i) + putPixel(pix, x0-y, y0+i) + } + + if err <= 0 { + y++ + err += dy + dy += 2 + } else { + x-- + dx += 2 + err += dx - (r << 1) + } + } +} + +func putPixel(pix [][]uint8, x, y int) { + if x >= len(pix) { + return + } + if y >= len(pix[x]) { + return + } + pix[x][y] = uint8(255) +} diff --git a/logger/logger.go b/logger/logger.go new file mode 100644 index 0000000..3290966 --- /dev/null +++ b/logger/logger.go @@ -0,0 +1,11 @@ +package logger + +import ( + "log" +) + +// Logger contains a logger. +type Logger struct { + DebugMode bool + Log *log.Logger +} diff --git a/smartcrop-rundebug/main.go b/smartcrop-rundebug/main.go new file mode 100644 index 0000000..44a6ad8 --- /dev/null +++ b/smartcrop-rundebug/main.go @@ -0,0 +1,46 @@ +package main + +import ( + "fmt" + "image" + _ "image/jpeg" + _ "image/png" + "log" + "os" + + "github.com/muesli/smartcrop" + sclogger "github.com/muesli/smartcrop/logger" + "github.com/muesli/smartcrop/nfnt" + // "github.com/muesli/smartcrop/gocv" +) + +func main() { + if len(os.Args) < 2 { + fmt.Println("Please give me an argument") + os.Exit(1) + } + + f, _ := os.Open(os.Args[1]) + img, _, _ := image.Decode(f) + + l := sclogger.Logger{ + DebugMode: true, + Log: log.New(os.Stderr, "", 0), + } + + analyzer := smartcrop.NewAnalyzerWithLogger(nfnt.NewDefaultResizer(), l) + + /* + To replace skin detection with gocv-based face detection: + + analyzer.SetDetectors([]smartcrop.Detector{ + &gocv.FaceDetector{"./cascade.xml", &l}, + &smartcrop.SaturationDetector{}, + }) + */ + + topCrop, _ := analyzer.FindBestCrop(img, 300, 200) + + // The crop will have the requested aspect ratio, but you need to copy/scale it yourself + fmt.Printf("Top crop: %+v\n", topCrop) +} diff --git a/smartcrop.go b/smartcrop.go index 5796ac4..96cf14d 100644 --- a/smartcrop.go +++ b/smartcrop.go @@ -40,9 +40,10 @@ import ( "math" "time" - "github.com/muesli/smartcrop/options" - "golang.org/x/image/draw" + + sclogger "github.com/muesli/smartcrop/logger" + "github.com/muesli/smartcrop/options" ) var ( @@ -81,13 +82,14 @@ const ( // width and height returns an error if invalid type Analyzer interface { FindBestCrop(img image.Image, width, height int) (image.Rectangle, error) + SetDetailDetector(d DetailDetector) + SetDetectors(ds []Detector) } // Score contains values that classify matches type Score struct { - Detail float64 - Saturation float64 - Skin float64 + Detail float64 + PerDetector []float64 } // Crop contains results @@ -96,20 +98,35 @@ type Crop struct { Score Score } -// Logger contains a logger. -type Logger struct { - DebugMode bool - Log *log.Logger +/* + DetailDetector detects detail that Detectors can use. +*/ +type DetailDetector interface { + Name() string + Detect(original *image.RGBA) ([][]uint8, error) + Weight() float64 +} + +/* + Detector contains a method that detects features like skin or saturation. +*/ +type Detector interface { + Name() string + Detect(original *image.RGBA) ([][]uint8, error) + Bias() float64 + Weight() float64 } type smartcropAnalyzer struct { - logger Logger + detailDetector DetailDetector + detectors []Detector + logger sclogger.Logger options.Resizer } // NewAnalyzer returns a new Analyzer using the given Resizer. func NewAnalyzer(resizer options.Resizer) Analyzer { - logger := Logger{ + logger := sclogger.Logger{ DebugMode: false, } @@ -117,11 +134,27 @@ func NewAnalyzer(resizer options.Resizer) Analyzer { } // NewAnalyzerWithLogger returns a new analyzer with the given Resizer and Logger. -func NewAnalyzerWithLogger(resizer options.Resizer, logger Logger) Analyzer { +func NewAnalyzerWithLogger(resizer options.Resizer, logger sclogger.Logger) Analyzer { if logger.Log == nil { logger.Log = log.New(ioutil.Discard, "", 0) } - return &smartcropAnalyzer{Resizer: resizer, logger: logger} + + // Set default detectors here + detailDetector := &EdgeDetector{} + detectors := []Detector{ + &SkinDetector{}, + &SaturationDetector{}, + } + + return &smartcropAnalyzer{detailDetector: detailDetector, detectors: detectors, Resizer: resizer, logger: logger} +} + +func (o *smartcropAnalyzer) SetDetectors(ds []Detector) { + o.detectors = ds +} + +func (o *smartcropAnalyzer) SetDetailDetector(d DetailDetector) { + o.detailDetector = d } func (o smartcropAnalyzer) FindBestCrop(img image.Image, width, height int) (image.Rectangle, error) { @@ -163,7 +196,7 @@ func (o smartcropAnalyzer) FindBestCrop(img image.Image, width, height int) (ima o.logger.Log.Printf("original resolution: %dx%d\n", img.Bounds().Dx(), img.Bounds().Dy()) o.logger.Log.Printf("scale: %f, cropw: %f, croph: %f, minscale: %f\n", scale, cropWidth, cropHeight, realMinScale) - topCrop, err := analyse(o.logger, lowimg, cropWidth, cropHeight, realMinScale) + topCrop, err := o.analyse(lowimg, cropWidth, cropHeight, realMinScale) if err != nil { return topCrop, err } @@ -178,8 +211,18 @@ func (o smartcropAnalyzer) FindBestCrop(img image.Image, width, height int) (ima return topCrop.Canon(), nil } -func (c Crop) totalScore() float64 { - return (c.Score.Detail*detailWeight + c.Score.Skin*skinWeight + c.Score.Saturation*saturationWeight) / float64(c.Dx()) / float64(c.Dy()) +func (o *smartcropAnalyzer) totalScoreForCrop(c Crop) float64 { + t := 0.0 + + t += c.Score.Detail * o.detailDetector.Weight() + + for i := range c.Score.PerDetector { + t += c.Score.PerDetector[i] * o.detectors[i].Weight() + } + + t = t / float64(c.Dx()) / float64(c.Dy()) + + return t } func chop(x float64) float64 { @@ -221,73 +264,97 @@ func importance(crop Crop, x, y int) float64 { return s + d } -func score(output *image.RGBA, crop Crop) Score { - width := output.Bounds().Dx() - height := output.Bounds().Dy() +func score(detailDetection detection, detections []detection, crop Crop) Score { + width := len(detailDetection.Pix) + height := len(detailDetection.Pix[0]) score := Score{} + score.PerDetector = make([]float64, len(detections)) + // same loops but with downsampling //for y := 0; y < height; y++ { //for x := 0; x < width; x++ { for y := 0; y <= height-scoreDownSample; y += scoreDownSample { for x := 0; x <= width-scoreDownSample; x += scoreDownSample { - c := output.RGBAAt(x, y) - r8 := float64(c.R) - g8 := float64(c.G) - b8 := float64(c.B) - imp := importance(crop, int(x), int(y)) - det := g8 / 255.0 + det := float64(detailDetection.Pix[x][y]) / 255.0 - score.Skin += r8 / 255.0 * (det + skinBias) * imp score.Detail += det * imp - score.Saturation += b8 / 255.0 * (det + saturationBias) * imp + + for i, d := range detections { + score.PerDetector[i] += float64(d.Pix[x][y]) / 255.0 * (det + d.Bias) * imp + } } } return score } -func analyse(logger Logger, img *image.RGBA, cropWidth, cropHeight, realMinScale float64) (image.Rectangle, error) { - o := image.NewRGBA(img.Bounds()) +type detection struct { + Pix [][]uint8 + Weight float64 + Bias float64 +} - now := time.Now() - edgeDetect(img, o) - logger.Log.Println("Time elapsed edge:", time.Since(now)) - debugOutput(logger.DebugMode, o, "edge") +func (a *smartcropAnalyzer) analyse(img *image.RGBA, cropWidth, cropHeight, realMinScale float64) (image.Rectangle, error) { + debugImg := NewDebugImage(img.Bounds()) - now = time.Now() - skinDetect(img, o) - logger.Log.Println("Time elapsed skin:", time.Since(now)) - debugOutput(logger.DebugMode, o, "skin") + d := a.detailDetector + start := time.Now() + detailPix, err := d.Detect(img) + if err != nil { + return image.Rectangle{}, err + } + a.logger.Log.Printf("Time elapsed detecting %s: %s\n", d.Name(), time.Since(start)) + if a.logger.DebugMode { + debugImg.AddDetected(detailPix) + debugImg.DebugOutput(d.Name()) + } - now = time.Now() - saturationDetect(img, o) - logger.Log.Println("Time elapsed sat:", time.Since(now)) - debugOutput(logger.DebugMode, o, "saturation") + detailDetection := detection{Pix: detailPix, Weight: a.detailDetector.Weight()} - now = time.Now() + detections := make([]detection, len(a.detectors)) + + for i, d := range a.detectors { + start := time.Now() + pix, err := d.Detect(img) + if err != nil { + return image.Rectangle{}, err + } + + detections[i] = detection{Pix: pix, Weight: d.Weight(), Bias: d.Bias()} + + a.logger.Log.Printf("Time elapsed detecting %s: %s\n", d.Name(), time.Since(start)) + if a.logger.DebugMode { + debugImg.AddDetected(detections[i].Pix) + debugImg.DebugOutput(d.Name()) + } + } + + now := time.Now() var topCrop Crop topScore := -1.0 - cs := crops(o, cropWidth, cropHeight, realMinScale) - logger.Log.Println("Time elapsed crops:", time.Since(now), len(cs)) + cs := crops(img.Bounds(), cropWidth, cropHeight, realMinScale) + a.logger.Log.Println("Time elapsed crops:", time.Since(now), len(cs)) now = time.Now() for _, crop := range cs { nowIn := time.Now() - crop.Score = score(o, crop) - logger.Log.Println("Time elapsed single-score:", time.Since(nowIn)) - if crop.totalScore() > topScore { + crop.Score = score(detailDetection, detections, crop) + a.logger.Log.Println("Time elapsed single-score:", time.Since(nowIn)) + + totalScore := a.totalScoreForCrop(crop) + if totalScore > topScore { topCrop = crop - topScore = crop.totalScore() + topScore = totalScore } } - logger.Log.Println("Time elapsed score:", time.Since(now)) + a.logger.Log.Println("Time elapsed score:", time.Since(now)) - if logger.DebugMode { - drawDebugCrop(topCrop, o) - debugOutput(true, o, "final") + if a.logger.DebugMode { + debugImg.DrawDebugCrop(topCrop) + debugImg.DebugOutput("final") } return topCrop.Rectangle, nil @@ -361,14 +428,27 @@ func makeCies(img *image.RGBA) []float64 { return cies } -func edgeDetect(i *image.RGBA, o *image.RGBA) { +type EdgeDetector struct{} + +func (d *EdgeDetector) Name() string { + return "edge" +} + +func (d *EdgeDetector) Weight() float64 { + return detailWeight +} + +func (d *EdgeDetector) Detect(i *image.RGBA) ([][]uint8, error) { width := i.Bounds().Dx() height := i.Bounds().Dy() cies := makeCies(i) + res := make([][]uint8, width) + var lightness float64 - for y := 0; y < height; y++ { - for x := 0; x < width; x++ { + for x := 0; x < width; x++ { + res[x] = make([]uint8, height) + for y := 0; y < height; y++ { if x == 0 || x >= width-1 || y == 0 || y >= height-1 { //lightness = cie((*i).At(x, y)) lightness = 0 @@ -380,60 +460,86 @@ func edgeDetect(i *image.RGBA, o *image.RGBA) { cies[x+(y+1)*width] } - nc := color.RGBA{0, uint8(bounds(lightness)), 0, 255} - o.SetRGBA(x, y, nc) + res[x][y] = uint8(bounds(lightness)) } } + return res, nil } -func skinDetect(i *image.RGBA, o *image.RGBA) { +type SkinDetector struct{} + +func (d *SkinDetector) Name() string { + return "skin" +} + +func (d *SkinDetector) Bias() float64 { + return skinBias +} + +func (d *SkinDetector) Weight() float64 { + return skinWeight +} + +func (d *SkinDetector) Detect(i *image.RGBA) ([][]uint8, error) { width := i.Bounds().Dx() height := i.Bounds().Dy() - for y := 0; y < height; y++ { - for x := 0; x < width; x++ { + res := make([][]uint8, width) + + for x := 0; x < width; x++ { + res[x] = make([]uint8, height) + for y := 0; y < height; y++ { lightness := cie(i.RGBAAt(x, y)) / 255.0 skin := skinCol(i.RGBAAt(x, y)) - c := o.RGBAAt(x, y) if skin > skinThreshold && lightness >= skinBrightnessMin && lightness <= skinBrightnessMax { r := (skin - skinThreshold) * (255.0 / (1.0 - skinThreshold)) - nc := color.RGBA{uint8(bounds(r)), c.G, c.B, 255} - o.SetRGBA(x, y, nc) - } else { - nc := color.RGBA{0, c.G, c.B, 255} - o.SetRGBA(x, y, nc) + res[x][y] = uint8(bounds(r)) } } } + return res, nil +} + +type SaturationDetector struct{} + +func (d *SaturationDetector) Name() string { + return "saturation" +} + +func (d *SaturationDetector) Bias() float64 { + return saturationBias +} + +func (d *SaturationDetector) Weight() float64 { + return saturationWeight } -func saturationDetect(i *image.RGBA, o *image.RGBA) { +func (d *SaturationDetector) Detect(i *image.RGBA) ([][]uint8, error) { width := i.Bounds().Dx() height := i.Bounds().Dy() - for y := 0; y < height; y++ { - for x := 0; x < width; x++ { + res := make([][]uint8, width) + + for x := 0; x < width; x++ { + res[x] = make([]uint8, height) + for y := 0; y < height; y++ { lightness := cie(i.RGBAAt(x, y)) / 255.0 saturation := saturation(i.RGBAAt(x, y)) - c := o.RGBAAt(x, y) if saturation > saturationThreshold && lightness >= saturationBrightnessMin && lightness <= saturationBrightnessMax { b := (saturation - saturationThreshold) * (255.0 / (1.0 - saturationThreshold)) - nc := color.RGBA{c.R, c.G, uint8(bounds(b)), 255} - o.SetRGBA(x, y, nc) - } else { - nc := color.RGBA{c.R, c.G, 0, 255} - o.SetRGBA(x, y, nc) + res[x][y] = uint8(bounds(b)) } } } + return res, nil } -func crops(i image.Image, cropWidth, cropHeight, realMinScale float64) []Crop { +func crops(bounds image.Rectangle, cropWidth, cropHeight, realMinScale float64) []Crop { res := []Crop{} - width := i.Bounds().Dx() - height := i.Bounds().Dy() + width := bounds.Dx() + height := bounds.Dy() minDimension := math.Min(float64(width), float64(height)) var cropW, cropH float64 diff --git a/smartcrop_test.go b/smartcrop_test.go index fbb2ae7..c3f4d70 100644 --- a/smartcrop_test.go +++ b/smartcrop_test.go @@ -29,7 +29,6 @@ package smartcrop import ( "errors" - "fmt" "image" _ "image/jpeg" _ "image/png" @@ -118,8 +117,8 @@ func BenchmarkEdge(b *testing.B) { rgbaImg := toRGBA(img) b.ResetTimer() for i := 0; i < b.N; i++ { - o := image.NewRGBA(img.Bounds()) - edgeDetect(rgbaImg, o) + d := EdgeDetector{} + d.Detect(rgbaImg) } } @@ -146,7 +145,6 @@ func BenchmarkImageDir(b *testing.B) { b.Error(err) continue } - fmt.Printf("Top crop: %+v\n", topCrop) sub, ok := img.(SubImager) if ok { @@ -158,5 +156,4 @@ func BenchmarkImageDir(b *testing.B) { } } } - // fmt.Println("average time/image:", b.t) }