-
-
Notifications
You must be signed in to change notification settings - Fork 113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optional face detection with gocv #35
base: master
Are you sure you want to change the base?
Changes from 9 commits
b035707
9fa7dac
fa369ab
654bf38
d6b4b4c
41573b1
e15f810
96dc0c1
a3a40ff
6da9bd3
0b7246f
15bd6ac
964bc81
d607a1b
ff0aec3
9acfd05
f8c5cd0
cef7d23
751d4f9
fe4a524
fc94470
9b8c1bb
d2c290a
bbd2e71
1961e95
f09e3d5
2f3124c
4908101
58f6608
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package gocv | ||
|
||
import (
	"fmt"
	"image"
	"image/color"
	"log"
	"os"
	"runtime"

	"github.com/llgcode/draw2d/draw2dimg"
	"github.com/llgcode/draw2d/draw2dkit"
	"gocv.io/x/gocv"
)
|
||
// FaceDetector is a detector that finds faces with a Haar cascade classifier
// provided by gocv.
type FaceDetector struct {
	// FaceDetectionHaarCascadeFilepath is the path of the Haar cascade file
	// that Detect loads. Detect returns an error when it is empty.
	FaceDetectionHaarCascadeFilepath string

	// DebugMode makes Detect log the number of detected faces and the
	// position and size of each one.
	DebugMode bool
}

// Name returns the identifier of this detector, "face".
func (d *FaceDetector) Name() string {
	return "face"
}
|
||
func (d *FaceDetector) Detect(i *image.RGBA, o *image.RGBA) error { | ||
if i == nil { | ||
return fmt.Errorf("i can't be nil") | ||
} | ||
if o == nil { | ||
return fmt.Errorf("o can't be nil") | ||
} | ||
if d.FaceDetectionHaarCascadeFilepath == "" { | ||
return fmt.Errorf("FaceDetector's FaceDetectionHaarCascadeFilepath not specified") | ||
} | ||
|
||
_, err := os.Stat(d.FaceDetectionHaarCascadeFilepath) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
classifier := gocv.NewCascadeClassifier() | ||
defer classifier.Close() | ||
if !classifier.Load(d.FaceDetectionHaarCascadeFilepath) { | ||
return fmt.Errorf("FaceDetector failed loading cascade file") | ||
} | ||
|
||
// image.NRGBA-compatible params | ||
cvMat := gocv.NewMatFromBytes(i.Rect.Dy(), i.Rect.Dx(), gocv.MatTypeCV8UC4, i.Pix) | ||
defer cvMat.Close() | ||
|
||
faces := classifier.DetectMultiScale(cvMat) | ||
|
||
gc := draw2dimg.NewGraphicContext(o) | ||
|
||
if d.DebugMode == true { | ||
log.Println("Faces detected:", len(faces)) | ||
} | ||
|
||
for _, face := range faces { | ||
// Upper left corner of detected face-rectangle | ||
x := face.Min.X | ||
y := face.Min.Y | ||
|
||
width := face.Dx() | ||
height := face.Dy() | ||
|
||
if d.DebugMode == true { | ||
log.Printf("Face: x: %d y: %d w: %d h: %d\n", x, y, width, height) | ||
} | ||
|
||
// Draw a filled circle where the face is | ||
draw2dkit.Ellipse( | ||
gc, | ||
float64(x+(width/2)), | ||
float64(y+(height/2)), | ||
float64(width/2), | ||
float64(height)/2) | ||
gc.SetFillColor(color.RGBA{255, 0, 0, 255}) | ||
gc.Fill() | ||
} | ||
return nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
"image" | ||
_ "image/jpeg" | ||
_ "image/png" | ||
"log" | ||
"os" | ||
|
||
"github.com/muesli/smartcrop" | ||
// "github.com/muesli/smartcrop/gocv" | ||
"github.com/muesli/smartcrop/nfnt" | ||
) | ||
|
||
func main() { | ||
if len(os.Args) < 2 { | ||
fmt.Println("Please give me an argument") | ||
os.Exit(1) | ||
} | ||
|
||
f, _ := os.Open(os.Args[1]) | ||
img, _, _ := image.Decode(f) | ||
|
||
l := smartcrop.Logger{ | ||
DebugMode: true, | ||
Log: log.New(os.Stderr, "", 0), | ||
} | ||
|
||
analyzer := smartcrop.NewAnalyzerWithLogger(nfnt.NewDefaultResizer(), l) | ||
|
||
/* | ||
To replace skin detection with gocv-based face detection: | ||
|
||
analyzer.SetDetectors([]smartcrop.Detector{ | ||
&smartcrop.EdgeDetector{}, | ||
&gocv.FaceDetector{"./cascade.xml", true}, | ||
&smartcrop.SaturationDetector{}, | ||
}) | ||
*/ | ||
|
||
topCrop, _ := analyzer.FindBestCrop(img, 300, 200) | ||
|
||
// The crop will have the requested aspect ratio, but you need to copy/scale it yourself | ||
fmt.Printf("Top crop: %+v\n", topCrop) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,7 +54,7 @@ var ( | |
|
||
const ( | ||
detailWeight = 0.2 | ||
skinBias = 0.01 | ||
skinBias = 0.9 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This probably isn't the best idea, as it changes the current behavior (without face detection). We should introduce a separate bias here, maybe. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, that's a valid concern. I ended up with this since now the score function does not know whether skin or face was detected. Would it be useful to be able to override these values, or are the same values useful for all use cases? If it is useful, some kind of API for providing overrides could solve this problem. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe each |
||
skinBrightnessMin = 0.2 | ||
skinBrightnessMax = 1.0 | ||
skinThreshold = 0.8 | ||
|
@@ -81,6 +81,7 @@ const ( | |
// width and height returns an error if invalid | ||
type Analyzer interface { | ||
FindBestCrop(img image.Image, width, height int) (image.Rectangle, error) | ||
SetDetectors(ds []Detector) | ||
} | ||
|
||
// Score contains values that classify matches | ||
|
@@ -102,8 +103,18 @@ type Logger struct { | |
Log *log.Logger | ||
} | ||
|
||
// Detector contains a method that detects either skin, features or saturation.
// Its Detect method writes the detected skin, features or saturation to the
// red, green and blue channels, respectively.
type Detector interface {
	// Name returns the label of the detector; it is used in timing logs and
	// debug output.
	Name() string

	// Detect analyses original and records what it found in sharedResult,
	// the image shared by all detectors. A non-nil error aborts the analysis.
	Detect(original *image.RGBA, sharedResult *image.RGBA) error
}
|
||
type smartcropAnalyzer struct { | ||
logger Logger | ||
detectors []Detector | ||
logger Logger | ||
options.Resizer | ||
} | ||
|
||
|
@@ -121,7 +132,19 @@ func NewAnalyzerWithLogger(resizer options.Resizer, logger Logger) Analyzer { | |
if logger.Log == nil { | ||
logger.Log = log.New(ioutil.Discard, "", 0) | ||
} | ||
return &smartcropAnalyzer{Resizer: resizer, logger: logger} | ||
|
||
// Set default detectors here | ||
detectors := []Detector{ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a nice idea, but requires a bit more restructuring, I think. For example, if you initialize smartcrop with only the FaceDetector, the debug output looks a bit funky. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, when this goes public the interface needs to be a bit stricter. I witnessed that by unintentionally changing the order of detectors, and got my detected face overwritten by the artist formerly known as edgeDetect. I'll try to figure something out. |
||
&EdgeDetector{}, | ||
&SkinDetector{}, | ||
&SaturationDetector{}, | ||
} | ||
|
||
return &smartcropAnalyzer{detectors: detectors, Resizer: resizer, logger: logger} | ||
} | ||
|
||
func (o *smartcropAnalyzer) SetDetectors(ds []Detector) { | ||
o.detectors = ds | ||
} | ||
|
||
func (o smartcropAnalyzer) FindBestCrop(img image.Image, width, height int) (image.Rectangle, error) { | ||
|
@@ -163,7 +186,7 @@ func (o smartcropAnalyzer) FindBestCrop(img image.Image, width, height int) (ima | |
o.logger.Log.Printf("original resolution: %dx%d\n", img.Bounds().Dx(), img.Bounds().Dy()) | ||
o.logger.Log.Printf("scale: %f, cropw: %f, croph: %f, minscale: %f\n", scale, cropWidth, cropHeight, realMinScale) | ||
|
||
topCrop, err := analyse(o.logger, lowimg, cropWidth, cropHeight, realMinScale) | ||
topCrop, err := analyse(o.logger, o.detectors, lowimg, cropWidth, cropHeight, realMinScale) | ||
if err != nil { | ||
return topCrop, err | ||
} | ||
|
@@ -249,25 +272,24 @@ func score(output *image.RGBA, crop Crop) Score { | |
return score | ||
} | ||
|
||
func analyse(logger Logger, img *image.RGBA, cropWidth, cropHeight, realMinScale float64) (image.Rectangle, error) { | ||
func analyse(logger Logger, detectors []Detector, img *image.RGBA, cropWidth, cropHeight, realMinScale float64) (image.Rectangle, error) { | ||
o := image.NewRGBA(img.Bounds()) | ||
|
||
now := time.Now() | ||
edgeDetect(img, o) | ||
logger.Log.Println("Time elapsed edge:", time.Since(now)) | ||
debugOutput(logger.DebugMode, o, "edge") | ||
|
||
now = time.Now() | ||
skinDetect(img, o) | ||
logger.Log.Println("Time elapsed skin:", time.Since(now)) | ||
debugOutput(logger.DebugMode, o, "skin") | ||
|
||
now = time.Now() | ||
saturationDetect(img, o) | ||
logger.Log.Println("Time elapsed sat:", time.Since(now)) | ||
debugOutput(logger.DebugMode, o, "saturation") | ||
/* | ||
Run each detector. They write to R (skin), G (features) and B (saturation) channels on image 'o'. | ||
The score function will use that information. | ||
*/ | ||
for _, d := range detectors { | ||
start := time.Now() | ||
err := d.Detect(img, o) | ||
if err != nil { | ||
return image.Rectangle{}, err | ||
} | ||
logger.Log.Printf("Time elapsed detecting %s: %s\n", d.Name(), time.Since(start)) | ||
debugOutput(logger.DebugMode, o, d.Name()) | ||
} | ||
|
||
now = time.Now() | ||
now := time.Now() | ||
var topCrop Crop | ||
topScore := -1.0 | ||
cs := crops(o, cropWidth, cropHeight, realMinScale) | ||
|
@@ -361,7 +383,13 @@ func makeCies(img *image.RGBA) []float64 { | |
return cies | ||
} | ||
|
||
func edgeDetect(i *image.RGBA, o *image.RGBA) { | ||
// EdgeDetector is the Detector implementation for edge detection. It carries
// no configuration.
type EdgeDetector struct{}

// Name returns the identifier of this detector, "edge".
func (d *EdgeDetector) Name() string {
	return "edge"
}
|
||
func (d *EdgeDetector) Detect(i *image.RGBA, o *image.RGBA) error { | ||
width := i.Bounds().Dx() | ||
height := i.Bounds().Dy() | ||
cies := makeCies(i) | ||
|
@@ -384,9 +412,16 @@ func edgeDetect(i *image.RGBA, o *image.RGBA) { | |
o.SetRGBA(x, y, nc) | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func skinDetect(i *image.RGBA, o *image.RGBA) { | ||
// SkinDetector is the Detector implementation for skin detection. It carries
// no configuration.
type SkinDetector struct{}

// Name returns the identifier of this detector, "skin".
func (d *SkinDetector) Name() string {
	return "skin"
}
|
||
func (d *SkinDetector) Detect(i *image.RGBA, o *image.RGBA) error { | ||
width := i.Bounds().Dx() | ||
height := i.Bounds().Dy() | ||
|
||
|
@@ -406,9 +441,16 @@ func skinDetect(i *image.RGBA, o *image.RGBA) { | |
} | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
// SaturationDetector is the Detector implementation for saturation detection.
// It carries no configuration.
type SaturationDetector struct{}

// Name returns the identifier of this detector, "saturation".
func (d *SaturationDetector) Name() string {
	return "saturation"
}
|
||
func saturationDetect(i *image.RGBA, o *image.RGBA) { | ||
func (d *SaturationDetector) Detect(i *image.RGBA, o *image.RGBA) error { | ||
width := i.Bounds().Dx() | ||
height := i.Bounds().Dy() | ||
|
||
|
@@ -428,6 +470,7 @@ func saturationDetect(i *image.RGBA, o *image.RGBA) { | |
} | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func crops(i image.Image, cropWidth, cropHeight, realMinScale float64) []Crop { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should think about injecting the Logger here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes that seems smart