diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 62ecd89..c237859 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,20 +12,20 @@ - id: detect-private-key files: (?!.*tar.gz)^.*$ - id: end-of-file-fixer - files: \.md$ + files: \.(md|py|mm|swift|java|c|cc|cxx|cpp|cu|h|hpp|hxx)$ - id: trailing-whitespace - files: \.md$ + files: \.(md|py|mm|swift|java|c|cc|cxx|cpp|cu|h|hpp|hxx)$ - repo: https://github.com/Lucas-C/pre-commit-hooks.git sha: v1.0.1 hooks: - id: forbid-crlf - files: \.(md|c|cc|cxx|cpp|cu|h|hpp|hxx)$ + files: \.(md|py|mm|swift|java|c|cc|cxx|cpp|cu|h|hpp|hxx)$ - id: remove-crlf - files: \.(md|c|cc|cxx|cpp|cu|h|hpp|hxx)$ + files: \.(md|py|mm|swift|java|c|cc|cxx|cpp|cu|h|hpp|hxx)$ - id: forbid-tabs - files: \.(md|c|cc|cxx|cpp|cu|h|hpp|hxx)$ + files: \.(md|py|mm|swift|java|c|cc|cxx|cpp|cu|h|hpp|hxx)$ - id: remove-tabs - files: \.(md|c|cc|cxx|cpp|cu|h|hpp|hxx)$ + files: \.(md|py|mm|swift|java|c|cc|cxx|cpp|cu|h|hpp|hxx)$ - repo: local hooks: - id: clang-format-with-version-check diff --git a/Demo/iOS/AICamera/AICamera/AICamera-Bridging-Header.h b/Demo/iOS/AICamera/AICamera/AICamera-Bridging-Header.h index 74b8357..8119416 100644 --- a/Demo/iOS/AICamera/AICamera/AICamera-Bridging-Header.h +++ b/Demo/iOS/AICamera/AICamera/AICamera-Bridging-Header.h @@ -1,5 +1,6 @@ // -// Use this file to import your target's public headers that you would like to expose to Swift. +// Use this file to import your target's public headers that you would like to +// expose to Swift. // #import "ImageRecognizerPaddleWrapper.h" diff --git a/Demo/iOS/AICamera/AICamera/AppDelegate.swift b/Demo/iOS/AICamera/AICamera/AppDelegate.swift index b524e79..71244c2 100644 --- a/Demo/iOS/AICamera/AICamera/AppDelegate.swift +++ b/Demo/iOS/AICamera/AICamera/AppDelegate.swift @@ -46,4 +46,3 @@ class AppDelegate: UIResponder, UIApplicationDelegate { } - diff --git a/Demo/iOS/AICamera/AICamera/ImageRecognizer.swift b/Demo/iOS/AICamera/AICamera/ImageRecognizer.swift index 18e197a..5fcd0e7 100644 --- a/Demo/iOS/AICamera/AICamera/ImageRecognizer.swift +++ b/Demo/iOS/AICamera/AICamera/ImageRecognizer.swift @@ -14,20 +14,20 @@ protocol ImageRecognizerDelegate { } class ImageRecognizer { - + var imageRecognizer: ImageRecognizerPaddleWrapper? - + init(model: SSDModel) { imageRecognizer = ImageRecognizerPaddleWrapper(model: model.rawValue, withNormHeight: model.normDimension().0, withNormWidth: model.normDimension().1) } - + func inference(imageBuffer: UnsafeMutablePointer<UInt8>!, width: Int32, height: Int32, score: Float) -> NSMutableArray!
{ - + return imageRecognizer?.inference(imageBuffer, withHeight: height, withWidth: width, withFilterScore: score) } - + func release() { imageRecognizer?.destroy() } - -} \ No newline at end of file + +} diff --git a/Demo/iOS/AICamera/AICamera/ImageRecognizerPaddleWrapper.h b/Demo/iOS/AICamera/AICamera/ImageRecognizerPaddleWrapper.h index a330c8c..c5839be 100644 --- a/Demo/iOS/AICamera/AICamera/ImageRecognizerPaddleWrapper.h +++ b/Demo/iOS/AICamera/AICamera/ImageRecognizerPaddleWrapper.h @@ -13,8 +13,13 @@ @interface ImageRecognizerPaddleWrapper : NSObject -- (instancetype)initWithModel:(NSString*)modelFileName withNormHeight:(int)height withNormWidth:(int)width; -- (NSMutableArray*)inference:(unsigned char *)pixels withHeight:(int)height withWidth:(int)width withFilterScore:(float) filterScore; +- (instancetype)initWithModel:(NSString *)modelFileName + withNormHeight:(int)height + withNormWidth:(int)width; +- (NSMutableArray *)inference:(unsigned char *)pixels + withHeight:(int)height + withWidth:(int)width + withFilterScore:(float)filterScore; - (void)destroy; @end diff --git a/Demo/iOS/AICamera/AICamera/ImageRecognizerPaddleWrapper.mm b/Demo/iOS/AICamera/AICamera/ImageRecognizerPaddleWrapper.mm index 0d829d6..7b2b641 100644 --- a/Demo/iOS/AICamera/AICamera/ImageRecognizerPaddleWrapper.mm +++ b/Demo/iOS/AICamera/AICamera/ImageRecognizerPaddleWrapper.mm @@ -32,13 +32,13 @@ - (instancetype)initWithModel:(NSString*)modelFileName withNormHeight:(int)heigh { int channel = 3; const std::vector<float> means({104, 117, 124}); - + NSBundle* bundle = [NSBundle mainBundle]; NSString* resourceDirectoryPath = [bundle bundlePath]; NSString* path = [[resourceDirectoryPath stringByAppendingString:@"/"] stringByAppendingString:modelFileName]; - + self->recognizer.init([path UTF8String], height, width, channel, means); - + } return self; } @@ -48,14 +48,14 @@ - (NSMutableArray*)inference:(unsigned char *)pixels withHeight:(int)height with { int channel = 4; image::Config config(image::kBGR, image::CLOCKWISE_R90); self->recognizer.infer(pixels, height, width, channel, config, result); - + NSMutableArray *array = [[NSMutableArray alloc] initWithCapacity:result.height]; int w = result.width; - + for (int i = 0; i < result.height; i++) { float score = result.data[i * w + 2]; if (score < filterScore) continue; - + SSDData *ssdData = [[SSDData alloc] init]; ssdData.label = kLabels[(int) result.data[i * w + 1]]; ssdData.accuracy = score; @@ -63,10 +63,10 @@ - (NSMutableArray*)inference:(unsigned char *)pixels withHeight:(int)height with ssdData.ymin = result.data[i * w + 4]; ssdData.xmax = result.data[i * w + 5]; ssdData.ymax = result.data[i * w + 6]; - + [array addObject:ssdData]; } - + return array; } diff --git a/Demo/iOS/AICamera/AICamera/SSDData.h b/Demo/iOS/AICamera/AICamera/SSDData.h index 2745648..b52a59d 100644 --- a/Demo/iOS/AICamera/AICamera/SSDData.h +++ b/Demo/iOS/AICamera/AICamera/SSDData.h @@ -10,11 +10,11 @@ @interface SSDData : NSObject -@property (nonatomic) NSString *label; -@property (nonatomic) float accuracy; -@property (nonatomic) float xmin; -@property (nonatomic) float ymin; -@property (nonatomic) float xmax; -@property (nonatomic) float ymax; +@property(nonatomic) NSString *label; +@property(nonatomic) float accuracy; +@property(nonatomic) float xmin; +@property(nonatomic) float ymin; +@property(nonatomic) float xmax; +@property(nonatomic) float ymax; -@end \ No newline at end of file +@end diff --git a/Demo/iOS/AICamera/AICamera/SSDDrawLayer.swift
b/Demo/iOS/AICamera/AICamera/SSDDrawLayer.swift index 6dfb651..fdd677f 100644 --- a/Demo/iOS/AICamera/AICamera/SSDDrawLayer.swift +++ b/Demo/iOS/AICamera/AICamera/SSDDrawLayer.swift @@ -10,41 +10,41 @@ import UIKit class SSDDrawLayer: CAShapeLayer { var labelLayer = CATextLayer() - + required override init() { super.init() } - + required init?(coder aDecoder: NSCoder) { fatalError("init(coder:) has not been implemented") } - + func render(_ data: SSDData, model:SSDModel, isBackCamera:Bool) { - + let screenWidth = UIScreen.main.bounds.size.width let screenHeight = UIScreen.main.bounds.size.height - + let x = CGFloat(isBackCamera ? data.xmin : 1 - data.xmax) * screenWidth let y = CGFloat(data.ymin) * screenHeight let width = CGFloat(data.xmax - data.xmin) * screenWidth let height = CGFloat(data.ymax - data.ymin) * screenHeight - + if (model == SSDModel.FaceMobileNet160 && data.label != "aeroplane") { return; } - + //draw box self.path = UIBezierPath(roundedRect: CGRect(x: x, y: y, width: width, height: height), cornerRadius: 10).cgPath self.strokeColor = UIColor.cyan.cgColor self.lineWidth = 4.0 self.fillColor = nil self.lineJoin = kCALineJoinBevel - + if (model == SSDModel.FaceMobileNet160) { //do not draw label for face return; } - + let text = String.init(format: "%@: %.02f", data.label, data.accuracy) var displayString = NSAttributedString(string: text, attributes: [ NSStrokeColorAttributeName : UIColor.black, @@ -52,9 +52,9 @@ class SSDDrawLayer: CAShapeLayer { NSStrokeWidthAttributeName : NSNumber(value: -6.0), NSFontAttributeName : UIFont.systemFont(ofSize: 20, weight: 3) ]) - + //draw label - + labelLayer.string = displayString labelLayer.frame = CGRect.init(x: x + 4, y: y + height - 22, width: 1000, height: 30) addSublayer(labelLayer) diff --git a/Demo/iOS/AICamera/AICamera/SSDModel.swift b/Demo/iOS/AICamera/AICamera/SSDModel.swift index 711b2dc..0e9ef90 100644 --- a/Demo/iOS/AICamera/AICamera/SSDModel.swift +++ b/Demo/iOS/AICamera/AICamera/SSDModel.swift @@ -12,7 +12,7 @@ enum SSDModel : String { case PascalMobileNet300 = "pascal_mobilenet_300_66.paddle" case FaceMobileNet160 = "face_mobilenet_160_91.paddle" case PascalVGG300 = "vgg_ssd_net.paddle" - + func normDimension() -> (Int32, Int32) { switch self @@ -26,4 +26,3 @@ enum SSDModel : String { } } } - diff --git a/Demo/iOS/AICamera/AICamera/SSDMultiboxLayer.swift b/Demo/iOS/AICamera/AICamera/SSDMultiboxLayer.swift index 3d17e21..c1556ec 100644 --- a/Demo/iOS/AICamera/AICamera/SSDMultiboxLayer.swift +++ b/Demo/iOS/AICamera/AICamera/SSDMultiboxLayer.swift @@ -9,16 +9,16 @@ import UIKit class SSDMultiboxLayer: CALayer { - + func displayBoxs(with ssdDataList: NSMutableArray, model: SSDModel, isBackCamera: Bool){ self.sublayers?.forEach({ (layer) in layer.removeFromSuperlayer() }) - + for ssdData in ssdDataList { let boxLayer = SSDDrawLayer.init() boxLayer.render(ssdData as! SSDData, model: model, isBackCamera: isBackCamera) - + self.addSublayer(boxLayer) } } diff --git a/Demo/iOS/AICamera/AICamera/ViewController.swift b/Demo/iOS/AICamera/AICamera/ViewController.swift index 9ee0d99..aba38c5 100644 --- a/Demo/iOS/AICamera/AICamera/ViewController.swift +++ b/Demo/iOS/AICamera/AICamera/ViewController.swift @@ -12,28 +12,28 @@ import Foundation class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate { - + var captureSession : AVCaptureSession? var multiboxLayer : SSDMultiboxLayer? var previewLayer : AVCaptureVideoPreviewLayer? var captureDevice : AVCaptureDevice? 
- + var isRestarting = false; - + var imageRecognizer : ImageRecognizer? - + var timeStamp : TimeInterval? - + var index = 0 - + //default settings var ssdModel : SSDModel = SSDModel.PascalMobileNet300 var accuracyThreshold : Float = 0.5 var minTimeInterval : Float = 0.3 var backCamera = true - + @IBOutlet weak var settingsView: UIView! - + @IBOutlet weak var accuracyLabel: UILabel! @IBOutlet weak var timeRefreshLabel: UILabel! @IBOutlet weak var pascalMobileNetBtn: UIButton! @@ -43,123 +43,123 @@ class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDele @IBOutlet weak var frontCameraBtn: UIButton! @IBOutlet weak var accuracySlider: UISlider! @IBOutlet weak var timeRefreshSlider: UISlider! - - + + @IBAction func pascalMobileNet300Click(_ sender: UIButton) { pendingRestartWithNewModel(newModel: SSDModel.PascalMobileNet300) } - + @IBAction func faceMobileNet300Click(_ sender: UIButton) { pendingRestartWithNewModel(newModel: SSDModel.FaceMobileNet160) } - + @IBAction func pascalVgg300Click(_ sender: UIButton) { pendingRestartWithNewModel(newModel: SSDModel.PascalVGG300) } - + @IBAction func backCameraClick(_ sender: UIButton) { pendingRestartWithCamera(backCamera: true) } - + @IBAction func frontCameraClick(_ sender: UIButton) { pendingRestartWithCamera(backCamera: false) } - + @IBAction func accurcyThresholdChanged(_ sender: UISlider) { - + accuracyThreshold = sender.value accuracyLabel.text = String.init(format: "%.02f", accuracyThreshold) let defaults = UserDefaults.standard defaults.set(accuracyThreshold, forKey: "accuracyThreshold") } - + @IBAction func timeRefreshChanged(_ sender: UISlider) { - + minTimeInterval = sender.value timeRefreshLabel.text = String.init(format: "%.02f", minTimeInterval) let defaults = UserDefaults.standard defaults.set(minTimeInterval, forKey: "timeRefresh") } - + func pendingRestartWithNewModel(newModel: SSDModel) { - + if ssdModel == newModel { return; } - + let defaults = UserDefaults.standard defaults.set(newModel.rawValue , forKey: "model") - + isRestarting = true ssdModel = newModel } - - + + func pendingRestartWithCamera(backCamera: Bool) { - + if self.backCamera == backCamera { return; } - + let defaults = UserDefaults.standard defaults.set(backCamera , forKey: "backCamera") - + isRestarting = true self.backCamera = backCamera } - + func restart() { //hack: just make it crash so that we can restart exit(0) DispatchQueue.main.async { self.timeStamp = nil self.index = 0; - + self.imageRecognizer?.release() self.imageRecognizer = ImageRecognizer(model: self.ssdModel) - + self.captureSession?.stopRunning() - + self.previewLayer?.removeFromSuperlayer() self.multiboxLayer?.removeFromSuperlayer() self.setupVideoCaptureAndStart() - + self.isRestarting = false } } - + func toggleSettings(_ sender:UITapGestureRecognizer){ settingsView.isHidden = !settingsView.isHidden } - + override func viewDidLoad() { super.viewDidLoad() - + self.settingsView.isHidden = true - + checkModel() - + populateInitialSettings() - + let gesture = UITapGestureRecognizer(target: self, action: #selector (self.toggleSettings (_:))) self.view.addGestureRecognizer(gesture) - + imageRecognizer = ImageRecognizer(model: ssdModel) - + setupVideoCaptureAndStart() } - + func checkModel() { var bundlePath = Bundle.main.bundlePath bundlePath.append("/") bundlePath.append(SSDModel.PascalVGG300.rawValue) pascalVgg300Btn.isHidden = !FileManager.default.fileExists(atPath: bundlePath) } - + func populateInitialSettings() { - + let defaults = UserDefaults.standard - + if let 
modelStr = defaults.string(forKey:"model") { self.ssdModel = SSDModel(rawValue: modelStr)! } @@ -173,11 +173,11 @@ class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDele } highlightBtn?.titleLabel?.font = UIFont.boldSystemFont(ofSize: 16) highlightBtn?.setTitleColor(self.view.tintColor, for: .normal) - + if let backCamera = defaults.object(forKey: "backCamera") { self.backCamera = backCamera as! Bool } - + if self.backCamera { backCameraBtn.titleLabel?.font = UIFont.boldSystemFont(ofSize: 16) backCameraBtn.setTitleColor(self.view.tintColor, for: .normal) @@ -185,30 +185,30 @@ class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDele frontCameraBtn.titleLabel?.font = UIFont.boldSystemFont(ofSize: 16) frontCameraBtn.setTitleColor(self.view.tintColor, for: .normal) } - + if let accuracyThreshold = defaults.object(forKey: "accuracyThreshold") { self.accuracyThreshold = accuracyThreshold as! Float accuracySlider.setValue(self.accuracyThreshold, animated: false) } - + if let timeRefresh = defaults.object(forKey: "timeRefresh") { self.minTimeInterval = timeRefresh as! Float timeRefreshSlider.setValue(self.minTimeInterval, animated: false) } - + accuracyLabel.text = String.init(format: "%.02f", accuracyThreshold) timeRefreshLabel.text = String.init(format: "%.02f", minTimeInterval) - + } - + func setupVideoCaptureAndStart() { - + captureSession = AVCaptureSession() if let captureSession = captureSession { captureSession.sessionPreset = AVCaptureSessionPresetHigh - + captureDevice = AVCaptureDeviceDiscoverySession(deviceTypes: [AVCaptureDeviceType.builtInWideAngleCamera], mediaType: AVMediaTypeVideo, position: backCamera ? AVCaptureDevicePosition.back : AVCaptureDevicePosition.front).devices.first - + // setup video device input do { let videoDeviceInput: AVCaptureDeviceInput @@ -218,14 +218,14 @@ class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDele catch { fatalError("Could not create AVCaptureDeviceInput instance with error: \(error).") } - + captureSession.beginConfiguration() guard captureSession.canAddInput(videoDeviceInput) else { fatalError("CaptureSession can not add input") } captureSession.addInput(videoDeviceInput) } - + // setup preview let previewContainer = self.view.layer let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)! 
@@ -234,10 +234,10 @@ previewLayer.videoGravity = AVLayerVideoGravityResizeAspect previewContainer.insertSublayer(previewLayer, at: 0) self.previewLayer = previewLayer - + multiboxLayer = SSDMultiboxLayer() previewContainer.insertSublayer(multiboxLayer!, at: 1) - + // setup video output do { let videoDataOutput = AVCaptureVideoDataOutput() @@ -247,12 +247,12 @@ class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDele fatalError("CaptureSession can not add output") } captureSession.addOutput(videoDataOutput) - + captureSession.commitConfiguration() - + let queue = DispatchQueue(label: "com.paddlepaddle.SSDDemo") videoDataOutput.setSampleBufferDelegate(self, queue: queue) - + if let connection = videoDataOutput.connection(withMediaType: AVMediaTypeVideo) { if connection.isVideoOrientationSupported { // Force recording to portrait @@ -264,12 +264,12 @@ class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDele } } } - + func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { } - + func captureOutput(_ output: AVCaptureOutput, didOutputSampleBuffer sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { - + if let ts = self.timeStamp { while(true) { if (NSDate().timeIntervalSince1970 >= Double(minTimeInterval) + ts) { @@ -277,39 +277,39 @@ class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDele } } } - + index = index + 1 if let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { CVPixelBufferLockBaseAddress(imageBuffer, CVPixelBufferLockFlags(rawValue: 0)) - + let width = CVPixelBufferGetWidth(imageBuffer) let height = CVPixelBufferGetHeight(imageBuffer) let baseAddress = CVPixelBufferGetBaseAddress(imageBuffer) - + let intBuffer = unsafeBitCast(baseAddress, to: UnsafeMutablePointer<UInt8>.self) - + CVPixelBufferUnlockBaseAddress(imageBuffer, CVPixelBufferLockFlags(rawValue: 0)) - + let ssdDataList = imageRecognizer?.inference(imageBuffer: intBuffer, width: Int32(width), height: Int32(height), score: accuracyThreshold) - + self.timeStamp = NSDate().timeIntervalSince1970 - + DispatchQueue.main.async { self.multiboxLayer?.displayBoxs(with: ssdDataList!, model:self.ssdModel, isBackCamera:self.backCamera) } } - + if (isRestarting) { restart() } } - - + + override func didReceiveMemoryWarning() { super.didReceiveMemoryWarning() - + print("didReceiveMemoryWarning") // Dispose of any resources that can be recreated. } - + } diff --git a/Demo/iOS/AICamera/AICamera/image.h b/Demo/iOS/AICamera/AICamera/image.h index fe6553a..1fd92f4 100644 --- a/Demo/iOS/AICamera/AICamera/image.h +++ b/Demo/iOS/AICamera/AICamera/image.h @@ -1,40 +1,40 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License */ + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License */ #pragma once namespace image { - - enum Order { kCHW = 0, kHWC = 1 }; - - enum Format { - kRGB = 0x1, // support RGB, RGBA - kBGR = 0x2 // support BGR, BGRA - }; - - enum RotateOption { - NO_ROTATE = 0, - CLOCKWISE_R90 = 1, - CLOCKWISE_R180 = 2, - CLOCKWISE_R270 = 3 - }; - - struct Config { - Config() : format(kRGB), option(NO_ROTATE) {} - Config(Format f, RotateOption o) : format(f), option(o) {} - Format format; - RotateOption option; - }; - + +enum Order { kCHW = 0, kHWC = 1 }; + +enum Format { + kRGB = 0x1, // support RGB, RGBA + kBGR = 0x2 // support BGR, BGRA +}; + +enum RotateOption { + NO_ROTATE = 0, + CLOCKWISE_R90 = 1, + CLOCKWISE_R180 = 2, + CLOCKWISE_R270 = 3 +}; + +struct Config { + Config() : format(kRGB), option(NO_ROTATE) {} + Config(Format f, RotateOption o) : format(f), option(o) {} + Format format; + RotateOption option; +}; + } // namespace image diff --git a/benchmark/README.md b/benchmark/README.md index a3aa91a..36df99d 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -9,5 +9,4 @@ Inference benchmark of deep learning models implemented by paddlepaddle. Note: - ST means using single-threaded calculation. -- MI 5, Android 7.0, Snapdragon 820 1.8GHz - +- MI 5, Android 7.0, Snapdragon 820 1.8GHz diff --git a/benchmark/tool/C/inference.cc b/benchmark/tool/C/inference.cc index 23e8f89..e4a1a72 100644 --- a/benchmark/tool/C/inference.cc +++ b/benchmark/tool/C/inference.cc @@ -12,15 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include <stdio.h> -#include <stdlib.h> #include <paddle/capi.h> -#include <fstream> +#include <stdio.h> +#include <stdlib.h> + +#include <fstream> +#include <iostream> -inline paddle_error& operator |=(paddle_error& a, paddle_error b) { - return a = - static_cast<paddle_error>(static_cast<int>(a) | static_cast<int>(b)); +inline paddle_error& operator|=(paddle_error& a, paddle_error b) { + return a = static_cast<paddle_error>(static_cast<int>(a) | + static_cast<int>(b)); } class Timer { @@ -31,8 +32,8 @@ class Timer { ~Timer() { struct timespec tp_end; clock_gettime(CLOCK_MONOTONIC, &tp_end); - float time = ((tp_end.tv_nsec - tp_start.tv_nsec)/1000000.0f); - time += (tp_end.tv_sec - tp_start.tv_sec)*1000; + float time = ((tp_end.tv_nsec - tp_start.tv_nsec) / 1000000.0f); + time += (tp_end.tv_sec - tp_start.tv_sec) * 1000; time /= iter_; std::cout << "Time of " << name_ << " " << time << " ms." << std::endl; } @@ -90,7 +91,7 @@ int main(int argc, char* argv[]) { void* buf = NULL; read_file(merged_model.c_str(), &buf, &size); paddle_gradient_machine_create_for_inference_with_parameters( - &machine, buf, size); + &machine, buf, size); free(buf); } else { // Reading config binary file.
It is generated by `convert_protobin.sh` @@ -102,15 +103,15 @@ int main(int argc, char* argv[]) { read_file(predict_config.c_str(), &buf, &size); } - error |= - paddle_gradient_machine_create_for_inference(&machine, buf, (int)size); + error |= + paddle_gradient_machine_create_for_inference(&machine, buf, (int)size); if (predict_model.empty()) { error |= paddle_gradient_machine_randomize_param(machine); } else { Timer time("load model parameter"); error |= paddle_gradient_machine_load_parameter_from_disk( - machine, predict_model.c_str()); + machine, predict_model.c_str()); } free(buf); } diff --git a/deployment/library/build_for_minimum_size.md b/deployment/library/build_for_minimum_size.md index cdd9534..cfa4a01 100644 --- a/deployment/library/build_for_minimum_size.md +++ b/deployment/library/build_for_minimum_size.md @@ -4,7 +4,7 @@ In the mobile application, there usually are some limitations to the size of an Here we explore how to compile the inference library for minimum size. **Note:** -In the original PaddlePaddle, all computationally relevant code is implemented in Matrix.cpp and BaseMatrix.cu, +In the original PaddlePaddle, all computationally relevant code is implemented in Matrix.cpp and BaseMatrix.cu, which causes large compiled Matrix.o and BaseMatrix.o files that cannot be split. The Layer module can be split, but the forward and backward computation of a layer is included in the same file. The configuration definition in proto is redundant. All of these make the inference library larger. @@ -24,4 +24,3 @@ But don't forget to use `-no-whole-archive` after `libpaddle_capi_layers.a`, avo - Remove useless symbols in the shared library: When building a shared library from `libpaddle_capi_layers.a` and `libpaddle_capi_engine.a`, you can remove the useless export symbols with the `--version-script` option to reduce the size of the `.dynsym` and `.dynstr` sections.
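As a rough sketch of what such a link step can look like (only `libpaddle_capi_layers.a` and `libpaddle_capi_engine.a` come from the text above; the output name and the version-script file are hypothetical placeholders, and exact flags may differ per toolchain):

```bash
# Hypothetical link line: keep every layer registration from the layers
# archive, let the linker drop unused objects from the engine archive, and
# export only the C API symbols named in a version script.
cat > export_capi.map <<'EOF'
{ global: paddle_*; local: *; };
EOF

gcc -shared -o libpaddle_inference.so \
    -Wl,--whole-archive libpaddle_capi_layers.a \
    -Wl,--no-whole-archive libpaddle_capi_engine.a \
    -Wl,--version-script=export_capi.map
```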
[Here's an example](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/CMakeLists.txt#L61) - diff --git a/deployment/model/merge_batch_normalization/demo/merge_batch_norm.py b/deployment/model/merge_batch_normalization/demo/merge_batch_norm.py index ae0a043..c701e66 100755 --- a/deployment/model/merge_batch_normalization/demo/merge_batch_norm.py +++ b/deployment/model/merge_batch_normalization/demo/merge_batch_norm.py @@ -7,15 +7,19 @@ from paddle.v2.topology import Topology from mobilenet_with_bn import mobile_net + def parse_args(): """Parse input arguments.""" - parser = argparse.ArgumentParser(description='merge batch normalization to Conv or Fc') - parser.add_argument('--source_model', - help='the source model that should be merged', - type=str) - parser.add_argument('--dest_model', - help='the dest model name, we save our merged model of this name', - type=str) + parser = argparse.ArgumentParser( + description='merge batch normalization to Conv or Fc') + parser.add_argument( + '--source_model', + help='the source model that should be merged', + type=str) + parser.add_argument( + '--dest_model', + help='the dest model name, we save our merged model of this name', + type=str) return parser.parse_args() @@ -32,7 +36,6 @@ def __init__(self, source_net, source_model, dest_model): self.dest_model = dest_model - def fuse_param(self, current_layer, bn_layer): ''' fuse the bn_layer's parameters into current_layer @@ -62,17 +65,18 @@ def fuse_param(self, current_layer, bn_layer): std_bn = np.float32(np.sqrt(np.add(var_bn, 1e-5))) tmp1 = np.float32(np.divide(a_bn, std_bn)) - bias = np.float32(np.add(np.multiply(np.subtract(bias, mean_bn), tmp1), b_bn)) + bias = np.float32( + np.add(np.multiply(np.subtract(bias, mean_bn), tmp1), b_bn)) tmp1 = tmp1.reshape(tmp1.shape[1], -1) param = param.reshape((tmp1.shape[0], -1)) param = np.float32(np.multiply(param, tmp1)) if param_name in self.dest_param.names(): - print ('Merge ' + param_name + ' SUCCEED ') + print('Merge ' + param_name + ' SUCCEED ') param_shape = self.dest_param.get(param_name).shape self.dest_param.set(param_name, param.reshape(param_shape)) if bias_name in self.dest_param.names(): - print ('Merge ' + bias_name + ' SUCCEED ') + print('Merge ' + bias_name + ' SUCCEED ') bias_shape = self.dest_param.get(bias_name).shape self.dest_param.set(bias_name, bias.reshape(bias_shape)) @@ -86,18 +90,18 @@ def save_layer_without_bn(self, current_layer): param = self.source_param.get(param_name) if param_name in self.dest_param.names(): - print ('Merge ' + param_name + ' SUCCEED ') + print('Merge ' + param_name + ' SUCCEED ') self.dest_param.set(param_name, param) if bias_name: bias = self.source_param.get(bias_name) if bias_name in self.dest_param.names(): - print ('Merge ' + bias_name + ' SUCCEED ') + print('Merge ' + bias_name + ' SUCCEED ') self.dest_param.set(bias_name, bias) def merge(self): ''' - Merge batch norm + Merge batch norm Currently, the default layers with parameters are the fc and exconv layers.
''' layer_num = len(self.source_layers) @@ -105,10 +109,10 @@ def merge(self): while i < layer_num: current_layer = self.source_layers[i] - + if current_layer.type in ['exconv', 'fc']: if (i + 1 < layer_num and - self.source_layers[i + 1].type == 'batch_norm'): + self.source_layers[i + 1].type == 'batch_norm'): self.fuse_param(current_layer, self.source_layers[i + 1]) i = i + 2 continue @@ -130,9 +134,10 @@ def merge(self): base_path = os.path.dirname(os.path.realpath(__file__)) # Attr: net, source_model_path, target_model_path - mb = Merge_BN(net, os.path.join(base_path, 'models', args.source_model), - os.path.join(base_path, 'models', args.dest_model)) + mb = Merge_BN(net, + os.path.join(base_path, 'models', args.source_model), + os.path.join(base_path, 'models', args.dest_model)) mb.merge() - print ('Merged SUCCESS!\n' ) + print('Merged SUCCESS!\n') - print ('Then Run verify.py to verify the correctness of merged model :)') + print('Then Run verify.py to verify the correctness of merged model :)') diff --git a/deployment/model/merge_batch_normalization/demo/mobilenet_with_bn.py b/deployment/model/merge_batch_normalization/demo/mobilenet_with_bn.py index d84bffa..8749f80 100644 --- a/deployment/model/merge_batch_normalization/demo/mobilenet_with_bn.py +++ b/deployment/model/merge_batch_normalization/demo/mobilenet_with_bn.py @@ -1,8 +1,14 @@ # edit-mode: -*- python -*- import paddle.v2 as paddle -def conv_bn_layer(input, filter_size, num_filters, - stride, padding, channels=None, num_groups=1, + +def conv_bn_layer(input, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, active_type=paddle.activation.Relu(), layer_type=None): """ @@ -11,114 +17,133 @@ def conv_bn_layer(input, filter_size, num_filters, conv layer has no activation. 
""" tmp = paddle.layer.img_conv( - input=input, - filter_size=filter_size, - num_channels=channels, - num_filters=num_filters, - stride=stride, - padding=padding, - groups=num_groups, - act=paddle.activation.Linear(), - bias_attr=False, - layer_type=layer_type) - return paddle.layer.batch_norm( - input=tmp, - act=active_type) + input=input, + filter_size=filter_size, + num_channels=channels, + num_filters=num_filters, + stride=stride, + padding=padding, + groups=num_groups, + act=paddle.activation.Linear(), + bias_attr=False, + layer_type=layer_type) + return paddle.layer.batch_norm(input=tmp, act=active_type) + -def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride, scale): +def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride, + scale): """ """ tmp = conv_bn_layer( - input=input, - filter_size=3, - num_filters=int(num_filters1*scale), - stride=stride, - padding=1, - num_groups=int(num_groups*scale), layer_type='exconv') + input=input, + filter_size=3, + num_filters=int(num_filters1 * scale), + stride=stride, + padding=1, + num_groups=int(num_groups * scale), + layer_type='exconv') tmp = conv_bn_layer( - input=tmp, - filter_size=1, - num_filters=int(num_filters2*scale), - stride=1, - padding=0) + input=tmp, + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + padding=0) return tmp -def mobile_net(img_size, class_num, scale = 1.0): + +def mobile_net(img_size, class_num, scale=1.0): img = paddle.layer.data( name="image", type=paddle.data_type.dense_vector(img_size)) # conv1: 112x112 - tmp = conv_bn_layer(img, - filter_size=3, - channels=3, - num_filters=int(32*scale), - stride=2, - padding=1) + tmp = conv_bn_layer( + img, + filter_size=3, + channels=3, + num_filters=int(32 * scale), + stride=2, + padding=1) # 56x56 - tmp = depthwise_separable(tmp, - num_filters1=32, - num_filters2=64, - num_groups=32, - stride=1, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=64, - num_filters2=128, - num_groups=64, - stride=2, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=32, + num_filters2=64, + num_groups=32, + stride=1, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=64, + num_filters2=128, + num_groups=64, + stride=2, + scale=scale) # 28x28 - tmp = depthwise_separable(tmp, - num_filters1=128, - num_filters2=128, - num_groups=128, - stride=1, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=128, - num_filters2=256, - num_groups=128, - stride=2, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=128, + num_filters2=128, + num_groups=128, + stride=1, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=128, + num_filters2=256, + num_groups=128, + stride=2, + scale=scale) # 14x14 - tmp = depthwise_separable(tmp, - num_filters1=256, - num_filters2=256, - num_groups=256, - stride=1, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=256, - num_filters2=512, - num_groups=256, - stride=2, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=256, + num_filters2=256, + num_groups=256, + stride=1, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=256, + num_filters2=512, + num_groups=256, + stride=2, + scale=scale) # 14x14 for i in range(5): - tmp = depthwise_separable(tmp, - num_filters1=512, - num_filters2=512, - num_groups=512, - stride=1, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=512, + num_filters2=512, + num_groups=512, + stride=1, + scale=scale) # 7x7 - tmp = 
depthwise_separable(tmp, - num_filters1=512, - num_filters2=1024, - num_groups=512, - stride=2, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=1024, - num_filters2=1024, - num_groups=1024, - stride=1, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=512, + num_filters2=1024, + num_groups=512, + stride=2, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=1024, + num_filters2=1024, + num_groups=1024, + stride=1, + scale=scale) tmp = paddle.layer.img_pool( - input=tmp, - pool_size=7, - stride=1, - pool_type=paddle.pooling.Avg()) + input=tmp, pool_size=7, stride=1, pool_type=paddle.pooling.Avg()) out = paddle.layer.fc( input=tmp, size=class_num, act=paddle.activation.Softmax()) return out + if __name__ == '__main__': img_size = 3 * 224 * 224 data_dim = 102 diff --git a/deployment/model/merge_batch_normalization/demo/mobilenet_without_bn.py b/deployment/model/merge_batch_normalization/demo/mobilenet_without_bn.py index dcf887c..db33f8c 100644 --- a/deployment/model/merge_batch_normalization/demo/mobilenet_without_bn.py +++ b/deployment/model/merge_batch_normalization/demo/mobilenet_without_bn.py @@ -1,127 +1,154 @@ # edit-mode: -*- python -*- import paddle.v2 as paddle -def conv_bn_layer(input, filter_size, num_filters, - stride, padding, channels=None, num_groups=1, + +def conv_bn_layer(input, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, active_type=paddle.activation.Relu(), - layer_type=None): + layer_type=None): """ A wrapper for conv layer with batch normalization layers. Note: conv layer has no activation. """ tmp = paddle.layer.img_conv( - input=input, - filter_size=filter_size, - num_channels=channels, - num_filters=num_filters, - stride=stride, - padding=padding, - groups=num_groups, - # !!! the act in the network with batch norm - # is paddle.activation.Linear() - act=active_type, - # !!! the bias_attr in origin network is False - bias_attr=True, - layer_type=layer_type) + input=input, + filter_size=filter_size, + num_channels=channels, + num_filters=num_filters, + stride=stride, + padding=padding, + groups=num_groups, + # !!! the act in the network with batch norm + # is paddle.activation.Linear() + act=active_type, + # !!! the bias_attr in origin network is False + bias_attr=True, + layer_type=layer_type) # !!! we have deleted the batch_norm layer here. 
return tmp -def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride, scale): + +def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride, + scale): """ """ tmp = conv_bn_layer( - input=input, - filter_size=3, - num_filters=int(num_filters1*scale), - stride=stride, - padding=1, - num_groups=int(num_groups*scale), layer_type='exconv') + input=input, + filter_size=3, + num_filters=int(num_filters1 * scale), + stride=stride, + padding=1, + num_groups=int(num_groups * scale), + layer_type='exconv') tmp = conv_bn_layer( - input=tmp, - filter_size=1, - num_filters=int(num_filters2*scale), - stride=1, - padding=0) + input=tmp, + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + padding=0) return tmp -def mobile_net(img_size, class_num, scale = 1.0): + +def mobile_net(img_size, class_num, scale=1.0): img = paddle.layer.data( name="image", type=paddle.data_type.dense_vector(img_size)) # conv1: 112x112 - tmp = conv_bn_layer(img, - filter_size=3, - channels=3, - num_filters=int(32*scale), - stride=2, - padding=1) + tmp = conv_bn_layer( + img, + filter_size=3, + channels=3, + num_filters=int(32 * scale), + stride=2, + padding=1) # 56x56 - tmp = depthwise_separable(tmp, - num_filters1=32, - num_filters2=64, - num_groups=32, - stride=1, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=64, - num_filters2=128, - num_groups=64, - stride=2, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=32, + num_filters2=64, + num_groups=32, + stride=1, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=64, + num_filters2=128, + num_groups=64, + stride=2, + scale=scale) # 28x28 - tmp = depthwise_separable(tmp, - num_filters1=128, - num_filters2=128, - num_groups=128, - stride=1, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=128, - num_filters2=256, - num_groups=128, - stride=2, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=128, + num_filters2=128, + num_groups=128, + stride=1, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=128, + num_filters2=256, + num_groups=128, + stride=2, + scale=scale) # 14x14 - tmp = depthwise_separable(tmp, - num_filters1=256, - num_filters2=256, - num_groups=256, - stride=1, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=256, - num_filters2=512, - num_groups=256, - stride=2, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=256, + num_filters2=256, + num_groups=256, + stride=1, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=256, + num_filters2=512, + num_groups=256, + stride=2, + scale=scale) # 14x14 for i in range(5): - tmp = depthwise_separable(tmp, - num_filters1=512, - num_filters2=512, - num_groups=512, - stride=1, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=512, + num_filters2=512, + num_groups=512, + stride=1, + scale=scale) # 7x7 - tmp = depthwise_separable(tmp, - num_filters1=512, - num_filters2=1024, - num_groups=512, - stride=2, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=1024, - num_filters2=1024, - num_groups=1024, - stride=1, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=512, + num_filters2=1024, + num_groups=512, + stride=2, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=1024, + num_filters2=1024, + num_groups=1024, + stride=1, + scale=scale) tmp = paddle.layer.img_pool( - input=tmp, - pool_size=7, - stride=1, - pool_type=paddle.pooling.Avg()) + input=tmp, pool_size=7, stride=1, 
pool_type=paddle.pooling.Avg()) out = paddle.layer.fc( input=tmp, size=class_num, act=paddle.activation.Softmax()) return out + if __name__ == '__main__': img_size = 3 * 224 * 224 data_dim = 102 diff --git a/deployment/model/merge_batch_normalization/demo/pre_generate_model.py b/deployment/model/merge_batch_normalization/demo/pre_generate_model.py index f4709e1..3fc0bd4 100755 --- a/deployment/model/merge_batch_normalization/demo/pre_generate_model.py +++ b/deployment/model/merge_batch_normalization/demo/pre_generate_model.py @@ -19,19 +19,22 @@ import paddle.v2 as paddle from mobilenet_without_bn import mobile_net + def parse_args(): """Parse input arguments.""" - parser = argparse.ArgumentParser(description='we pre-generate model parameters without BN') - parser.add_argument('--model_name', - help='name the pre-generate model name', - type=str) + parser = argparse.ArgumentParser( + description='we pre-generate model parameters without BN') + parser.add_argument( + '--model_name', help='name the pre-generate model name', type=str) return parser.parse_args() + def generate_model(net, model_path): with gzip.open(model_path, 'w') as f: paddle.parameters.create(net).to_tar(f) print 'SUCCESS! ', 'we pre-generate our model without bn in ', model_path + if __name__ == '__main__': args = parse_args() diff --git a/deployment/model/merge_batch_normalization/demo/verify.py b/deployment/model/merge_batch_normalization/demo/verify.py index ef5f22d..e5741ae 100755 --- a/deployment/model/merge_batch_normalization/demo/verify.py +++ b/deployment/model/merge_batch_normalization/demo/verify.py @@ -31,10 +31,10 @@ def infer(net, model): cur_dir = os.path.dirname(os.path.realpath(__file__)) inference = Inference(output_layer=net, parameters=parameters) - + test_img = load_image(cur_dir + '/image/cat.jpg') test_data = [] - + test_data.append((test_img, )) sum = 0.0 loops_num = 1 @@ -43,11 +43,12 @@ def infer(net, model): probs = inference.infer(field='value', input=test_data) end = time.time() sum += (end - start) - + print 'time :', sum / loops_num print 'class : ', probs[0].argmax() print 'prob : ', probs[0].max() + def load_image(file, resize_size=256, crop_size=224, mean_file=None): # load image im = cv2.imread(file) @@ -66,18 +67,17 @@ def load_image(file, resize_size=256, crop_size=224, mean_file=None): h_end, w_end = h_start + crop_size, w_start + crop_size im = im[h_start:h_end, w_start:w_end, :] # transpose to CHW order - mean = np.array([103.94,116.78,123.68]) + mean = np.array([103.94, 116.78, 123.68]) im = im - mean im = im.transpose((2, 0, 1)) - #im = im * 0.017 + #im = im * 0.017 return im - if __name__ == '__main__': - img_size = 3 * 224 * 224 + img_size = 3 * 224 * 224 class_num = 102 paddle.init(use_gpu=False, trainer_count=1) diff --git a/deployment/model/rounding/README.md b/deployment/model/rounding/README.md index ec0f9cf..14594db 100644 --- a/deployment/model/rounding/README.md +++ b/deployment/model/rounding/README.md @@ -1,4 +1,4 @@ -# Rounding +# Rounding ### Rounding test of Mobilenet(1.0, 224 * 224 input) on flowers102 dataset. @@ -12,17 +12,15 @@ |Mobilenet|√|√||3.0M|96.96%|[download](https://pan.baidu.com/s/1bo66hUR)| - ## Principle Given floating-point parameters `V`, our goal is first to represent `V` as 8-bit integers `V'`, and then to transform `V'` back into an approximate high-precision value by performing the inverse of the quantization operation. Finally, we gzip the quantized-and-dequantized model. The whole process can reduce the model size by 70%.
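A minimal sketch of this principle in NumPy follows; it assumes per-tensor min/max scaling, which may differ in detail from the repo's actual tool:

```python
import numpy as np

def round_trip(v):
    # Quantize float32 V to uint8 V' in [0, 255] (assumed min/max scaling),
    # then apply the inverse transform to get an approximate float32 value.
    v_min, v_max = v.min(), v.max()
    scale = max((v_max - v_min) / 255.0, 1e-8)
    v_q = np.round((v - v_min) / scale).astype(np.uint8)  # this is V'
    return np.float32(v_q) * np.float32(scale) + v_min

weights = np.random.randn(1024).astype(np.float32)
approx = round_trip(weights)
print(np.abs(weights - approx).max())  # small per-weight quantization error

# Each tensor now takes at most 256 distinct values, which is why the
# subsequent gzip pass compresses the saved model so well.
```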
### Process -- First, we use UInt8 quantization, that is, the parameters are sacled to [0, 255] +- First, we use UInt8 quantization, that is, the parameters are scaled to [0, 255] ![](./source/rounding1.png) -- Second, we inverse the quantization. - ![](./source/rounding2.png) +- Second, we inverse the quantization. + ![](./source/rounding2.png) Finally, we apply gzip to compress the inverse-quantized model, and the compression ratio can be up to 70%. - diff --git a/model_compression/pruning/README.md b/model_compression/pruning/README.md index 1564148..a3545c3 100644 --- a/model_compression/pruning/README.md +++ b/model_compression/pruning/README.md @@ -16,21 +16,20 @@ Let's assume that the size of parameters in a layer is `M` and current sparsity Paddle uses an **automatic, gradual pruning** approach. We use `interval_pass`, `sparsity_upper_bound` and `end_pass` to control this process. The parameters are pruned every `interval_pass` passes (**a pass represents an epoch**) as the network is fine-tuned, gradually increasing the sparsity while allowing the network to recover from any pruning-induced loss in accuracy. The network finally reaches `sparsity_upper_bound` sparsity, and the whole process undergoes `end_pass/interval_pass` rounds of pruning. -As shown below, we use a **log function for sparsity changes**. We cut our network more aggressively in the initial stage for there exists a lot of redundant parameters and gradually reduced the number of the parameters being cutted for there are less redundant parameters in late stage and it's helpful for our network recover from pruning-induced loss in accuracy. +As shown below, we use a **log function for sparsity changes**. We prune the network more aggressively in the initial stage, where there are many redundant parameters, and gradually reduce the number of parameters being cut in the later stage, where fewer parameters are redundant; this helps the network recover from the pruning-induced loss in accuracy. ![](../image/model.png) - ### Usage: -```python from paddle.v2.attr import Hook from paddle.v2.attr import ParamAttr -# The interval_pass value defalut is 3, end_pass value default is 60 +# The default interval_pass value is 3, the default end_pass value is 60 pa = ParamAttr(update_hooks = Hook('dynamic_pruning', sparsity_upper_bound=0.75, interval_pass=1, end_pass=3)) -# for conv layer +# for conv layer paddle.layer.img_conv(input=input, filter_size=3, num_channels=32, @@ -47,11 +46,11 @@ out = paddle.layer.fc(input=input, ``` ### Demo of Mobilenet pruning - + Mobilenet is based on depthwise separable convolution, which consists of a `depthwise convolution` followed by a 1*1 convolution called `pointwise convolution`. About 99% of the parameters are from the `pointwise convolution` and the last `fully-connected layer`, so we only prune those two types of layers in Mobilenet. **1**. Download the Mobilenet model pre-trained on flower102 - + |Model|Dataset|Accuracy|Download| |---|---|---|---| |Mobilenet|flowers102|97.16%|[Download from BaiduCloud](https://pan.baidu.com/s/1geHkrw3)| @@ -62,11 +61,11 @@ Mobilenet is based on depthwise separable convolution that consists a `depthwise python ./demo/train.py ``` **3**. Result - + We evaluated the result in accuracy and model size.
-|--| mobilenet |mobilenet pruning| +|--| mobilenet |mobilenet pruning| |---| --- | --- | |accuracy| 0.9716 |0.970 | |model size| 12M | 4.3M | diff --git a/model_compression/pruning/demo/mobilenet_pruning.py b/model_compression/pruning/demo/mobilenet_pruning.py index 31968e6..2b9f78c 100644 --- a/model_compression/pruning/demo/mobilenet_pruning.py +++ b/model_compression/pruning/demo/mobilenet_pruning.py @@ -1,134 +1,163 @@ # edit-mode: -*- python -*- import paddle.v2 as paddle -from paddle.v2.attr import Hook -from paddle.v2.attr import ParamAttr +from paddle.v2.attr import Hook +from paddle.v2.attr import ParamAttr -def conv_bn_layer(input, filter_size, num_filters, - stride, padding, channels=None, num_groups=1, - active_type=paddle.activation.Relu(), param_attr=None, - layer_type=None): + +def conv_bn_layer(input, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + active_type=paddle.activation.Relu(), + param_attr=None, + layer_type=None): """ A wrapper for conv layer with batch normalization layers. Note: conv layer has no activation. """ tmp = paddle.layer.img_conv( - input=input, - filter_size=filter_size, - num_channels=channels, - num_filters=num_filters, - stride=stride, - padding=padding, - groups=num_groups, - param_attr=param_attr, - act=paddle.activation.Linear(), - bias_attr=False, - layer_type=layer_type) - - return paddle.layer.batch_norm( - input=tmp, - act=active_type) - -def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride, scale): + input=input, + filter_size=filter_size, + num_channels=channels, + num_filters=num_filters, + stride=stride, + padding=padding, + groups=num_groups, + param_attr=param_attr, + act=paddle.activation.Linear(), + bias_attr=False, + layer_type=layer_type) + + return paddle.layer.batch_norm(input=tmp, act=active_type) + + +def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride, + scale): """ """ tmp = conv_bn_layer( - input=input, - filter_size=3, - num_filters=int(num_filters1*scale), - stride=stride, - padding=1, - num_groups=int(num_groups*scale), - layer_type='exconv') - - pa0 = ParamAttr(update_hooks = Hook('dynamic_pruning', sparsity_upper_bound=0.75)) + input=input, + filter_size=3, + num_filters=int(num_filters1 * scale), + stride=stride, + padding=1, + num_groups=int(num_groups * scale), + layer_type='exconv') + + pa0 = ParamAttr(update_hooks=Hook( + 'dynamic_pruning', sparsity_upper_bound=0.75)) tmp = conv_bn_layer( - input=tmp, - filter_size=1, - num_filters=int(num_filters2*scale), - stride=1, - padding=0, - param_attr = pa0) + input=tmp, + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + padding=0, + param_attr=pa0) return tmp -def mobile_net(img_size, class_num, scale = 1.0): + +def mobile_net(img_size, class_num, scale=1.0): img = paddle.layer.data( name="image", type=paddle.data_type.dense_vector(img_size)) # conv1: 112x112 - tmp = conv_bn_layer(img, - filter_size=3, - channels=3, - num_filters=int(32*scale), - stride=2, - padding=1) + tmp = conv_bn_layer( + img, + filter_size=3, + channels=3, + num_filters=int(32 * scale), + stride=2, + padding=1) # 56x56 - tmp = depthwise_separable(tmp, - num_filters1=32, - num_filters2=64, - num_groups=32, - stride=1, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=64, - num_filters2=128, - num_groups=64, - stride=2, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=32, + num_filters2=64, + num_groups=32, + stride=1, + scale=scale) + tmp = depthwise_separable( 
+ tmp, + num_filters1=64, + num_filters2=128, + num_groups=64, + stride=2, + scale=scale) # 28x28 - tmp = depthwise_separable(tmp, - num_filters1=128, - num_filters2=128, - num_groups=128, - stride=1, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=128, - num_filters2=256, - num_groups=128, - stride=2, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=128, + num_filters2=128, + num_groups=128, + stride=1, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=128, + num_filters2=256, + num_groups=128, + stride=2, + scale=scale) # 14x14 - tmp = depthwise_separable(tmp, - num_filters1=256, - num_filters2=256, - num_groups=256, - stride=1, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=256, - num_filters2=512, - num_groups=256, - stride=2, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=256, + num_filters2=256, + num_groups=256, + stride=1, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=256, + num_filters2=512, + num_groups=256, + stride=2, + scale=scale) # 14x14 for i in range(5): - tmp = depthwise_separable(tmp, - num_filters1=512, - num_filters2=512, - num_groups=512, - stride=1, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=512, + num_filters2=512, + num_groups=512, + stride=1, + scale=scale) # 7x7 - tmp = depthwise_separable(tmp, - num_filters1=512, - num_filters2=1024, - num_groups=512, - stride=2, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=1024, - num_filters2=1024, - num_groups=1024, - stride=1, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=512, + num_filters2=1024, + num_groups=512, + stride=2, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=1024, + num_filters2=1024, + num_groups=1024, + stride=1, + scale=scale) tmp = paddle.layer.img_pool( - input=tmp, - pool_size=7, - stride=1, - pool_type=paddle.pooling.Avg()) + input=tmp, pool_size=7, stride=1, pool_type=paddle.pooling.Avg()) out = paddle.layer.fc( - input=tmp, size=class_num, act=paddle.activation.Softmax(), - param_attr = ParamAttr(update_hooks=Hook('dynamic_pruning', sparsity_upper_bound=0.8))) + input=tmp, + size=class_num, + act=paddle.activation.Softmax(), + param_attr=ParamAttr(update_hooks=Hook( + 'dynamic_pruning', sparsity_upper_bound=0.8))) return out + if __name__ == '__main__': img_size = 3 * 224 * 224 data_dim = 102 diff --git a/model_compression/pruning/demo/train.py b/model_compression/pruning/demo/train.py index 5a22d88..56f0dfa 100644 --- a/model_compression/pruning/demo/train.py +++ b/model_compression/pruning/demo/train.py @@ -20,17 +20,19 @@ from mobilenet_pruning import mobile_net BATCH = 40 + + def main(): - datadim = 3 * 224 * 224 + datadim = 3 * 224 * 224 classdim = 102 # PaddlePaddle init - paddle.init(use_gpu=True, trainer_count=1, gpu_id = 0) + paddle.init(use_gpu=True, trainer_count=1, gpu_id=0) momentum_optimizer = paddle.optimizer.Momentum( momentum=0.9, regularization=paddle.optimizer.L2Regularization(rate=0.0005 * BATCH), - learning_rate=0.001/ BATCH, + learning_rate=0.001 / BATCH, learning_rate_schedule='constant') out = mobile_net(datadim, classdim, 1.0) @@ -46,7 +48,7 @@ def main(): for param_name in fparameters.names(): if param_name in parameters.names(): parameters.set(param_name, fparameters.get(param_name)) - + # End batch and end pass event handler def event_handler(event): if isinstance(event, paddle.event.EndIteration): @@ -58,7 +60,8 @@ def event_handler(event): sys.stdout.flush() if isinstance(event, 
paddle.event.EndPass): # save parameters - with gzip.open('pruning_mobilenet_params_pass_%d.tar.gz' % event.pass_id, 'w') as f: + with gzip.open('pruning_mobilenet_params_pass_%d.tar.gz' % + event.pass_id, 'w') as f: parameters.to_tar(f) result = trainer.test( @@ -81,5 +84,6 @@ def event_handler(event): feeding={'image': 0, 'label': 1}) + if __name__ == '__main__': main() diff --git a/models/mobilenet_ssd_pascal/README.md b/models/mobilenet_ssd_pascal/README.md index 80972b7..4bb4511 100644 --- a/models/mobilenet_ssd_pascal/README.md +++ b/models/mobilenet_ssd_pascal/README.md @@ -1,4 +1,4 @@ -# Mobilenet SSD +# Mobilenet SSD We offer the mobilenet(1.0) ssd model trained on PASCAL VOC0712 dataset. This model can be deployed on embedded system and you can modify the network to adapt to your own application. @@ -13,4 +13,3 @@ and you can modify the network to adapt to your own application. ## train on your own dataset You can modify the network to adapt to your own application. PaddlePaddle provides a detailed document to show how to train your model with SSD, refer the document [here](https://github.com/PaddlePaddle/models/tree/develop/ssd). - diff --git a/models/mobilenet_ssd_pascal/config/pascal_voc_conf.py b/models/mobilenet_ssd_pascal/config/pascal_voc_conf.py index 0bf5af4..639868c 100644 --- a/models/mobilenet_ssd_pascal/config/pascal_voc_conf.py +++ b/models/mobilenet_ssd_pascal/config/pascal_voc_conf.py @@ -1,94 +1,90 @@ -from easydict import EasyDict as edict -import numpy as np - -__C = edict() -cfg = __C - -__C.TRAIN = edict() - -__C.IMG_WIDTH = 300 -__C.IMG_HEIGHT = 300 -__C.IMG_CHANNEL = 3 -__C.CLASS_NUM = 21 -__C.BACKGROUND_ID = 0 - -# training settings -__C.TRAIN.MOMENTUM = 0.9 -__C.TRAIN.BATCH_SIZE = 32 -__C.TRAIN.LEARNING_RATE = 0.0005 / 4 -#__C.TRAIN.LEARNING_RATE = 0.001 / __C.TRAIN.BATCH_SIZE -__C.TRAIN.NUM_PASS = 50000 -__C.TRAIN.L2REGULARIZATION = 0.0005 * 4 -#__C.TRAIN.L2REGULARIZATION = 0.0005 * __C.TRAIN.BATCH_SIZE -__C.TRAIN.LEARNING_RATE_DECAY_A = 0.1 -__C.TRAIN.LEARNING_RATE_DECAY_B = 16551 * 80 -__C.TRAIN.LEARNING_RATE_SCHEDULE = 'discexp' -#__C.TRAIN.LEARNING_RATE_SCHEDULE = 'constant' - -__C.NET = edict() - -# configuration for multibox_loss_layer -__C.NET.MBLOSS = edict() -__C.NET.MBLOSS.OVERLAP_THRESHOLD = 0.5 -__C.NET.MBLOSS.NEG_POS_RATIO = 3.0 -__C.NET.MBLOSS.NEG_OVERLAP = 0.5 - -# configuration for detection_map -__C.NET.DETMAP = edict() -__C.NET.DETMAP.OVERLAP_THRESHOLD = 0.5 -__C.NET.DETMAP.EVAL_DIFFICULT = False -__C.NET.DETMAP.AP_TYPE = "11point" - -# configuration for detection_output_layer -__C.NET.DETOUT = edict() -__C.NET.DETOUT.CONFIDENCE_THRESHOLD = 0.01 -__C.NET.DETOUT.NMS_THRESHOLD = 0.45 -__C.NET.DETOUT.NMS_TOP_K = 400 -__C.NET.DETOUT.KEEP_TOP_K = 200 - -################################################ -__C.NET.CONV11 = edict() -__C.NET.CONV11.PB = edict() -__C.NET.CONV11.PB.MIN_SIZE = [60] -__C.NET.CONV11.PB.ASPECT_RATIO = [2.] -#__C.NET.CONV11.PB.ASPECT_RATIO = [2.] -__C.NET.CONV11.PB.VARIANCE = [0.1, 0.1, 0.2, 0.2] - - -__C.NET.CONV13 = edict() -__C.NET.CONV13.PB = edict() -__C.NET.CONV13.PB.MIN_SIZE = [105] -__C.NET.CONV13.PB.MAX_SIZE = [150] -__C.NET.CONV13.PB.ASPECT_RATIO = [2., 3.] -__C.NET.CONV13.PB.VARIANCE = [0.1, 0.1, 0.2, 0.2] - - -__C.NET.CONV14_2 = edict() -__C.NET.CONV14_2.PB = edict() -__C.NET.CONV14_2.PB.MIN_SIZE = [150] -__C.NET.CONV14_2.PB.MAX_SIZE = [195] -__C.NET.CONV14_2.PB.ASPECT_RATIO = [2., 3.] 
-__C.NET.CONV14_2.PB.VARIANCE = [0.1, 0.1, 0.2, 0.2] - - -__C.NET.CONV15_2 = edict() -__C.NET.CONV15_2.PB = edict() -__C.NET.CONV15_2.PB.MIN_SIZE = [195] -__C.NET.CONV15_2.PB.MAX_SIZE = [240] -__C.NET.CONV15_2.PB.ASPECT_RATIO = [2., 3.] -__C.NET.CONV15_2.PB.VARIANCE = [0.1, 0.1, 0.2, 0.2] - -__C.NET.CONV16_2 = edict() -__C.NET.CONV16_2.PB = edict() -__C.NET.CONV16_2.PB.MIN_SIZE = [240] -__C.NET.CONV16_2.PB.MAX_SIZE = [285] -__C.NET.CONV16_2.PB.ASPECT_RATIO = [2., 3.] -__C.NET.CONV16_2.PB.VARIANCE = [0.1, 0.1, 0.2, 0.2] - -__C.NET.CONV17_2 = edict() -__C.NET.CONV17_2.PB = edict() -__C.NET.CONV17_2.PB.MIN_SIZE = [285] -__C.NET.CONV17_2.PB.MAX_SIZE = [300] -__C.NET.CONV17_2.PB.ASPECT_RATIO = [2., 3.] -__C.NET.CONV17_2.PB.VARIANCE = [0.1, 0.1, 0.2, 0.2] - +from easydict import EasyDict as edict +import numpy as np + +__C = edict() +cfg = __C + +__C.TRAIN = edict() + +__C.IMG_WIDTH = 300 +__C.IMG_HEIGHT = 300 +__C.IMG_CHANNEL = 3 +__C.CLASS_NUM = 21 +__C.BACKGROUND_ID = 0 + +# training settings +__C.TRAIN.MOMENTUM = 0.9 +__C.TRAIN.BATCH_SIZE = 32 +__C.TRAIN.LEARNING_RATE = 0.0005 / 4 +#__C.TRAIN.LEARNING_RATE = 0.001 / __C.TRAIN.BATCH_SIZE +__C.TRAIN.NUM_PASS = 50000 +__C.TRAIN.L2REGULARIZATION = 0.0005 * 4 +#__C.TRAIN.L2REGULARIZATION = 0.0005 * __C.TRAIN.BATCH_SIZE +__C.TRAIN.LEARNING_RATE_DECAY_A = 0.1 +__C.TRAIN.LEARNING_RATE_DECAY_B = 16551 * 80 +__C.TRAIN.LEARNING_RATE_SCHEDULE = 'discexp' +#__C.TRAIN.LEARNING_RATE_SCHEDULE = 'constant' + +__C.NET = edict() + +# configuration for multibox_loss_layer +__C.NET.MBLOSS = edict() +__C.NET.MBLOSS.OVERLAP_THRESHOLD = 0.5 +__C.NET.MBLOSS.NEG_POS_RATIO = 3.0 +__C.NET.MBLOSS.NEG_OVERLAP = 0.5 + +# configuration for detection_map +__C.NET.DETMAP = edict() +__C.NET.DETMAP.OVERLAP_THRESHOLD = 0.5 +__C.NET.DETMAP.EVAL_DIFFICULT = False +__C.NET.DETMAP.AP_TYPE = "11point" + +# configuration for detection_output_layer +__C.NET.DETOUT = edict() +__C.NET.DETOUT.CONFIDENCE_THRESHOLD = 0.01 +__C.NET.DETOUT.NMS_THRESHOLD = 0.45 +__C.NET.DETOUT.NMS_TOP_K = 400 +__C.NET.DETOUT.KEEP_TOP_K = 200 + +################################################ +__C.NET.CONV11 = edict() +__C.NET.CONV11.PB = edict() +__C.NET.CONV11.PB.MIN_SIZE = [60] +__C.NET.CONV11.PB.ASPECT_RATIO = [2.] +#__C.NET.CONV11.PB.ASPECT_RATIO = [2.] +__C.NET.CONV11.PB.VARIANCE = [0.1, 0.1, 0.2, 0.2] + +__C.NET.CONV13 = edict() +__C.NET.CONV13.PB = edict() +__C.NET.CONV13.PB.MIN_SIZE = [105] +__C.NET.CONV13.PB.MAX_SIZE = [150] +__C.NET.CONV13.PB.ASPECT_RATIO = [2., 3.] +__C.NET.CONV13.PB.VARIANCE = [0.1, 0.1, 0.2, 0.2] + +__C.NET.CONV14_2 = edict() +__C.NET.CONV14_2.PB = edict() +__C.NET.CONV14_2.PB.MIN_SIZE = [150] +__C.NET.CONV14_2.PB.MAX_SIZE = [195] +__C.NET.CONV14_2.PB.ASPECT_RATIO = [2., 3.] +__C.NET.CONV14_2.PB.VARIANCE = [0.1, 0.1, 0.2, 0.2] + +__C.NET.CONV15_2 = edict() +__C.NET.CONV15_2.PB = edict() +__C.NET.CONV15_2.PB.MIN_SIZE = [195] +__C.NET.CONV15_2.PB.MAX_SIZE = [240] +__C.NET.CONV15_2.PB.ASPECT_RATIO = [2., 3.] +__C.NET.CONV15_2.PB.VARIANCE = [0.1, 0.1, 0.2, 0.2] + +__C.NET.CONV16_2 = edict() +__C.NET.CONV16_2.PB = edict() +__C.NET.CONV16_2.PB.MIN_SIZE = [240] +__C.NET.CONV16_2.PB.MAX_SIZE = [285] +__C.NET.CONV16_2.PB.ASPECT_RATIO = [2., 3.] +__C.NET.CONV16_2.PB.VARIANCE = [0.1, 0.1, 0.2, 0.2] + +__C.NET.CONV17_2 = edict() +__C.NET.CONV17_2.PB = edict() +__C.NET.CONV17_2.PB.MIN_SIZE = [285] +__C.NET.CONV17_2.PB.MAX_SIZE = [300] +__C.NET.CONV17_2.PB.ASPECT_RATIO = [2., 3.] 
+__C.NET.CONV17_2.PB.VARIANCE = [0.1, 0.1, 0.2, 0.2]
diff --git a/models/mobilenet_ssd_pascal/infer.py b/models/mobilenet_ssd_pascal/infer.py
index df7434d..fbbd7a9 100644
--- a/models/mobilenet_ssd_pascal/infer.py
+++ b/models/mobilenet_ssd_pascal/infer.py
@@ -9,6 +9,7 @@
 label_lists = open('./config/label_list').readlines()
 
+
 def _infer(inferer, infer_data, threshold):
     ret = []
     infer_res = inferer.infer(input=infer_data)
@@ -21,6 +22,7 @@ def _infer(inferer, infer_data, threshold):
         ])
     return ret
 
+
 def draw_result(frame, ret_res, h, w):
     print ret_res
     for det_res in ret_res:
@@ -32,33 +34,36 @@ def draw_result(frame, ret_res, h, w):
         xmax = int(round(det_res[5] * w))
         ymax = int(round(det_res[6] * h))
         cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
-                (0, (1 - xmin) * 255, xmin * 255), 2)
-        font=cv2.FONT_HERSHEY_SIMPLEX
-        cv2.putText(frame, label_lists[label + 1].strip(), (xmin + 10, ymin + 10), font, 1.0, (255, 0, 0), 2)
+                      (0, (1 - xmin) * 255, xmin * 255), 2)
+        font = cv2.FONT_HERSHEY_SIMPLEX
+        cv2.putText(frame, label_lists[label + 1].strip(),
+                    (xmin + 10, ymin + 10), font, 1.0, (255, 0, 0), 2)
 
 
 def pre_process(img):
-    img = cv2.resize(img, (cfg.IMG_HEIGHT, cfg.IMG_WIDTH), interpolation=cv2.INTER_AREA)
+    img = cv2.resize(
+        img, (cfg.IMG_HEIGHT, cfg.IMG_WIDTH), interpolation=cv2.INTER_AREA)
     # image should be RGB format
     img = img[:, :, ::-1]
-    # image shoud be in CHW format
+    # image should be in CHW format
     img = np.swapaxes(img, 1, 2)
     img = np.swapaxes(img, 1, 0)
     img = img.astype('float32')
-    img_mean = np.array([104, 117, 124])[:, np.newaxis, np.newaxis].astype(
-        'float32')
+    img_mean = np.array(
+        [104, 117, 124])[:, np.newaxis, np.newaxis].astype('float32')
     img -= img_mean
     img = img.flatten()
     return img
 
-def infer(model_path,threshold):
-
+
+def infer(model_path, threshold):
+
     net = net_conf(mode='infer')
     assert os.path.isfile(model_path), 'Invalid model.'
     parameters = paddle.parameters.Parameters.from_tar(gzip.open(model_path))
-
+
     #build the inference network
     inferer = paddle.inference.Inference(
         output_layer=net, parameters=parameters)
@@ -73,16 +78,16 @@ def infer(model_path,threshold):
     # preprocess the image
     img = pre_process(img)
     test_data.append([img])
-
+
     #the forward process
     ret_res = _infer(inferer, test_data, threshold)
 
-    draw_result(frame, ret_res, h, w)
+    draw_result(frame, ret_res, h, w)
     cv2.imwrite('./images/result.jpg', frame)
 
+
 if __name__ == "__main__":
     # init paddle environment
-    paddle.init(use_gpu=False, trainer_count=1, gpu_id= 3)
+    paddle.init(use_gpu=False, trainer_count=1, gpu_id=3)
 
-    infer(model_path='./mobilenet_ssd_pascal.tar.gz',
-            threshold=0.3)
+    infer(model_path='./mobilenet_ssd_pascal.tar.gz', threshold=0.3)
diff --git a/models/mobilenet_ssd_pascal/mobilenet_ssd_pascal.py b/models/mobilenet_ssd_pascal/mobilenet_ssd_pascal.py
index 9d218f3..29d7c34 100644
--- a/models/mobilenet_ssd_pascal/mobilenet_ssd_pascal.py
+++ b/models/mobilenet_ssd_pascal/mobilenet_ssd_pascal.py
@@ -1,425 +1,431 @@
-# edit-mode: -*- python -*-
-import paddle.v2 as paddle
-#from config.test_conf import cfg
-from config.pascal_voc_conf import cfg
-
-
-def net_conf(mode, scale = 1.0):
-    """Network configuration. Total three modes included 'train' 'eval'
-    and 'infer'. Loss and mAP evaluation layer will return if using 'train'
-    and 'eval'. In 'infer' mode, only detection output layer will be returned.
- """ - default_l2regularization = cfg.TRAIN.L2REGULARIZATION - - default_bias_attr = paddle.attr.ParamAttr(l2_rate=0.0, learning_rate=2.0) - default_static_bias_attr = paddle.attr.ParamAttr(is_static=True) - - def get_param_attr(local_lr, regularization): - is_static = False - if local_lr == 0.0: - is_static = True - return paddle.attr.ParamAttr( - learning_rate=local_lr, l2_rate=regularization, is_static=is_static) - - def mbox_block(layer_name, input, num_channels, filter_size, loc_filters, - conf_filters): - #mbox_loc_name = layer_idx + "_mbox_loc" - mbox_loc = paddle.layer.img_conv( - #name = layer_name + '_' + 'loc', - input=input, - filter_size=filter_size, - num_channels=num_channels, - num_filters=loc_filters, - stride=1, - padding=0, - layer_type='exconv', - bias_attr=default_bias_attr, - param_attr=get_param_attr(1, default_l2regularization), - act=paddle.activation.Identity()) - - #mbox_conf_name = layer_idx + "_mbox_conf" - mbox_conf = paddle.layer.img_conv( - #name = layer_name + '_' + 'conf', - input=input, - filter_size=filter_size, - num_channels=num_channels, - num_filters=conf_filters, - stride=1, - padding=0, - layer_type='exconv', - bias_attr=default_bias_attr, - param_attr=get_param_attr(1, default_l2regularization), - act=paddle.activation.Identity()) - - return mbox_loc, mbox_conf - - def conv_bn_layer(input, filter_size, num_filters, - stride, padding, channels=None, num_groups=1, - active_type=paddle.activation.Relu(), name = None): - """ - A wrapper for conv layer with batch normalization layers. - Note: - conv layer has no activation. - """ - tmp = paddle.layer.img_conv( - #name = name, - input=input, - filter_size=filter_size, - num_channels=channels, - num_filters=num_filters, - stride=stride, - padding=padding, - groups=num_groups, - layer_type='exconv', - # !!! the act in the network with batch norm - # is paddle.activation.Linear() - act=active_type, - # !!! the bias_attr in origin network is False - bias_attr=True) - #print tmp.name - - # !!! we have deleted the batch_norm layer here. 
- return tmp - - def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride): - """ - """ - tmp = conv_bn_layer(input=input, - filter_size=3, - num_filters=num_filters1, - stride=stride, - padding=1, - num_groups=num_groups) - - tmp = conv_bn_layer(input=tmp, - filter_size=1, - num_filters=num_filters2, - stride=1, - padding=0) - return tmp - - img = paddle.layer.data( - name='image', - type=paddle.data_type.dense_vector(cfg.IMG_CHANNEL * cfg.IMG_HEIGHT * - cfg.IMG_WIDTH), - height=cfg.IMG_HEIGHT, - width=cfg.IMG_WIDTH) - - # conv1: 112x112 - #"conv0" "conv0/relu" - conv0 = conv_bn_layer(img, - filter_size=3, - channels=3, - num_filters=int(32 * scale), - stride=2, - padding=1) - - # 56x56 - # "conv1/dw" "conv1/dw/relu" "conv1" "conv1/relu" - conv1 = depthwise_separable(conv0, - num_filters1=int(32 * scale), - num_filters2=int(64 * scale), - num_groups=int(32 * scale), - stride=1) - - #"conv2/dw" "conv2/dw/relu" "conv2" "conv2/relu" - conv2 = depthwise_separable(conv1, - num_filters1=int(64 * scale), - num_filters2=int(128 * scale), - num_groups=int(64 * scale), - stride=2) - # 28x28 - #"conv3/dw" "conv3/dw/relu" "conv3" "conv3/relu" - conv3 = depthwise_separable(conv2, - num_filters1=int(128*scale), - num_filters2=int(128*scale), - num_groups=int(128*scale), - stride=1) - - #"conv4/dw" "conv4/dw/relu" "conv4" "conv4/relu" - conv4 = depthwise_separable(conv3, - num_filters1=int(128*scale), - num_filters2=int(256*scale), - num_groups=int(128*scale), - stride=2) - - # 14x14 - #"conv5/dw" "conv5/dw/relu" "conv5" "conv5/relu" - conv5 = depthwise_separable(conv4, - num_filters1=int(256*scale), - num_filters2=int(256*scale), - num_groups=int(256*scale), - stride=1) - - #"conv6/dw" "conv6/dw/relu" "conv6" "conv6/relu" - conv6 = depthwise_separable(conv5, - num_filters1=int(256*scale), - num_filters2=int(512*scale), - num_groups=int(256*scale), - stride=2) - - tmp = conv6 - - # 14x14 - #"conv7/dw" "conv7/dw/relu" "conv7" "conv7/relu" - #conv7~11 - for i in range(5): - tmp = depthwise_separable(tmp, - num_filters1=int(512*scale), - num_filters2=int(512*scale), - num_groups=int(512*scale), - stride=1) - conv11 = tmp - - # 7x7 - #"conv12/dw" "conv12/dw/relu" "conv12" "conv12/relu" - conv12 = depthwise_separable(conv11, - num_filters1=int(512*scale), - num_filters2=int(1024*scale), - num_groups=int(512*scale), - stride=2) - - #"conv13/dw" "conv13/dw/relu" "conv13" "conv13/relu" - conv13 = depthwise_separable(conv12, - num_filters1=int(1024*scale), - num_filters2=int(1024*scale), - num_groups=int(1024*scale), - stride=1) - - # add begin - # conv14_1 "conv14_1/relu" - conv14_1 = conv_bn_layer( - #name = 'module3_1', - input=conv13, - filter_size=1, - num_filters=int(256*scale), - stride=1, - padding=0) - - #conv14_2 "conv14_2/relu" - conv14_2 = conv_bn_layer( - #name = 'module3_2', - input=conv14_1, - filter_size=3, - num_filters=int(512*scale), - stride=2, - padding=1) - - #conv15_1 "conv15_1/relu" - conv15_1 = conv_bn_layer( - # name = 'module4_1', - input=conv14_2, - filter_size=1, - num_filters=int(128*scale), - stride=1, - padding=0) - - #"conv15_2" "conv15_2/relu" - conv15_2 = conv_bn_layer( - #name = 'module4_2', - input=conv15_1, - filter_size=3, - num_filters=int(256*scale), - stride=2, - padding=1) - - #conv16_1 "conv16_1/relu" - conv16_1 = conv_bn_layer( - #name = 'module5_1', - input=conv15_2, - filter_size=1, - num_filters=int(128*scale), - stride=1, - padding=0) - - #"conv16_2" "conv16_2/relu" - conv16_2 = conv_bn_layer( - #name = 'module5_2', - input=conv16_1, - 
filter_size=3, - num_filters=int(256*scale), - stride=2, - padding=1) - - #conv17_1 conv17_1/relu - conv17_1 = conv_bn_layer( - #name = 'module6_1', - input=conv16_2, - filter_size=1, - num_filters=int(64*scale), - stride=1, - padding=0) - - #conv17_2 conv17_2/relu - conv17_2 = conv_bn_layer( - #name = 'module6_2', - input=conv17_1, - filter_size=3, - num_filters=int(128*scale), - stride=2, - padding=1) - - conv11_mbox_priorbox = paddle.layer.priorbox( - input=conv11, - image=img, - min_size=cfg.NET.CONV11.PB.MIN_SIZE, - aspect_ratio=cfg.NET.CONV11.PB.ASPECT_RATIO, - variance=cfg.NET.CONV11.PB.VARIANCE) - - conv11_norm = paddle.layer.cross_channel_norm( - name="conv11_norm", - input=conv11, - param_attr=paddle.attr.ParamAttr( - initial_mean=20, initial_std=0, is_static=False, learning_rate=1)) - - conv11_mbox_loc, conv11_mbox_conf= \ - mbox_block("module1", conv11_norm, int(512*scale), 1, 12, 63) # kernel_size=1 - - - conv13_mbox_priorbox = paddle.layer.priorbox( - input=conv13, - image=img, - min_size=cfg.NET.CONV13.PB.MIN_SIZE, - max_size=cfg.NET.CONV13.PB.MAX_SIZE, - aspect_ratio=cfg.NET.CONV13.PB.ASPECT_RATIO, - variance=cfg.NET.CONV13.PB.VARIANCE) - conv13_norm = paddle.layer.cross_channel_norm( - name="conv13_norm", - input=conv13, - param_attr=paddle.attr.ParamAttr( - initial_mean=20, initial_std=0, is_static=False, learning_rate=1)) - conv13_mbox_loc, conv13_mbox_conf= \ - mbox_block("module2", conv13_norm, int(1024*scale), 1, 24, 126) - - conv14_2_mbox_priorbox = paddle.layer.priorbox( - input=conv14_2, - image=img, - min_size=cfg.NET.CONV14_2.PB.MIN_SIZE, - max_size=cfg.NET.CONV14_2.PB.MAX_SIZE, - aspect_ratio=cfg.NET.CONV14_2.PB.ASPECT_RATIO, - variance=cfg.NET.CONV14_2.PB.VARIANCE) - conv14_2_norm = paddle.layer.cross_channel_norm( - name="conv14_2", - input=conv14_2, - param_attr=paddle.attr.ParamAttr( - initial_mean=20, initial_std=0, is_static=False, learning_rate=1)) - conv14_2_mbox_loc, conv14_2_mbox_conf= \ - mbox_block("module3", conv14_2_norm, int(512*scale), 1, 24, 126) - - conv15_2_mbox_priorbox = paddle.layer.priorbox( - input=conv15_2, - image=img, - min_size=cfg.NET.CONV15_2.PB.MIN_SIZE, - max_size=cfg.NET.CONV15_2.PB.MAX_SIZE, - aspect_ratio=cfg.NET.CONV15_2.PB.ASPECT_RATIO, - variance=cfg.NET.CONV15_2.PB.VARIANCE) - conv15_2_norm = paddle.layer.cross_channel_norm( - name="conv15_2_norm", - input=conv15_2, - param_attr=paddle.attr.ParamAttr( - initial_mean=20, initial_std=0, is_static=False, learning_rate=1)) - - conv15_2_mbox_loc, conv15_2_mbox_conf= \ - mbox_block("module4", conv15_2_norm, int(256*scale), 1, 24, 126) - - conv16_2_mbox_priorbox = paddle.layer.priorbox( - input=conv16_2, - image=img, - min_size=cfg.NET.CONV16_2.PB.MIN_SIZE, - max_size=cfg.NET.CONV16_2.PB.MAX_SIZE, - aspect_ratio=cfg.NET.CONV16_2.PB.ASPECT_RATIO, - variance=cfg.NET.CONV16_2.PB.VARIANCE) - conv16_2_norm = paddle.layer.cross_channel_norm( - name="conv16_2_norm", - input=conv16_2, - param_attr=paddle.attr.ParamAttr( - initial_mean=20, initial_std=0, is_static=False, learning_rate=1)) - conv16_2_mbox_loc, conv16_2_mbox_conf= \ - mbox_block("module5", conv16_2_norm, int(256*scale), 1, 24, 126) - - conv17_2_mbox_priorbox = paddle.layer.priorbox( - input=conv17_2, - image=img, - min_size=cfg.NET.CONV17_2.PB.MIN_SIZE, - max_size=cfg.NET.CONV17_2.PB.MAX_SIZE, - aspect_ratio=cfg.NET.CONV17_2.PB.ASPECT_RATIO, - variance=cfg.NET.CONV17_2.PB.VARIANCE) - conv17_2_norm = paddle.layer.cross_channel_norm( - name="conv17_2_norm", - input=conv17_2, - param_attr=paddle.attr.ParamAttr( - 
initial_mean=20, initial_std=0, is_static=False, learning_rate=1))
-    conv17_2_mbox_loc, conv17_2_mbox_conf= \
-            mbox_block("module6", conv17_2_norm, int(128*scale), 1, 24, 126)
-
-    mbox_priorbox = paddle.layer.concat(
-        name="mbox_priorbox",
-        input=[
-            conv11_mbox_priorbox
-            , conv13_mbox_priorbox
-            , conv14_2_mbox_priorbox
-            , conv15_2_mbox_priorbox
-            , conv16_2_mbox_priorbox
-            , conv17_2_mbox_priorbox
-        ])
-
-
-    loc_loss_input = [
-        conv11_mbox_loc
-        , conv13_mbox_loc
-        , conv14_2_mbox_loc
-        , conv15_2_mbox_loc
-        , conv16_2_mbox_loc
-        , conv17_2_mbox_loc
-    ]
-
-    conf_loss_input = [
-        conv11_mbox_conf
-        , conv13_mbox_conf
-        , conv14_2_mbox_conf
-        , conv15_2_mbox_conf
-        , conv16_2_mbox_conf
-        , conv17_2_mbox_conf
-    ]
-
-
-    detection_out = paddle.layer.detection_output(
-        input_loc=loc_loss_input,
-        input_conf=conf_loss_input,
-        priorbox=mbox_priorbox,
-        confidence_threshold=cfg.NET.DETOUT.CONFIDENCE_THRESHOLD,
-        nms_threshold=cfg.NET.DETOUT.NMS_THRESHOLD,
-        num_classes=cfg.CLASS_NUM,
-        nms_top_k=cfg.NET.DETOUT.NMS_TOP_K,
-        keep_top_k=cfg.NET.DETOUT.KEEP_TOP_K,
-        background_id=cfg.BACKGROUND_ID,
-        name="detection_output")
-
-    if mode == 'train' or mode == 'eval':
-        bbox = paddle.layer.data(
-            name='bbox', type=paddle.data_type.dense_vector_sequence(6))
-        loss = paddle.layer.multibox_loss(
-            input_loc=loc_loss_input,
-            input_conf=conf_loss_input,
-            priorbox=mbox_priorbox,
-            label=bbox,
-            num_classes=cfg.CLASS_NUM,
-            overlap_threshold=cfg.NET.MBLOSS.OVERLAP_THRESHOLD,
-            neg_pos_ratio=cfg.NET.MBLOSS.NEG_POS_RATIO,
-            neg_overlap=cfg.NET.MBLOSS.NEG_OVERLAP,
-            background_id=cfg.BACKGROUND_ID,
-            name="multibox_loss")
-        paddle.evaluator.detection_map(
-            input=detection_out,
-            label=bbox,
-            overlap_threshold=cfg.NET.DETMAP.OVERLAP_THRESHOLD,
-            background_id=cfg.BACKGROUND_ID,
-            evaluate_difficult=cfg.NET.DETMAP.EVAL_DIFFICULT,
-            ap_type=cfg.NET.DETMAP.AP_TYPE,
-            name="detection_evaluator")
-        return loss, detection_out
-    elif mode == 'infer':
-        return detection_out
-
-if __name__ == '__main__':
-    out = net_conf('infer', scale = 1.0)
+# edit-mode: -*- python -*-
+import paddle.v2 as paddle
+#from config.test_conf import cfg
+from config.pascal_voc_conf import cfg
+
+
+def net_conf(mode, scale=1.0):
+    """Network configuration. Three modes are supported: 'train', 'eval' and
+    'infer'. The loss and mAP evaluation layers are returned in 'train' and
+    'eval' mode; in 'infer' mode, only the detection output layer is returned.
+ """ + default_l2regularization = cfg.TRAIN.L2REGULARIZATION + + default_bias_attr = paddle.attr.ParamAttr(l2_rate=0.0, learning_rate=2.0) + default_static_bias_attr = paddle.attr.ParamAttr(is_static=True) + + def get_param_attr(local_lr, regularization): + is_static = False + if local_lr == 0.0: + is_static = True + return paddle.attr.ParamAttr( + learning_rate=local_lr, l2_rate=regularization, is_static=is_static) + + def mbox_block(layer_name, input, num_channels, filter_size, loc_filters, + conf_filters): + #mbox_loc_name = layer_idx + "_mbox_loc" + mbox_loc = paddle.layer.img_conv( + #name = layer_name + '_' + 'loc', + input=input, + filter_size=filter_size, + num_channels=num_channels, + num_filters=loc_filters, + stride=1, + padding=0, + layer_type='exconv', + bias_attr=default_bias_attr, + param_attr=get_param_attr(1, default_l2regularization), + act=paddle.activation.Identity()) + + #mbox_conf_name = layer_idx + "_mbox_conf" + mbox_conf = paddle.layer.img_conv( + #name = layer_name + '_' + 'conf', + input=input, + filter_size=filter_size, + num_channels=num_channels, + num_filters=conf_filters, + stride=1, + padding=0, + layer_type='exconv', + bias_attr=default_bias_attr, + param_attr=get_param_attr(1, default_l2regularization), + act=paddle.activation.Identity()) + + return mbox_loc, mbox_conf + + def conv_bn_layer(input, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + active_type=paddle.activation.Relu(), + name=None): + """ + A wrapper for conv layer with batch normalization layers. + Note: + conv layer has no activation. + """ + tmp = paddle.layer.img_conv( + #name = name, + input=input, + filter_size=filter_size, + num_channels=channels, + num_filters=num_filters, + stride=stride, + padding=padding, + groups=num_groups, + layer_type='exconv', + # !!! the act in the network with batch norm + # is paddle.activation.Linear() + act=active_type, + # !!! the bias_attr in origin network is False + bias_attr=True) + #print tmp.name + + # !!! we have deleted the batch_norm layer here. 
+ return tmp + + def depthwise_separable(input, num_filters1, num_filters2, num_groups, + stride): + """ + """ + tmp = conv_bn_layer( + input=input, + filter_size=3, + num_filters=num_filters1, + stride=stride, + padding=1, + num_groups=num_groups) + + tmp = conv_bn_layer( + input=tmp, + filter_size=1, + num_filters=num_filters2, + stride=1, + padding=0) + return tmp + + img = paddle.layer.data( + name='image', + type=paddle.data_type.dense_vector(cfg.IMG_CHANNEL * cfg.IMG_HEIGHT * + cfg.IMG_WIDTH), + height=cfg.IMG_HEIGHT, + width=cfg.IMG_WIDTH) + + # conv1: 112x112 + #"conv0" "conv0/relu" + conv0 = conv_bn_layer( + img, + filter_size=3, + channels=3, + num_filters=int(32 * scale), + stride=2, + padding=1) + + # 56x56 + # "conv1/dw" "conv1/dw/relu" "conv1" "conv1/relu" + conv1 = depthwise_separable( + conv0, + num_filters1=int(32 * scale), + num_filters2=int(64 * scale), + num_groups=int(32 * scale), + stride=1) + + #"conv2/dw" "conv2/dw/relu" "conv2" "conv2/relu" + conv2 = depthwise_separable( + conv1, + num_filters1=int(64 * scale), + num_filters2=int(128 * scale), + num_groups=int(64 * scale), + stride=2) + # 28x28 + #"conv3/dw" "conv3/dw/relu" "conv3" "conv3/relu" + conv3 = depthwise_separable( + conv2, + num_filters1=int(128 * scale), + num_filters2=int(128 * scale), + num_groups=int(128 * scale), + stride=1) + + #"conv4/dw" "conv4/dw/relu" "conv4" "conv4/relu" + conv4 = depthwise_separable( + conv3, + num_filters1=int(128 * scale), + num_filters2=int(256 * scale), + num_groups=int(128 * scale), + stride=2) + + # 14x14 + #"conv5/dw" "conv5/dw/relu" "conv5" "conv5/relu" + conv5 = depthwise_separable( + conv4, + num_filters1=int(256 * scale), + num_filters2=int(256 * scale), + num_groups=int(256 * scale), + stride=1) + + #"conv6/dw" "conv6/dw/relu" "conv6" "conv6/relu" + conv6 = depthwise_separable( + conv5, + num_filters1=int(256 * scale), + num_filters2=int(512 * scale), + num_groups=int(256 * scale), + stride=2) + + tmp = conv6 + + # 14x14 + #"conv7/dw" "conv7/dw/relu" "conv7" "conv7/relu" + #conv7~11 + for i in range(5): + tmp = depthwise_separable( + tmp, + num_filters1=int(512 * scale), + num_filters2=int(512 * scale), + num_groups=int(512 * scale), + stride=1) + conv11 = tmp + + # 7x7 + #"conv12/dw" "conv12/dw/relu" "conv12" "conv12/relu" + conv12 = depthwise_separable( + conv11, + num_filters1=int(512 * scale), + num_filters2=int(1024 * scale), + num_groups=int(512 * scale), + stride=2) + + #"conv13/dw" "conv13/dw/relu" "conv13" "conv13/relu" + conv13 = depthwise_separable( + conv12, + num_filters1=int(1024 * scale), + num_filters2=int(1024 * scale), + num_groups=int(1024 * scale), + stride=1) + + # add begin + # conv14_1 "conv14_1/relu" + conv14_1 = conv_bn_layer( + #name = 'module3_1', + input=conv13, + filter_size=1, + num_filters=int(256 * scale), + stride=1, + padding=0) + + #conv14_2 "conv14_2/relu" + conv14_2 = conv_bn_layer( + #name = 'module3_2', + input=conv14_1, + filter_size=3, + num_filters=int(512 * scale), + stride=2, + padding=1) + + #conv15_1 "conv15_1/relu" + conv15_1 = conv_bn_layer( + # name = 'module4_1', + input=conv14_2, + filter_size=1, + num_filters=int(128 * scale), + stride=1, + padding=0) + + #"conv15_2" "conv15_2/relu" + conv15_2 = conv_bn_layer( + #name = 'module4_2', + input=conv15_1, + filter_size=3, + num_filters=int(256 * scale), + stride=2, + padding=1) + + #conv16_1 "conv16_1/relu" + conv16_1 = conv_bn_layer( + #name = 'module5_1', + input=conv15_2, + filter_size=1, + num_filters=int(128 * scale), + stride=1, + padding=0) + + #"conv16_2" 
"conv16_2/relu" + conv16_2 = conv_bn_layer( + #name = 'module5_2', + input=conv16_1, + filter_size=3, + num_filters=int(256 * scale), + stride=2, + padding=1) + + #conv17_1 conv17_1/relu + conv17_1 = conv_bn_layer( + #name = 'module6_1', + input=conv16_2, + filter_size=1, + num_filters=int(64 * scale), + stride=1, + padding=0) + + #conv17_2 conv17_2/relu + conv17_2 = conv_bn_layer( + #name = 'module6_2', + input=conv17_1, + filter_size=3, + num_filters=int(128 * scale), + stride=2, + padding=1) + + conv11_mbox_priorbox = paddle.layer.priorbox( + input=conv11, + image=img, + min_size=cfg.NET.CONV11.PB.MIN_SIZE, + aspect_ratio=cfg.NET.CONV11.PB.ASPECT_RATIO, + variance=cfg.NET.CONV11.PB.VARIANCE) + + conv11_norm = paddle.layer.cross_channel_norm( + name="conv11_norm", + input=conv11, + param_attr=paddle.attr.ParamAttr( + initial_mean=20, initial_std=0, is_static=False, learning_rate=1)) + + conv11_mbox_loc, conv11_mbox_conf= \ + mbox_block("module1", conv11_norm, int(512*scale), 1, 12, 63) # kernel_size=1 + + conv13_mbox_priorbox = paddle.layer.priorbox( + input=conv13, + image=img, + min_size=cfg.NET.CONV13.PB.MIN_SIZE, + max_size=cfg.NET.CONV13.PB.MAX_SIZE, + aspect_ratio=cfg.NET.CONV13.PB.ASPECT_RATIO, + variance=cfg.NET.CONV13.PB.VARIANCE) + conv13_norm = paddle.layer.cross_channel_norm( + name="conv13_norm", + input=conv13, + param_attr=paddle.attr.ParamAttr( + initial_mean=20, initial_std=0, is_static=False, learning_rate=1)) + conv13_mbox_loc, conv13_mbox_conf= \ + mbox_block("module2", conv13_norm, int(1024*scale), 1, 24, 126) + + conv14_2_mbox_priorbox = paddle.layer.priorbox( + input=conv14_2, + image=img, + min_size=cfg.NET.CONV14_2.PB.MIN_SIZE, + max_size=cfg.NET.CONV14_2.PB.MAX_SIZE, + aspect_ratio=cfg.NET.CONV14_2.PB.ASPECT_RATIO, + variance=cfg.NET.CONV14_2.PB.VARIANCE) + conv14_2_norm = paddle.layer.cross_channel_norm( + name="conv14_2", + input=conv14_2, + param_attr=paddle.attr.ParamAttr( + initial_mean=20, initial_std=0, is_static=False, learning_rate=1)) + conv14_2_mbox_loc, conv14_2_mbox_conf= \ + mbox_block("module3", conv14_2_norm, int(512*scale), 1, 24, 126) + + conv15_2_mbox_priorbox = paddle.layer.priorbox( + input=conv15_2, + image=img, + min_size=cfg.NET.CONV15_2.PB.MIN_SIZE, + max_size=cfg.NET.CONV15_2.PB.MAX_SIZE, + aspect_ratio=cfg.NET.CONV15_2.PB.ASPECT_RATIO, + variance=cfg.NET.CONV15_2.PB.VARIANCE) + conv15_2_norm = paddle.layer.cross_channel_norm( + name="conv15_2_norm", + input=conv15_2, + param_attr=paddle.attr.ParamAttr( + initial_mean=20, initial_std=0, is_static=False, learning_rate=1)) + + conv15_2_mbox_loc, conv15_2_mbox_conf= \ + mbox_block("module4", conv15_2_norm, int(256*scale), 1, 24, 126) + + conv16_2_mbox_priorbox = paddle.layer.priorbox( + input=conv16_2, + image=img, + min_size=cfg.NET.CONV16_2.PB.MIN_SIZE, + max_size=cfg.NET.CONV16_2.PB.MAX_SIZE, + aspect_ratio=cfg.NET.CONV16_2.PB.ASPECT_RATIO, + variance=cfg.NET.CONV16_2.PB.VARIANCE) + conv16_2_norm = paddle.layer.cross_channel_norm( + name="conv16_2_norm", + input=conv16_2, + param_attr=paddle.attr.ParamAttr( + initial_mean=20, initial_std=0, is_static=False, learning_rate=1)) + conv16_2_mbox_loc, conv16_2_mbox_conf= \ + mbox_block("module5", conv16_2_norm, int(256*scale), 1, 24, 126) + + conv17_2_mbox_priorbox = paddle.layer.priorbox( + input=conv17_2, + image=img, + min_size=cfg.NET.CONV17_2.PB.MIN_SIZE, + max_size=cfg.NET.CONV17_2.PB.MAX_SIZE, + aspect_ratio=cfg.NET.CONV17_2.PB.ASPECT_RATIO, + variance=cfg.NET.CONV17_2.PB.VARIANCE) + conv17_2_norm = paddle.layer.cross_channel_norm( + 
name="conv17_2_norm", + input=conv17_2, + param_attr=paddle.attr.ParamAttr( + initial_mean=20, initial_std=0, is_static=False, learning_rate=1)) + conv17_2_mbox_loc, conv17_2_mbox_conf= \ + mbox_block("module6", conv17_2_norm, int(128*scale), 1, 24, 126) + + mbox_priorbox = paddle.layer.concat( + name="mbox_priorbox", + input=[ + conv11_mbox_priorbox, conv13_mbox_priorbox, conv14_2_mbox_priorbox, + conv15_2_mbox_priorbox, conv16_2_mbox_priorbox, + conv17_2_mbox_priorbox + ]) + + loc_loss_input = [ + conv11_mbox_loc, conv13_mbox_loc, conv14_2_mbox_loc, conv15_2_mbox_loc, + conv16_2_mbox_loc, conv17_2_mbox_loc + ] + + conf_loss_input = [ + conv11_mbox_conf, conv13_mbox_conf, conv14_2_mbox_conf, + conv15_2_mbox_conf, conv16_2_mbox_conf, conv17_2_mbox_conf + ] + + detection_out = paddle.layer.detection_output( + input_loc=loc_loss_input, + input_conf=conf_loss_input, + priorbox=mbox_priorbox, + confidence_threshold=cfg.NET.DETOUT.CONFIDENCE_THRESHOLD, + nms_threshold=cfg.NET.DETOUT.NMS_THRESHOLD, + num_classes=cfg.CLASS_NUM, + nms_top_k=cfg.NET.DETOUT.NMS_TOP_K, + keep_top_k=cfg.NET.DETOUT.KEEP_TOP_K, + background_id=cfg.BACKGROUND_ID, + name="detection_output") + + if mode == 'train' or mode == 'eval': + bbox = paddle.layer.data( + name='bbox', type=paddle.data_type.dense_vector_sequence(6)) + loss = paddle.layer.multibox_loss( + input_loc=loc_loss_input, + input_conf=conf_loss_input, + priorbox=mbox_priorbox, + label=bbox, + num_classes=cfg.CLASS_NUM, + overlap_threshold=cfg.NET.MBLOSS.OVERLAP_THRESHOLD, + neg_pos_ratio=cfg.NET.MBLOSS.NEG_POS_RATIO, + neg_overlap=cfg.NET.MBLOSS.NEG_OVERLAP, + background_id=cfg.BACKGROUND_ID, + name="multibox_loss") + paddle.evaluator.detection_map( + input=detection_out, + label=bbox, + overlap_threshold=cfg.NET.DETMAP.OVERLAP_THRESHOLD, + background_id=cfg.BACKGROUND_ID, + evaluate_difficult=cfg.NET.DETMAP.EVAL_DIFFICULT, + ap_type=cfg.NET.DETMAP.AP_TYPE, + name="detection_evaluator") + return loss, detection_out + elif mode == 'infer': + return detection_out + + +if __name__ == '__main__': + out = net_conf('infer', scale=1.0) diff --git a/models/standard_network/mobilenet.py b/models/standard_network/mobilenet.py index 681aae2..a29678b 100644 --- a/models/standard_network/mobilenet.py +++ b/models/standard_network/mobilenet.py @@ -1,8 +1,14 @@ # edit-mode: -*- python -*- import paddle.v2 as paddle -def conv_bn_layer(input, filter_size, num_filters, - stride, padding, channels=None, num_groups=1, + +def conv_bn_layer(input, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, active_type=paddle.activation.Relu(), layer_type=None): """ @@ -11,114 +17,133 @@ def conv_bn_layer(input, filter_size, num_filters, conv layer has no activation. 
""" tmp = paddle.layer.img_conv( - input=input, - filter_size=filter_size, - num_channels=channels, - num_filters=num_filters, - stride=stride, - padding=padding, - groups=num_groups, - act=paddle.activation.Linear(), - bias_attr=False, - layer_type=layer_type) - return paddle.layer.batch_norm( - input=tmp, - act=active_type) + input=input, + filter_size=filter_size, + num_channels=channels, + num_filters=num_filters, + stride=stride, + padding=padding, + groups=num_groups, + act=paddle.activation.Linear(), + bias_attr=False, + layer_type=layer_type) + return paddle.layer.batch_norm(input=tmp, act=active_type) + -def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride, scale): +def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride, + scale): """ """ tmp = conv_bn_layer( - input=input, - filter_size=3, - num_filters=int(num_filters1*scale), - stride=stride, - padding=1, - num_groups=int(num_groups*scale), layer_type='exconv') + input=input, + filter_size=3, + num_filters=int(num_filters1 * scale), + stride=stride, + padding=1, + num_groups=int(num_groups * scale), + layer_type='exconv') tmp = conv_bn_layer( - input=tmp, - filter_size=1, - num_filters=int(num_filters2*scale), - stride=1, - padding=0) + input=tmp, + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + padding=0) return tmp -def mobile_net(img_size, class_num, scale = 1.0): + +def mobile_net(img_size, class_num, scale=1.0): img = paddle.layer.data( name="image", type=paddle.data_type.dense_vector(img_size)) # conv1: 112x112 - tmp = conv_bn_layer(img, - filter_size=3, - channels=3, - num_filters=int(32*scale), - stride=2, - padding=1) + tmp = conv_bn_layer( + img, + filter_size=3, + channels=3, + num_filters=int(32 * scale), + stride=2, + padding=1) # 56x56 - tmp = depthwise_separable(tmp, - num_filters1=32, - num_filters2=64, - num_groups=32, - stride=1, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=64, - num_filters2=128, - num_groups=64, - stride=2, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=32, + num_filters2=64, + num_groups=32, + stride=1, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=64, + num_filters2=128, + num_groups=64, + stride=2, + scale=scale) # 28x28 - tmp = depthwise_separable(tmp, - num_filters1=128, - num_filters2=128, - num_groups=128, - stride=1, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=128, - num_filters2=256, - num_groups=128, - stride=2, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=128, + num_filters2=128, + num_groups=128, + stride=1, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=128, + num_filters2=256, + num_groups=128, + stride=2, + scale=scale) # 14x14 - tmp = depthwise_separable(tmp, - num_filters1=256, - num_filters2=256, - num_groups=256, - stride=1, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=256, - num_filters2=512, - num_groups=256, - stride=2, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=256, + num_filters2=256, + num_groups=256, + stride=1, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=256, + num_filters2=512, + num_groups=256, + stride=2, + scale=scale) # 14x14 for i in range(5): - tmp = depthwise_separable(tmp, - num_filters1=512, - num_filters2=512, - num_groups=512, - stride=1, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=512, + num_filters2=512, + num_groups=512, + stride=1, + scale=scale) # 7x7 - tmp = 
depthwise_separable(tmp, - num_filters1=512, - num_filters2=1024, - num_groups=512, - stride=2, scale = scale) - tmp = depthwise_separable(tmp, - num_filters1=1024, - num_filters2=1024, - num_groups=1024, - stride=1, scale = scale) + tmp = depthwise_separable( + tmp, + num_filters1=512, + num_filters2=1024, + num_groups=512, + stride=2, + scale=scale) + tmp = depthwise_separable( + tmp, + num_filters1=1024, + num_filters2=1024, + num_groups=1024, + stride=1, + scale=scale) tmp = paddle.layer.img_pool( - input=tmp, - pool_size=7, - stride=1, - pool_type=paddle.pooling.Avg()) + input=tmp, pool_size=7, stride=1, pool_type=paddle.pooling.Avg()) out = paddle.layer.fc( input=tmp, size=class_num, act=paddle.activation.Softmax()) return out + if __name__ == '__main__': img_size = 3 * 224 * 224 data_dim = 1000 diff --git a/models/standard_network/resnet.py b/models/standard_network/resnet.py index c0b7d32..cad28f5 100644 --- a/models/standard_network/resnet.py +++ b/models/standard_network/resnet.py @@ -1,5 +1,6 @@ import paddle.v2 as paddle + def conv_bn_layer(input, ch_out, filter_size, @@ -19,12 +20,15 @@ def conv_bn_layer(input, bias_attr=False) return paddle.layer.batch_norm(input=tmp, act=active_type) + def shortcut(input, ch_in, ch_out, stride): if ch_in != ch_out: - return conv_bn_layer(input, ch_out, 1, stride, 0, paddle.activation.Linear()) + return conv_bn_layer(input, ch_out, 1, stride, 0, + paddle.activation.Linear()) else: return input + def basicblock(input, ch_in, ch_out, stride): short = shortcut(input, ch_in, ch_out, stride) conv1 = conv_bn_layer(input, ch_out, 3, stride, 1) @@ -32,12 +36,14 @@ def basicblock(input, ch_in, ch_out, stride): return paddle.layer.addto( input=[short, conv2], act=paddle.activation.Relu()) + def layer_warp(block_func, input, ch_in, ch_out, count, stride): conv = block_func(input, ch_in, ch_out, stride) for i in range(1, count): conv = block_func(conv, ch_out, ch_out, 1) return conv + def resnet18(data_dim, class_dim, depth=18): input = paddle.layer.data( name="image", type=paddle.data_type.dense_vector(data_dim)) @@ -58,6 +64,7 @@ def resnet18(data_dim, class_dim, depth=18): input=pool2, size=class_dim, act=paddle.activation.Softmax()) return out + if __name__ == '__main__': data_dim = 3 * 224 * 224 class_dim = 1000
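A note on the prior-box sizes hard-coded in config/pascal_voc_conf.py above: the MIN_SIZE/MAX_SIZE pairs (60, 105/150, 150/195, 195/240, 240/285, 285/300) follow the evenly spaced scale rule from the SSD paper, s_k = s_min + (s_max - s_min)(k - 1)/(m - 1) with s_min = 0.2, s_max = 0.95 and m = 6 feature maps, scaled by the 300-pixel input size (the first layer, CONV11, carries only a MIN_SIZE). A minimal sketch that reproduces those values, assuming that rule:

    # Reproduces the MIN_SIZE/MAX_SIZE values in config/pascal_voc_conf.py,
    # assuming the SSD scale rule with s_min=0.2, s_max=0.95 over 6 feature
    # maps and a 300x300 input; the last MAX_SIZE is capped at the input size.
    def ssd_box_sizes(img_size=300, s_min=0.2, s_max=0.95, num_maps=6):
        scales = [s_min + (s_max - s_min) * k / (num_maps - 1.0)
                  for k in range(num_maps)]
        sizes = [int(round(s * img_size)) for s in scales] + [img_size]
        # each layer's MIN_SIZE is paired with the next scale as its MAX_SIZE
        return [(sizes[i], sizes[i + 1]) for i in range(num_maps)]

    print(ssd_box_sizes())
    # [(60, 105), (105, 150), (150, 195), (195, 240), (240, 285), (285, 300)]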
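The LEARNING_RATE_DECAY_A/B entries in the same config drive the 'discexp' (discrete exponential) schedule. As we read Paddle v2's schedule (an assumption worth checking against the Paddle source), the rate is multiplied by DECAY_A once every DECAY_B processed samples; 16551 is the number of images in the VOC0712 trainval set, so DECAY_B = 16551 * 80 decays the rate every 80 epochs. A sketch under that reading:

    import math

    # Assumed semantics of Paddle v2's 'discexp' schedule (verify against the
    # Paddle source): lr drops by a factor of decay_a every decay_b samples.
    def discexp_lr(base_lr, decay_a, decay_b, num_samples):
        return base_lr * decay_a ** int(math.floor(num_samples / float(decay_b)))

    base_lr, decay_a, decay_b = 0.0005 / 4, 0.1, 16551 * 80
    print(discexp_lr(base_lr, decay_a, decay_b, num_samples=0))           # 0.000125
    print(discexp_lr(base_lr, decay_a, decay_b, num_samples=16551 * 80))  # 0.0000125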
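In infer.py's pre_process, the two np.swapaxes calls convert the resized image from HWC to CHW layout before the per-channel mean is subtracted; a single transpose does the same job and may read more clearly. A quick equivalence check:

    import numpy as np

    # HWC -> CHW: the swapaxes pair in pre_process equals a single transpose
    img = np.arange(300 * 300 * 3, dtype='float32').reshape(300, 300, 3)
    chw_a = np.swapaxes(np.swapaxes(img, 1, 2), 1, 0)
    chw_b = img.transpose(2, 0, 1)
    assert chw_a.shape == chw_b.shape == (3, 300, 300)
    assert np.array_equal(chw_a, chw_b)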
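Finally, on the depthwise_separable blocks that both mobilenet_ssd_pascal.py and mobilenet.py are built from: each block replaces a single k x k convolution with a k x k depthwise convolution (groups equal to the input channels) followed by a 1 x 1 pointwise convolution, which is where MobileNet's parameter savings come from. A back-of-the-envelope comparison, ignoring bias and batch-norm parameters:

    # Parameter counts for a standard 3x3 conv vs. the depthwise separable
    # pair used in the MobileNet blocks above (bias and BN parameters ignored).
    def standard_conv_params(c_in, c_out, k=3):
        return k * k * c_in * c_out

    def depthwise_separable_params(c_in, c_out, k=3):
        return k * k * c_in + c_in * c_out  # depthwise 3x3 + pointwise 1x1

    print(standard_conv_params(512, 512))        # 2359296
    print(depthwise_separable_params(512, 512))  # 266752, roughly 8.8x fewer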