mscnn.py

# -*- coding:utf-8 -*-
"""
@Function: Structure of MSCNN crowd counting
@Source: Multi-scale Convolution Neural Networks for Crowd Counting
         https://arxiv.org/abs/1702.02359
@Data set: https://pan.baidu.com/s/12EqB1XDyFBB0kyinMA7Pqw 密码: sags  --> Have some problems

@Author: Ling Bao
@Code verification: Ling Bao
@说明：
    学习率：1e-4
    平均loss : 14.

@Data: Sep. 11, 2017
@Version: 0.1
"""

# 系统模块
import re

# 机器学习库
import tensorflow as tf

# 项目模块
import mscnn_train

# 模型参数设置
MP_NAME = 'mp'
train_log = 'train_log'
model = 'model'
output = 'output'
data_train_gt = 'Data_original/Data_gt/train_gt/'
data_train_im = 'Data_original/Data_im/train_im/'
data_train_index = 'Data_original/dir_name.txt'

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('batch_size', 1, """批次处理图片数目""")
tf.app.flags.DEFINE_string('train_log', train_log, """训练日志""")
tf.app.flags.DEFINE_string('model_dir', model, """模型保存""")
tf.app.flags.DEFINE_string('output_dir', output, """输出中间结果""")
tf.app.flags.DEFINE_boolean('log_device_placement', False, """是否记录设备布局""")
tf.app.flags.DEFINE_string('data_train_gt', data_train_gt, """训练集标签""")
tf.app.flags.DEFINE_string('data_train_im', data_train_im, """训练集图片""")
tf.app.flags.DEFINE_string('data_train_index', data_train_index, """训练集图片""")


def _activation_summary(x):
    """
    概要汇总函数
    :param x: 待保存变量
    :return: None
    """
    tensor_name = re.sub('%s_[0-9]*/' % MP_NAME, '', x.op.name)
    tf.summary.histogram(tensor_name + '/activations', x)
    tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))


def _variable_on_cpu(name, shape, initializer):
    """
    创建变量
    :param name: name_scope
    :param shape: tensor维度
    :param initializer: 初始化值
    :return: tensor变量
    """
    with tf.device('/cpu:0'):
        var = tf.get_variable(name, shape, initializer=initializer)

    return var


def _variable_with_weight_decay(name, shape, stddev, wd):
    """
    创建有权重衰减项的变量
    :param name: name_scope
    :param shape: tensor维度
    :param stddev: 用于初始化的标准差
    :param wd: 权重
    :return: tensor变量
    """
    # wd 为衰减因子,若为None则无衰减项
    var = _variable_on_cpu(name, shape, tf.random_normal_initializer(stddev=stddev))
    if wd:
        weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
        tf.add_to_collection('losses', weight_decay)

    return var


class BatchNorm(object):
    """
    BN操作类
    """
    def __init__(self, epsilon=1e-5, momentum=0.9, name="batch_norm"):
        """
        初始化函数
        :param epsilon: 精度
        :param momentum: 动量因子
        :param name: name_scope
        """
        with tf.variable_scope(name):
            self.epsilon = epsilon
            self.momentum = momentum
            self.name = name

    def __call__(self, x):
        """
        BN算子
        :param x: 输入变量
        :return:
        """
        return tf.contrib.layers.batch_norm(x, decay=self.momentum, updates_collections=None,
                                            epsilon=self.epsilon, scale=True, scope=self.name)


def multi_scale_block(in_con, in_dim, out_dim, is_bn=False):
    """
    多尺度块MSB
    :param in_con: 输入tensor变量 [batch_size, filter_w, filter_h, in_dim]
    :param in_dim: 输入通道数
    :param out_dim: 输出通道数
    :param is_bn: 是否增加Batch Normal
    :return: 输出tensor变量 [4 * batch_size, filter_w, filter_h, in_dim]
    """
    with tf.variable_scope('con_9') as scope:
        kernel = _variable_with_weight_decay('weights', shape=[9, 9, in_dim, out_dim],
                                             stddev=0.01, wd=0.0005)
        con_9 = tf.nn.conv2d(in_con, kernel, [1, 1, 1, 1], padding='SAME', name=scope.name)
        _activation_summary(con_9)

    with tf.variable_scope('con_7') as scope:
        kernel = _variable_with_weight_decay('weights', shape=[7, 7, in_dim, out_dim],
                                             stddev=0.01, wd=0.0005)
        con_7 = tf.nn.conv2d(in_con, kernel, [1, 1, 1, 1], padding='SAME', name=scope.name)
        _activation_summary(con_7)

    with tf.variable_scope('con_5') as scope:
        kernel = _variable_with_weight_decay('weights', shape=[5, 5, in_dim, out_dim],
                                             stddev=0.01, wd=0.0005)
        con_5 = tf.nn.conv2d(in_con, kernel, [1, 1, 1, 1], padding='SAME', name=scope.name)
        _activation_summary(con_5)

    with tf.variable_scope('con_3') as scope:
        kernel = _variable_with_weight_decay('weights', shape=[3, 3, in_dim, out_dim],
                                             stddev=0.01, wd=0.0005)
        con_3 = tf.nn.conv2d(in_con, kernel, [1, 1, 1, 1], padding='SAME', name=scope.name)
        _activation_summary(con_3)

    with tf.variable_scope('concat') as scope:
        concat = tf.concat([con_9, con_7, con_5, con_3], 3, name=scope.name)
        biases = _variable_on_cpu('biases', [out_dim * 4], tf.constant_initializer(0))
        bias = tf.nn.bias_add(concat, biases)

        if is_bn:
            bn = BatchNorm()
            bias = bn(bias)

        msb = tf.nn.relu(bias)
        _activation_summary(msb)

    return msb


def inference(images):
    """
    构建MSCNN模型
    :param images: 原始图像
    :return: 人群密度估计图像
    """
    # -------------------------------------------------------------------------------------------- #
    # 创建模型
    # con1_1
    with tf.variable_scope('con1') as scope:
        kernel = _variable_with_weight_decay('weights', shape=[9, 9, 3, 64],
                                             stddev=0.01, wd=0.0005)
        con = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME', name=scope.name)
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0))
        bias = tf.nn.bias_add(con, biases)
        con1 = tf.nn.relu(bias)
        _activation_summary(con1)

    # msb_con2
    with tf.variable_scope('msb_con2'):
        msb_con2 = multi_scale_block(con1, 64, 16)

    # pool_msb_con2
    with tf.variable_scope('pool_msb_con2') as scope:
        pool_msb_con2 = tf.nn.max_pool(msb_con2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME',
                                       name=scope.name)

    # msb_con3
    with tf.variable_scope('msb_con3'):
        msb_con3 = multi_scale_block(pool_msb_con2, 64, 32)

    # msb_con4
    with tf.variable_scope('msb_con4'):
        msb_con4 = multi_scale_block(msb_con3, 128, 32)

    # pool_msb_con4
    with tf.variable_scope('pool_msb_con4') as scope:
        pool_msb_con4 = tf.nn.max_pool(msb_con4, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME',
                                       name=scope.name)

    # msb_con5
    with tf.variable_scope('msb_con5'):
        msb_con5 = multi_scale_block(pool_msb_con4, 128, 64)

    # msb_con6
    with tf.variable_scope('msb_con6'):
        msb_con6 = multi_scale_block(msb_con5, 256, 64)

    # mpl_con7
    with tf.variable_scope('mpl_con7') as scope:
        kernel = _variable_with_weight_decay('weights', shape=[1, 1, 256, 1000], stddev=0.001, wd=0.0005)
        con = tf.nn.conv2d(msb_con6, kernel, [1, 1, 1, 1], padding='SAME', name=scope.name)
        biases = _variable_on_cpu('biases', [1000], tf.constant_initializer(0))
        bias = tf.nn.bias_add(con, biases)
        mpl_con7 = tf.nn.relu(bias)
        _activation_summary(mpl_con7)

    # con_out
    with tf.variable_scope('con_out') as scope:
        kernel = _variable_with_weight_decay('weights', shape=[1, 1, 1000, 1], stddev=0.001, wd=0.0005)
        con = tf.nn.conv2d(mpl_con7, kernel, [1, 1, 1, 1], padding='SAME', name=scope.name)
        biases = _variable_on_cpu('biases', [1], tf.constant_initializer(0))
        bias = tf.nn.bias_add(con, biases)

        con_out = tf.nn.relu(bias)
        _activation_summary(con_out)

    # 删除第四维度channel, channel=1
    image_out = con_out

    tf.summary.image("con_img", image_out)

    return image_out


def inference_bn(images):
    """
    在MSCNN模型的cnn层后增加Batch Normal; 对输出的激活函数进行了改进f(x)=relu(sigmoid(x))
    $$sigmod(x)=\frac{1}{1+e^{-x}}$$
    $$relu(x)=
    \begin{equation}
    \begin{cases}
     x, & x \geq 0 \\
    0, & x < 0
    \end{cases}
    \end{equation}$$
    $$f(x)=relu(sigmod(x))$$

    :param images: 原始图像
    :return: 人群密度估计图像
    """
    # -------------------------------------------------------------------------------------------- #
    # 创建模型
    # con1_1
    with tf.variable_scope('con1') as scope:
        kernel = _variable_with_weight_decay('weights', shape=[9, 9, 3, 64],
                                             stddev=0.01, wd=0.0005)
        con = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME', name=scope.name)
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0))
        bias = tf.nn.bias_add(con, biases)
        con1 = tf.nn.relu(bias)
        _activation_summary(con1)

    # msb_con2
    with tf.variable_scope('msb_con2'):
        msb_con2 = multi_scale_block(con1, 64, 16, is_bn=True)

    # pool_msb_con2
    with tf.variable_scope('pool_msb_con2') as scope:
        pool_msb_con2 = tf.nn.max_pool(msb_con2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME',
                                       name=scope.name)

    # msb_con3
    with tf.variable_scope('msb_con3'):
        msb_con3 = multi_scale_block(pool_msb_con2, 64, 32, is_bn=True)

    # msb_con4
    with tf.variable_scope('msb_con4'):
        msb_con4 = multi_scale_block(msb_con3, 128, 32, is_bn=True)

    # pool_msb_con4
    with tf.variable_scope('pool_msb_con4') as scope:
        pool_msb_con4 = tf.nn.max_pool(msb_con4, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME',
                                       name=scope.name)

    # msb_con5
    with tf.variable_scope('msb_con5'):
        msb_con5 = multi_scale_block(pool_msb_con4, 128, 64, is_bn=True)

    # msb_con6
    with tf.variable_scope('msb_con6'):
        msb_con6 = multi_scale_block(msb_con5, 256, 64, is_bn=True)

    # mpl_con7
    with tf.variable_scope('mpl_con7') as scope:
        kernel = _variable_with_weight_decay('weights', shape=[1, 1, 256, 1000], stddev=0.001, wd=0.0005)
        con = tf.nn.conv2d(msb_con6, kernel, [1, 1, 1, 1], padding='SAME', name=scope.name)
        biases = _variable_on_cpu('biases', [1000], tf.constant_initializer(0))
        bias = tf.nn.bias_add(con, biases)
        mpl_con7 = tf.nn.relu(bias)
        _activation_summary(mpl_con7)

    # con_out
    with tf.variable_scope('con_out') as scope:
        kernel = _variable_with_weight_decay('weights', shape=[1, 1, 1000, 1], stddev=0.001, wd=0.0005)
        con = tf.nn.conv2d(mpl_con7, kernel, [1, 1, 1, 1], padding='SAME', name=scope.name)
        biases = _variable_on_cpu('biases', [1], tf.constant_initializer(0))
        bias = tf.nn.bias_add(con, biases)

        bn = BatchNorm()
        bias = bn(bias)

        con_out = tf.nn.relu(tf.nn.sigmoid(bias))
        _activation_summary(con_out)

    # 删除第四维度channel, channel=1
    image_out = con_out

    tf.summary.image("con_img", image_out)

    return image_out


def loss(predict, label):
    """
    计算损失
    :param predict: mscnn估计密度图
    :param label: ground truth crowd counting map
    :return: L2 loss
    """
    # L2 Loss
    predict = tf.squeeze(predict, 3)
    l2_loss = tf.reduce_sum((predict - label) * (predict - label))

    # 增加概要
    tf.summary.histogram('loss', l2_loss)

    return l2_loss


def add_avg_loss(avg_loss):
    """
    计算平均损失
    :param avg_loss:
    :return:
    """
    add_avg_loss_op = avg_loss * 1
    tf.summary.histogram('avg_loss', avg_loss)

    return add_avg_loss_op


def _add_loss_summaries(total_loss):
    """
    增加损失概要信息
    :param total_loss:
    :return:
    """
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])

    for l in losses + [total_loss]:
        tf.summary.scalar(l.op.name + ' (raw)', l)
        tf.summary.scalar(l.op.name, loss_averages.average(l))

    return loss_averages_op


def train(total_loss, global_step, nums_per_train):
    """
    根据损失构建RMSProp优化算子
    :param total_loss: 损失
    :param global_step:
    :param nums_per_train:
    :return: RMSProp优化算子
    """
    num_batches_per_epoch = nums_per_train / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * mscnn_train.num_epochs_per_decay)

    lr = tf.train.exponential_decay(mscnn_train.initial_learning_rate,
                                    global_step,
                                    decay_steps,
                                    mscnn_train.learning_rate_per_decay,
                                    staircase=True)
    tf.summary.scalar('learning_rate', lr)

    # 优化算法
    opt = tf.train.RMSPropOptimizer(lr)
    grads = opt.compute_gradients(total_loss)

    # 应用梯度
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    train_op = apply_gradient_op

    # 添加概要
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)

    return train_op