Suggestion for Neural Architecture Search with Reinforcement Learning #339

Merged
merged 24 commits on Feb 21, 2019
Changes from 21 commits
3 changes: 2 additions & 1 deletion cmd/suggestion/nasrl/Dockerfile
@@ -1,7 +1,8 @@
FROM python:3
FROM python:3.6

ADD . /usr/src/app/github.com/kubeflow/katib
WORKDIR /usr/src/app/github.com/kubeflow/katib/cmd/suggestion/nasrl
RUN pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.12.0-cp36-cp36m-linux_x86_64.whl
RUN pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /usr/src/app/github.com/kubeflow/katib:/usr/src/app/github.com/kubeflow/katib/pkg/api/python

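A note on the ENV PYTHONPATH line above: it is what lets the new suggestion code use repository-rooted imports inside the container. A minimal illustration, using imports that appear in the files added below and assuming the same image layout:

# Resolvable once /usr/src/app/github.com/kubeflow/katib is on PYTHONPATH:
from pkg.suggestion.NAS_Reinforcement_Learning.Controller import Controller
from pkg.api.python import api_pb2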
6 changes: 0 additions & 6 deletions cmd/suggestion/nasrl/requirements.txt
@@ -1,9 +1,3 @@
grpcio
duecredit
cloudpickle==0.5.6
numpy>=1.13.3
scikit-learn>=0.19.0
scipy>=0.19.1
forestci
protobuf
googleapis-common-protos
220 changes: 220 additions & 0 deletions pkg/suggestion/NAS_Reinforcement_Learning/Controller.py
@@ -0,0 +1,220 @@
import tensorflow as tf
from pkg.suggestion.NAS_Reinforcement_Learning.LSTM import stack_lstm
from pkg.suggestion.NAS_Reinforcement_Learning.Trainer import get_train_ops


class Controller(object):
def __init__(self,
num_layers=12,
num_operations=16,
lstm_size=64,
lstm_num_layers=1,
lstm_keep_prob=1.0,
tanh_constant=1.5,
temperature=None,
lr_init=1e-3,
lr_dec_start=0,
lr_dec_every=1000,
lr_dec_rate=0.9,
l2_reg=0,
entropy_weight=1e-4,
clip_mode=None,
grad_bound=None,
bl_dec=0.999,
optim_algo="adam",
sync_replicas=False,
num_aggregate=20,
num_replicas=1,
skip_target=0.4,
skip_weight=0.8,
name="controller"):

print("-" * 80)
print("Building Controller")

self.num_layers = num_layers
self.num_operations = num_operations

self.lstm_size = lstm_size
self.lstm_num_layers = lstm_num_layers
self.lstm_keep_prob = lstm_keep_prob
self.tanh_constant = tanh_constant
self.temperature = temperature
self.lr_init = lr_init
self.lr_dec_start = lr_dec_start
self.lr_dec_every = lr_dec_every
self.lr_dec_rate = lr_dec_rate
self.l2_reg = l2_reg
self.entropy_weight = entropy_weight
self.clip_mode = clip_mode
self.grad_bound = grad_bound
self.bl_dec = bl_dec

self.skip_target = skip_target
self.skip_weight = skip_weight

self.optim_algo = optim_algo
self.sync_replicas = sync_replicas
self.num_aggregate = num_aggregate
self.num_replicas = num_replicas
self.name = name

self._create_params()
self._build_sampler()

def _create_params(self):
initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1)
with tf.variable_scope(self.name, initializer=initializer):
with tf.variable_scope("lstm"):
self.w_lstm = []
for layer_id in range(self.lstm_num_layers):
with tf.variable_scope("layer_{}".format(layer_id)):
w = tf.get_variable("w", [2 * self.lstm_size, 4 * self.lstm_size])
self.w_lstm.append(w)

self.g_emb = tf.get_variable("g_emb", [1, self.lstm_size])
with tf.variable_scope("emb"):
self.w_emb = tf.get_variable("w", [self.num_operations, self.lstm_size])
with tf.variable_scope("softmax"):
self.w_soft = tf.get_variable("w", [self.lstm_size, self.num_operations])

with tf.variable_scope("attention"):
self.w_attn_1 = tf.get_variable("w_1", [self.lstm_size, self.lstm_size])
self.w_attn_2 = tf.get_variable("w_2", [self.lstm_size, self.lstm_size])
self.v_attn = tf.get_variable("v", [self.lstm_size, 1])

def _build_sampler(self):
"""Build the sampler ops and the log_prob ops."""

print("-" * 80)
print("Building Controller Sampler")
anchors = []
anchors_w_1 = []

arc_seq = []
entropys = []
log_probs = []
skip_count = []
skip_penaltys = []

prev_c = [tf.zeros([1, self.lstm_size], tf.float32) for _ in range(self.lstm_num_layers)]
prev_h = [tf.zeros([1, self.lstm_size], tf.float32) for _ in range(self.lstm_num_layers)]
inputs = self.g_emb
skip_targets = tf.constant([1.0 - self.skip_target, self.skip_target], dtype=tf.float32)
for layer_id in range(self.num_layers):
next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
prev_c, prev_h = next_c, next_h
logit = tf.matmul(next_h[-1], self.w_soft)
if self.temperature is not None:
logit /= self.temperature
if self.tanh_constant is not None:
logit = self.tanh_constant * tf.tanh(logit)

operation_id = tf.multinomial(logit, 1)
operation_id = tf.to_int32(operation_id)
operation_id = tf.reshape(operation_id, [1])

arc_seq.append(operation_id)
log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logit, labels=operation_id)
log_probs.append(log_prob)
entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
entropys.append(entropy)
inputs = tf.nn.embedding_lookup(self.w_emb, operation_id)

next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
prev_c, prev_h = next_c, next_h

if layer_id > 0:
query = tf.concat(anchors_w_1, axis=0)
query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2))
query = tf.matmul(query, self.v_attn)
logit = tf.concat([-query, query], axis=1)
if self.temperature is not None:
logit /= self.temperature
if self.tanh_constant is not None:
logit = self.tanh_constant * tf.tanh(logit)

skip = tf.multinomial(logit, 1)
skip = tf.to_int32(skip)
skip = tf.reshape(skip, [layer_id])
arc_seq.append(skip)

skip_prob = tf.sigmoid(logit)
kl = skip_prob * tf.log(skip_prob / skip_targets)
kl = tf.reduce_sum(kl)
skip_penaltys.append(kl)

log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logit, labels=skip)
log_probs.append(tf.reduce_sum(log_prob, keepdims=True))

entropy = tf.stop_gradient(
tf.reduce_sum(log_prob * tf.exp(-log_prob), keepdims=True))
entropys.append(entropy)

skip = tf.to_float(skip)
skip = tf.reshape(skip, [1, layer_id])
skip_count.append(tf.reduce_sum(skip))
inputs = tf.matmul(skip, tf.concat(anchors, axis=0))
inputs /= (1.0 + tf.reduce_sum(skip))
else:
inputs = self.g_emb

anchors.append(next_h[-1])
anchors_w_1.append(tf.matmul(next_h[-1], self.w_attn_1))

arc_seq = tf.concat(arc_seq, axis=0)
self.sample_arc = tf.reshape(arc_seq, [-1])

entropys = tf.stack(entropys)
self.sample_entropy = tf.reduce_sum(entropys)

log_probs = tf.stack(log_probs)
self.sample_log_prob = tf.reduce_sum(log_probs)

skip_count = tf.stack(skip_count)
self.skip_count = tf.reduce_sum(skip_count)

skip_penaltys = tf.stack(skip_penaltys)
self.skip_penaltys = tf.reduce_mean(skip_penaltys)

def build_trainer(self):
self.reward = tf.placeholder(tf.float32, shape=())

normalize = tf.to_float(self.num_layers * (self.num_layers - 1) / 2)
self.skip_rate = tf.to_float(self.skip_count) / normalize

if self.entropy_weight is not None:
self.reward += self.entropy_weight * self.sample_entropy

self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)
self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
baseline_update = tf.assign_sub(self.baseline, (1 - self.bl_dec) * (self.baseline - self.reward))

with tf.control_dependencies([baseline_update]):
self.reward = tf.identity(self.reward)

self.loss = self.sample_log_prob * (self.reward - self.baseline)
if self.skip_weight is not None:
self.loss += self.skip_weight * self.skip_penaltys

self.train_step = tf.Variable(0, dtype=tf.int32, trainable=False, name=self.name + "_train_step")
tf_variables = [var for var in tf.trainable_variables() if var.name.startswith(self.name)]
print("-" * 80)

self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
self.loss,
tf_variables,
self.train_step,
clip_mode=self.clip_mode,
grad_bound=self.grad_bound,
l2_reg=self.l2_reg,
lr_init=self.lr_init,
lr_dec_start=self.lr_dec_start,
lr_dec_every=self.lr_dec_every,
lr_dec_rate=self.lr_dec_rate,
optim_algo=self.optim_algo,
sync_replicas=self.sync_replicas,
num_aggregate=self.num_aggregate,
num_replicas=self.num_replicas)
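A minimal sketch of how a reviewer might exercise the sampler locally, assuming TensorFlow 1.x and the repository root on PYTHONPATH (as the Dockerfile sets); the layer and operation counts below are arbitrary example values, not defaults from any study config. The REINFORCE update itself is wired up by build_trainer() via get_train_ops, which lives in Trainer.py outside this diff.

import tensorflow as tf
from pkg.suggestion.NAS_Reinforcement_Learning.Controller import Controller

controller = Controller(num_layers=8, num_operations=5)  # arbitrary example sizes

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Each run re-samples an architecture: 8 operation ids interleaved with
    # the per-layer skip-connection bits (8 + 8*7/2 = 36 entries in total).
    for _ in range(3):
        print(sess.run(controller.sample_arc))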
28 changes: 28 additions & 0 deletions pkg/suggestion/NAS_Reinforcement_Learning/LSTM.py
@@ -0,0 +1,28 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf


# TODO: will remove this function and use tf.nn.LSTMCell instead

def lstm(x, prev_c, prev_h, w):
ifog = tf.matmul(tf.concat([x, prev_h], axis=1), w)
i, f, o, g = tf.split(ifog, 4, axis=1)
i = tf.sigmoid(i)
f = tf.sigmoid(f)
o = tf.sigmoid(o)
g = tf.tanh(g)
next_c = i * g + f * prev_c
next_h = o * tf.tanh(next_c)
return next_c, next_h


def stack_lstm(x, prev_c, prev_h, w):
next_c, next_h = [], []
for layer_id, (_c, _h, _w) in enumerate(zip(prev_c, prev_h, w)):
inputs = x if layer_id == 0 else next_h[-1]
curr_c, curr_h = lstm(inputs, _c, _h, _w)
next_c.append(curr_c)
next_h.append(curr_h)
return next_c, next_h
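For intuition, the same gating can be checked outside a TF session; below is a NumPy mirror of lstm() above run on random data (shapes are arbitrary and this snippet is not part of the PR).

import numpy as np

def np_lstm(x, prev_c, prev_h, w):
    # One fused weight matrix of shape [2*lstm_size, 4*lstm_size] yields the i, f, o, g gates.
    ifog = np.concatenate([x, prev_h], axis=1) @ w
    i, f, o, g = np.split(ifog, 4, axis=1)
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    next_c = sigmoid(i) * np.tanh(g) + sigmoid(f) * prev_c
    next_h = sigmoid(o) * np.tanh(next_c)
    return next_c, next_h

lstm_size = 64
x = np.random.randn(1, lstm_size)
c = np.zeros((1, lstm_size))
h = np.zeros((1, lstm_size))
w = 0.1 * np.random.randn(2 * lstm_size, 4 * lstm_size)
c, h = np_lstm(x, c, h, w)
print(c.shape, h.shape)  # (1, 64) (1, 64)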
79 changes: 79 additions & 0 deletions pkg/suggestion/NAS_Reinforcement_Learning/Operation.py
@@ -0,0 +1,79 @@
import itertools
import numpy as np
from pkg.api.python import api_pb2


class Operation(object):
def __init__(self, opt_id, opt_type, opt_params):
self.opt_id = opt_id
self.opt_type = opt_type
self.opt_params = opt_params

def get_dict(self):
opt_dict = dict()
opt_dict['opt_id'] = self.opt_id
opt_dict['opt_type'] = self.opt_type
opt_dict['opt_params'] = self.opt_params
return opt_dict

def print_op(self, logger):
logger.info("Operation ID: \n\t{}".format(self.opt_id))
logger.info("Operation Type: \n\t{}".format(self.opt_type))
logger.info("Operations Parameters:")
for ikey in self.opt_params:
logger.info("\t{}: {}".format(ikey, self.opt_params[ikey]))
logger.info("")


class SearchSpace(object):
def __init__(self, operations):
self.operation_list = list(operations.operation)
self.search_space = list()
self._parse_operations()
print()
self.num_operations = len(self.search_space)

def _parse_operations(self):
# search_space is a list of Operation objects

operation_id = 0

for operation_dict in self.operation_list:
opt_type = operation_dict.operationType
opt_spec = list(operation_dict.parameter_configs.configs)
# avail_space is a dict with the format {"spec_name": [feasible values for this spec]}
avail_space = dict()
num_spec = len(opt_spec)

for ispec in opt_spec:
spec_name = ispec.name
if ispec.parameter_type == api_pb2.CATEGORICAL:
avail_space[spec_name] = list(ispec.feasible.list)
elif ispec.parameter_type == api_pb2.INT:
spec_min = int(ispec.feasible.min)
spec_max = int(ispec.feasible.max)
spec_step = int(ispec.feasible.step)
avail_space[spec_name] = range(spec_min, spec_max+1, spec_step)
elif ispec.parameter_type == api_pb2.DOUBLE:
spec_min = float(ispec.feasible.min)
spec_max = float(ispec.feasible.max)
spec_step = float(ispec.feasible.step)
if spec_step == 0:
print("Error, NAS Reinforcement Learning algorithm cannot accept continuous search space!")
exit(999)
double_list = np.arange(spec_min, spec_max+spec_step, spec_step)
if double_list[-1] > spec_max:
double_list = double_list[:-1]  # np.arange can overshoot spec_max; ndarrays do not support del
avail_space[spec_name] = double_list

# generate all the combinations of possible operations
key_avail_space = list(avail_space.keys())
val_avail_space = list(avail_space.values())

for this_opt_vector in itertools.product(*val_avail_space):
opt_params = dict()
for i in range(num_spec):
opt_params[key_avail_space[i]] = this_opt_vector[i]
this_opt_class = Operation(operation_id, opt_type, opt_params)
self.search_space.append(this_opt_class)
operation_id += 1
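To make the cross-product expansion in _parse_operations concrete, here is a standalone illustration that uses plain dicts in place of the api_pb2 parameter configs; the parameter names, values, and the "convolution" operation type are made up for the example.

import itertools

# Feasible values as _parse_operations would produce them for one operation
# (an INT parameter with min=3, max=7, step=2 and a CATEGORICAL parameter).
avail_space = {
    "filter_size": [3, 5, 7],
    "activation": ["relu", "tanh"],
}

key_avail_space = list(avail_space.keys())
val_avail_space = list(avail_space.values())

search_space = []
for operation_id, this_opt_vector in enumerate(itertools.product(*val_avail_space)):
    opt_params = dict(zip(key_avail_space, this_opt_vector))
    search_space.append({"opt_id": operation_id, "opt_type": "convolution", "opt_params": opt_params})

print(len(search_space))  # 3 * 2 = 6 candidate operations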