diff --git a/R-package/R/mlp.R b/R-package/R/mlp.R deleted file mode 100644 index 3cfa06e967ce..000000000000 --- a/R-package/R/mlp.R +++ /dev/null @@ -1,82 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#' Convenience interface for multiple layer perceptron -#' -#' @param data the input matrix. Only mx.io.DataIter and R array/matrix types supported. -#' @param label the training label. Only R array type supported. -#' @param hidden_node a vector containing number of hidden nodes on each hidden layer as well as the output layer. -#' @param out_node the number of nodes on the output layer. -#' @param dropout a number in [0,1) containing the dropout ratio from the last hidden layer to the output layer. -#' @param activation either a single string or a vector containing the names of the activation functions. -#' @param out_activation a single string containing the name of the output activation function. -#' @param ctx whether train on cpu (default) or gpu. -#' @param eval.metric the evaluation metric/ -#' @param ... 
other parameters passing to \code{mx.model.FeedForward.create}/ -#' -#' @examples -#' -#' require(mlbench) -#' data(Sonar, package="mlbench") -#' Sonar[,61] = as.numeric(Sonar[,61])-1 -#' train.ind = c(1:50, 100:150) -#' train.x = data.matrix(Sonar[train.ind, 1:60]) -#' train.y = Sonar[train.ind, 61] -#' test.x = data.matrix(Sonar[-train.ind, 1:60]) -#' test.y = Sonar[-train.ind, 61] -#' model = mx.mlp(train.x, train.y, hidden_node = 10, out_node = 2, out_activation = "softmax", -#' learning.rate = 0.1) -#' preds = predict(model, test.x) -#' -#' @export -mx.mlp <- function(data, label, hidden_node = 1, out_node, dropout = NULL, - activation = "tanh", out_activation = "softmax", - ctx = mx.ctx.default(), ...) { - - m <- length(hidden_node) - if (!is.null(dropout)) { - if (length(dropout) != 1) { - stop("only accept dropout ratio of length 1.") - } - dropout = max(0,min(dropout, 1-1e-7)) - } - - # symbol construction - act <- mx.symbol.Variable("data") - if (length(activation) == 1) { - activation <- rep(activation, m) - } else { - if (length(activation) != m) { - stop(paste("Length of activation should be",m)) - } - } - for (i in seq_len(m)) { - fc <- mx.symbol.FullyConnected(act, num_hidden=hidden_node[i]) - act <- mx.symbol.Activation(fc, act_type=activation[i]) - if (i == m && !is.null(dropout)) { - act <- mx.symbol.Dropout(act, p = dropout) - } - } - fc <- mx.symbol.FullyConnected(act, num_hidden=out_node) - out <- switch(out_activation, - "rmse" = mx.symbol.LinearRegressionOutput(fc), - "softmax" = mx.symbol.SoftmaxOutput(fc), - "logistic" = mx.symbol.LogisticRegressionOutput(fc), - stop("Not supported yet.")) - model <- mx.model.FeedForward.create(out, X=data, y=label, ctx = ctx, ...) 
- return(model) -} diff --git a/R-package/R/rnn.graph.R b/R-package/R/rnn.graph.R deleted file mode 100644 index 1225fa511b51..000000000000 --- a/R-package/R/rnn.graph.R +++ /dev/null @@ -1,372 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#' Generate a RNN symbolic model - requires CUDA -#' -#' @param config Either seq-to-one or one-to-one -#' @param cell_type Type of RNN cell: either gru or lstm -#' @param num_rnn_layer int, number of stacked layers -#' @param num_hidden int, size of the state in each RNN layer -#' @param num_embed int, default = NULL - no embedding. 
Dimension of the embedding vectors -#' @param num_decode int, number of output variables in the decoding layer -#' @param input_size int, number of levels in the data - only used for embedding -#' @param dropout -#' -#' @export -rnn.graph <- function (num_rnn_layer, input_size = NULL, num_embed = NULL, - num_hidden, num_decode, dropout = 0, ignore_label = -1, bidirectional = F, - loss_output = NULL, config, cell_type, masking = F, output_last_state = F, - rnn.state = NULL, rnn.state.cell = NULL, prefix = "") { - - data <- mx.symbol.Variable("data") - label <- mx.symbol.Variable("label") - seq.mask <- mx.symbol.Variable("seq.mask") - if (!is.null(num_embed)) - embed.weight <- mx.symbol.Variable("embed.weight") - rnn.params.weight <- mx.symbol.Variable("rnn.params.weight") - - if (is.null(rnn.state)) rnn.state <- mx.symbol.Variable("rnn.state") - if (cell_type == "lstm" & is.null(rnn.state.cell)) { - rnn.state.cell <- mx.symbol.Variable("rnn.state.cell") - } - - cls.weight <- mx.symbol.Variable("cls.weight") - cls.bias <- mx.symbol.Variable("cls.bias") - if (!is.null(num_embed)) { - data <- mx.symbol.Embedding(data = data, input_dim = input_size, - weight = embed.weight, output_dim = num_embed, name = "embed") - } - - data = mx.symbol.swapaxes(data = data, dim1 = 0, dim2 = 1, name = paste0(prefix, "swap_pre")) - - if (cell_type == "lstm") { - rnn <- mx.symbol.RNN(data = data, state = rnn.state, - state_cell = rnn.state.cell, parameters = rnn.params.weight, - state.size = num_hidden, num.layers = num_rnn_layer, - bidirectional = bidirectional, mode = cell_type, state.outputs = output_last_state, - p = dropout, name = paste0(prefix, "RNN")) - } else { - rnn <- mx.symbol.RNN(data = data, state = rnn.state, - parameters = rnn.params.weight, state.size = num_hidden, - num.layers = num_rnn_layer, bidirectional = bidirectional, mode = cell_type, - state.outputs = output_last_state, p = dropout, - name = paste0(prefix, "RNN")) - } - - if (config == "seq-to-one") { - if 
(masking) mask <- mx.symbol.SequenceLast(data = rnn[[1]], use.sequence.length = T, sequence_length = seq.mask, name = "mask") else - mask <- mx.symbol.SequenceLast(data = rnn[[1]], use.sequence.length = F, name = "mask") - - if (!is.null(loss_output)) { - decode <- mx.symbol.FullyConnected(data = mask, weight = cls.weight, bias = cls.bias, num_hidden = num_decode, name = "decode") - out <- switch(loss_output, softmax = mx.symbol.SoftmaxOutput(data = decode, label = label, use_ignore = !ignore_label == -1, ignore_label = ignore_label, name = "loss"), - linear = mx.symbol.LinearRegressionOutput(data = decode, label = label, name = "loss"), - logistic = mx.symbol.LogisticRegressionOutput(data = decode, label = label, name = "loss"), - MAE = mx.symbol.MAERegressionOutput(data = decode, label = label, name = "loss")) - } - else out <- mask - } - - else if (config == "one-to-one") { - - if (masking) mask <- mx.symbol.SequenceMask(data = rnn[[1]], use.sequence.length = T, sequence_length = seq.mask, value = 0, name = "mask") else - mask <- mx.symbol.identity(data = rnn[[1]], name = "mask") - - mask = mx.symbol.swapaxes(data = mask, dim1 = 0, dim2 = 1, name = paste0(prefix, "swap_post")) - - if (!is.null(loss_output)) { - - mask <- mx.symbol.reshape(data = mask, shape = c(0, -1), reverse = TRUE) - label <- mx.symbol.reshape(data = label, shape = c(-1)) - - decode <- mx.symbol.FullyConnected(data = mask, weight = cls.weight, bias = cls.bias, num_hidden = num_decode, - flatten = TRUE, name = paste0(prefix, "decode")) - - out <- switch(loss_output, softmax = mx.symbol.SoftmaxOutput(data = decode, label = label, use_ignore = !ignore_label == -1, ignore_label = ignore_label, name = "loss"), - linear = mx.symbol.LinearRegressionOutput(data = decode, label = label, name = "loss"), - logistic = mx.symbol.LogisticRegressionOutput(data = decode, label = label, name = "loss"), - MAE = mx.symbol.MAERegressionOutput(data = decode, label = label, name = "loss")) - } else out <- mask - } 
- return(out) -} - -# LSTM cell symbol -lstm.cell <- function(num_hidden, indata, prev.state, param, seqidx, layeridx, dropout = 0, prefix = "") { - - if (dropout > 0 && layeridx > 1) - indata <- mx.symbol.Dropout(data = indata, p = dropout) - - i2h <- mx.symbol.FullyConnected(data = indata, weight = param$i2h.weight, bias = param$i2h.bias, - num_hidden = num_hidden * 4, name = paste0(prefix, "t", seqidx, ".l", layeridx, ".i2h")) - - if (!is.null(prev.state)) { - h2h <- mx.symbol.FullyConnected(data = prev.state$h, weight = param$h2h.weight, - bias = param$h2h.bias, num_hidden = num_hidden * 4, - name = paste0(prefix, "t", seqidx, ".l", layeridx, ".h2h")) - gates <- i2h + h2h - } else { - gates <- i2h - } - - split.gates <- mx.symbol.split(gates, num.outputs = 4, axis = 1, squeeze.axis = F, - name = paste0(prefix, "t", seqidx, ".l", layeridx, ".slice")) - - in.gate <- mx.symbol.Activation(split.gates[[1]], act.type = "sigmoid") - in.transform <- mx.symbol.Activation(split.gates[[2]], act.type = "tanh") - forget.gate <- mx.symbol.Activation(split.gates[[3]], act.type = "sigmoid") - out.gate <- mx.symbol.Activation(split.gates[[4]], act.type = "sigmoid") - - if (is.null(prev.state)) { - next.c <- in.gate * in.transform - } else { - next.c <- (forget.gate * prev.state$c) + (in.gate * in.transform) - } - - next.h <- out.gate * mx.symbol.Activation(next.c, act.type = "tanh") - - return(list(h = next.h, c = next.c)) -} - - -# GRU cell symbol -gru.cell <- function(num_hidden, indata, prev.state, param, seqidx, layeridx, dropout = 0, prefix) -{ - if (dropout > 0 && layeridx > 1) - indata <- mx.symbol.Dropout(data = indata, p = dropout) - - i2h <- mx.symbol.FullyConnected(data = indata, weight = param$gates.i2h.weight, - bias = param$gates.i2h.bias, num_hidden = num_hidden * 2, - name = paste0(prefix, "t", seqidx, ".l", layeridx, ".gates.i2h")) - - if (!is.null(prev.state)) { - h2h <- mx.symbol.FullyConnected(data = prev.state$h, weight = param$gates.h2h.weight, - bias = 
param$gates.h2h.bias, num_hidden = num_hidden * 2, - name = paste0(prefix, "t", seqidx, ".l", layeridx, ".gates.h2h")) - gates <- i2h + h2h - } else { - gates <- i2h - } - - split.gates <- mx.symbol.split(gates, num.outputs = 2, axis = 1, squeeze.axis = F, - name = paste0(prefix, "t", seqidx, ".l", layeridx, ".split")) - - update.gate <- mx.symbol.Activation(split.gates[[1]], act.type = "sigmoid") - reset.gate <- mx.symbol.Activation(split.gates[[2]], act.type = "sigmoid") - - htrans.i2h <- mx.symbol.FullyConnected(data = indata, weight = param$trans.i2h.weight, - bias = param$trans.i2h.bias, num_hidden = num_hidden, - name = paste0(prefix, "t", seqidx, ".l", layeridx, ".trans.i2h")) - - if (is.null(prev.state)) { - h.after.reset <- reset.gate * 0 - } else { - h.after.reset <- prev.state$h * reset.gate - } - - htrans.h2h <- mx.symbol.FullyConnected(data = h.after.reset, weight = param$trans.h2h.weight, - bias = param$trans.h2h.bias, num_hidden = num_hidden, - name = paste0(prefix, "t", seqidx, ".l", layeridx, ".trans.h2h")) - - h.trans <- htrans.i2h + htrans.h2h - h.trans.active <- mx.symbol.Activation(h.trans, act.type = "tanh") - - if (is.null(prev.state)) { - next.h <- update.gate * h.trans.active - } else { - next.h <- prev.state$h + update.gate * (h.trans.active - prev.state$h) - } - - return(list(h = next.h)) -} - - -#' Unroll representation of RNN running on non CUDA device -#' -#' @param config Either seq-to-one or one-to-one -#' @param cell_type Type of RNN cell: either gru or lstm -#' @param num_rnn_layer int, number of stacked layers -#' @param seq_len int, number of time steps to unroll -#' @param num_hidden int, size of the state in each RNN layer -#' @param num_embed int, default = NULL - no embedding. 
Dimension of the embedding vectors -#' @param num_decode int, number of output variables in the decoding layer -#' @param input_size int, number of levels in the data - only used for embedding -#' @param dropout -#' -#' @export -rnn.graph.unroll <- function(num_rnn_layer, - seq_len, - input_size = NULL, - num_embed = NULL, - num_hidden, - num_decode, - dropout = 0, - ignore_label = -1, - loss_output = NULL, - init.state = NULL, - config, - cell_type = "lstm", - masking = F, - output_last_state = F, - prefix = "", - data_name = "data", - label_name = "label") { - - if (!is.null(num_embed)) embed.weight <- mx.symbol.Variable(paste0(prefix, "embed.weight")) - - # Initial state - if (is.null(init.state) & output_last_state) { - init.state <- lapply(1:num_rnn_layer, function(i) { - if (cell_type=="lstm") { - state <- list(h = mx.symbol.Variable(paste0("init_", prefix, i, "_h")), - c = mx.symbol.Variable(paste0("init_", prefix, i, "_c"))) - } else if (cell_type=="gru") { - state <- list(h = mx.symbol.Variable(paste0("init_", prefix, i, "_h"))) - } - return (state) - }) - } - - cls.weight <- mx.symbol.Variable(paste0(prefix, "cls.weight")) - cls.bias <- mx.symbol.Variable(paste0(prefix, "cls.bias")) - - param.cells <- lapply(1:num_rnn_layer, function(i) { - - if (cell_type=="lstm") { - cell <- list(i2h.weight = mx.symbol.Variable(paste0(prefix, "l", i, ".i2h.weight")), - i2h.bias = mx.symbol.Variable(paste0(prefix, "l", i, ".i2h.bias")), - h2h.weight = mx.symbol.Variable(paste0(prefix, "l", i, ".h2h.weight")), - h2h.bias = mx.symbol.Variable(paste0(prefix, "l", i, ".h2h.bias"))) - } else if (cell_type=="gru") { - cell <- list(gates.i2h.weight = mx.symbol.Variable(paste0(prefix, "l", i, ".gates.i2h.weight")), - gates.i2h.bias = mx.symbol.Variable(paste0(prefix, "l", i, ".gates.i2h.bias")), - gates.h2h.weight = mx.symbol.Variable(paste0(prefix, "l", i, ".gates.h2h.weight")), - gates.h2h.bias = mx.symbol.Variable(paste0(prefix, "l", i, ".gates.h2h.bias")), - trans.i2h.weight 
= mx.symbol.Variable(paste0(prefix, "l", i, ".trans.i2h.weight")), - trans.i2h.bias = mx.symbol.Variable(paste0(prefix, "l", i, ".trans.i2h.bias")), - trans.h2h.weight = mx.symbol.Variable(paste0(prefix, "l", i, ".trans.h2h.weight")), - trans.h2h.bias = mx.symbol.Variable(paste0(prefix, "l", i, ".trans.h2h.bias"))) - } - return (cell) - }) - - # embeding layer - data <- mx.symbol.Variable(data_name) - label <- mx.symbol.Variable(label_name) - seq.mask <- mx.symbol.Variable(paste0(prefix, "seq.mask")) - - data = mx.symbol.swapaxes(data = data, dim1 = 0, dim2 = 1, name = paste0(prefix, "swap_pre")) - - if (!is.null(num_embed)) { - data <- mx.symbol.Embedding(data = data, input_dim = input_size, - weight=embed.weight, output_dim = num_embed, name = paste0(prefix, "embed")) - } - - data <- mx.symbol.split(data = data, axis = 0, num.outputs = seq_len, squeeze_axis = T) - - last.hidden <- list() - last.states <- list() - - for (seqidx in 1:seq_len) { - hidden <- data[[seqidx]] - - for (i in 1:num_rnn_layer) { - - if (seqidx==1) prev.state <- init.state[[i]] else - prev.state <- last.states[[i]] - - if (cell_type=="lstm") { - cell.symbol <- lstm.cell - } else if (cell_type=="gru"){ - cell.symbol <- gru.cell - } - - next.state <- cell.symbol(num_hidden = num_hidden, - indata = hidden, - prev.state = prev.state, - param = param.cells[[i]], - seqidx = seqidx, - layeridx = i, - dropout = dropout, - prefix = prefix) - - hidden <- next.state$h - last.states[[i]] <- next.state - } - - # Aggregate outputs from each timestep - last.hidden <- c(last.hidden, hidden) - } - - if (output_last_state) { - out.states = mx.symbol.Group(unlist(last.states)) - } - - # concat hidden units - concat seq_len blocks of dimension num_hidden x batch.size - concat <- mx.symbol.concat(data = last.hidden, num.args = seq_len, dim = 0, name = paste0(prefix, "concat")) - concat <- mx.symbol.reshape(data = concat, shape = c(num_hidden, -1, seq_len), name = paste0(prefix, "rnn_reshape")) - - if 
(config=="seq-to-one") { - - if (masking) mask <- mx.symbol.SequenceLast(data=concat, use.sequence.length = T, sequence_length = seq.mask, name = paste0(prefix, "mask")) else - mask <- mx.symbol.SequenceLast(data=concat, use.sequence.length = F, name = paste0(prefix, "mask")) - - if (!is.null(loss_output)) { - - decode <- mx.symbol.FullyConnected(data = mask, - weight = cls.weight, - bias = cls.bias, - num_hidden = num_decode, - name = paste0(prefix, "decode")) - - out <- switch(loss_output, - softmax = mx.symbol.SoftmaxOutput(data=decode, label=label, use_ignore = !ignore_label == -1, ignore_label = ignore_label, name = paste0(prefix, "loss")), - linear = mx.symbol.LinearRegressionOutput(data=decode, label=label, name = paste0(prefix, "loss")), - logistic = mx.symbol.LogisticRegressionOutput(data=decode, label=label, paste0(prefix, name = "loss")), - MAE = mx.symbol.MAERegressionOutput(data=decode, label=label, paste0(prefix, name = "loss")) - ) - } else out <- mask - - } else if (config=="one-to-one"){ - - if (masking) mask <- mx.symbol.SequenceMask(data = concat, use.sequence.length = T, sequence_length = seq.mask, value = 0, name = paste0(prefix, "mask")) else - mask <- mx.symbol.identity(data = concat, name = paste0(prefix, "mask")) - - mask = mx.symbol.swapaxes(data = mask, dim1 = 0, dim2 = 1, name = paste0(prefix, "swap_post")) - - if (!is.null(loss_output)) { - - mask <- mx.symbol.reshape(data = mask, shape = c(0, -1), reverse = TRUE) - label <- mx.symbol.reshape(data = label, shape = c(-1)) - - decode <- mx.symbol.FullyConnected(data = mask, weight = cls.weight, bias = cls.bias, num_hidden = num_decode, - flatten = T, name = paste0(prefix, "decode")) - - out <- switch(loss_output, - softmax = mx.symbol.SoftmaxOutput(data=decode, label=label, use_ignore = !ignore_label == -1, ignore_label = ignore_label, - name = paste0(prefix, "loss")), - linear = mx.symbol.LinearRegressionOutput(data=decode, label=label, name = paste0(prefix, "loss")), - logistic = 
mx.symbol.LogisticRegressionOutput(data=decode, label=label, name = paste0(prefix, "loss")), - MAE = mx.symbol.MAERegressionOutput(data=decode, label=label, name = paste0(prefix, "loss")) - ) - } else out <- mask - } - - if (output_last_state) { - return(mx.symbol.Group(c(out, out.states))) - } else return(out) -} diff --git a/R-package/R/viz.graph.R b/R-package/R/viz.graph.R index ab876afdfa1e..488ed24818a1 100644 --- a/R-package/R/viz.graph.R +++ b/R-package/R/viz.graph.R @@ -57,12 +57,7 @@ graph.viz <- function(symbol, shape=NULL, direction="TD", type="graph", graph.wi "Flatten" = , "Reshape" = , "Concat" = "#fdb462", - "LinearRegressionOutput"=, - "MAERegressionOutput"=, - "SVMOutput"=, - "LogisticRegressionOutput"=, "MakeLoss"=, - "SoftmaxOutput" = "#b3de69", "#fccde5" # default value ) } diff --git a/R-package/demo/00Index b/R-package/demo/00Index index 1629d05f86f4..f467bbdb3880 100644 --- a/R-package/demo/00Index +++ b/R-package/demo/00Index @@ -1,7 +1,6 @@ basic_bench Basic benchmark basic_executor Basic executor operations basic_kvstore Basic kvstore operations -basic_model Basic model operations basic_ndarray Basic ndarray operations basic_random Basic random number generators basic_symbol Basic symbol operations diff --git a/R-package/demo/basic_model.R b/R-package/demo/basic_model.R deleted file mode 100644 index 0ac028e472c2..000000000000 --- a/R-package/demo/basic_model.R +++ /dev/null @@ -1,127 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -list.of.packages <- c("R.utils") -new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])] -if( length(new.packages)) install.packages(new.packages, repos = "https://cloud.r-project.org/") - -setwd(tempdir()) - -download.file("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz", destfile="train-images-idx3-ubyte.gz") - -download.file("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz", destfile="train-labels-idx1-ubyte.gz") - -download.file("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz", destfile="t10k-images-idx3-ubyte.gz") - -download.file("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz", destfile="t10k-labels-idx1-ubyte.gz") - -require(R.utils) - -gunzip("train-images-idx3-ubyte.gz") - -gunzip("train-labels-idx1-ubyte.gz") - -gunzip("t10k-images-idx3-ubyte.gz") - -gunzip("t10k-labels-idx1-ubyte.gz") - -require(mxnet) - -# Network configuration -batch.size <- 100 -data <- mx.symbol.Variable("data") -fc1 <- mx.symbol.FullyConnected(data, name = "fc1", num_hidden = 128) -act1 <- mx.symbol.Activation(fc1, name = "relu1", act_type = "relu") -fc2 <- mx.symbol.FullyConnected(act1, name = "fc2", num_hidden = 64) -act2 <- mx.symbol.Activation(fc2, name = "relu2", act_type = "relu") -fc3 <- mx.symbol.FullyConnected(act2, name = "fc3", num_hidden = 10) -softmax <- mx.symbol.Softmax(fc3, name = "sm") - -dtrain <- mx.io.MNISTIter( - image = "train-images-idx3-ubyte", - label = "train-labels-idx1-ubyte", - data.shape = c(784), - batch.size = batch.size, - shuffle = TRUE, - flat = TRUE, 
- silent = 0, - seed = 10) - -dtest = mx.io.MNISTIter( - image="t10k-images-idx3-ubyte", - label="t10k-labels-idx1-ubyte", - data.shape=c(784), - batch.size=batch.size, - shuffle=FALSE, - flat=TRUE, - silent=0) - -mx.set.seed(0) -devices = lapply(1:2, function(i) { - mx.cpu(i) -}) - -# create the model -model <- mx.model.FeedForward.create(softmax, X=dtrain, eval.data=dtest, - ctx=devices, num.round=1, - learning.rate=0.1, momentum=0.9, - initializer=mx.init.uniform(0.07), - epoch.end.callback=mx.callback.save.checkpoint("chkpt"), - batch.end.callback=mx.callback.log.train.metric(100)) - -# do prediction -pred <- predict(model, dtest) -label <- mx.io.extract(dtest, "label") -dataX <- mx.io.extract(dtest, "data") -# Predict with R's array -pred2 <- predict(model, X = dataX) - -accuracy <- function(label, pred) { - ypred = max.col(t(as.array(pred))) - return(sum((as.array(label) + 1) == ypred) / length(label)) -} - -print(paste0("Finish prediction... accuracy = ", accuracy(label, pred))) -print(paste0("Finish prediction... accuracy2 = ", accuracy(label, pred2))) - - - -# load the model -model <- mx.model.load("chkpt", 1) - -#continue training with some new arguments -model <- mx.model.FeedForward.create(model$symbol, X = dtrain, eval.data = dtest, - ctx = devices, num.round = 5, - learning.rate = 0.1, momentum = 0.9, - epoch.end.callback = mx.callback.save.checkpoint("reload_chkpt"), - batch.end.callback = mx.callback.log.train.metric(100), - arg.params = model$arg.params, aux.params = model$aux.params) - -# do prediction -pred <- predict(model, dtest) -label <- mx.io.extract(dtest, "label") -dataX <- mx.io.extract(dtest, "data") -# Predict with R's array -pred2 <- predict(model, X = dataX) - -accuracy <- function(label, pred) { - ypred <- max.col(t(as.array(pred))) - return(sum((as.array(label) + 1) == ypred) / length(label)) -} - -print(paste0("Finish prediction... accuracy=", accuracy(label, pred))) -print(paste0("Finish prediction... 
accuracy2=", accuracy(label, pred2))) diff --git a/R-package/tests/testthat/test_img_seg.R b/R-package/tests/testthat/test_img_seg.R deleted file mode 100644 index 4af7d62cf533..000000000000 --- a/R-package/tests/testthat/test_img_seg.R +++ /dev/null @@ -1,165 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -require(mxnet) - -source("get_data.R") - -if (Sys.getenv("R_GPU_ENABLE") != "" & as.integer(Sys.getenv("R_GPU_ENABLE")) == - 1) { - mx.ctx.default(new = mx.gpu()) - message("Using GPU for testing.") -} - -print_inferred_shape <- function(net) { - slist <- mx.symbol.infer.shape(symbol = net, data = c(168, 168, 1, 2)) - print(slist$out.shapes) -} - -convolution_module <- function(net, kernel_size, pad_size, filter_count, stride = c(1, - 1), work_space = 2048, batch_norm = TRUE, down_pool = FALSE, up_pool = FALSE, - act_type = "relu", convolution = TRUE) { - if (up_pool) { - net <- mx.symbol.Deconvolution(net, kernel = c(2, 2), pad = c(0, 0), stride = c(2, - 2), num_filter = filter_count, workspace = work_space) - net <- mx.symbol.BatchNorm(net) - if (act_type != "") { - net <- mx.symbol.Activation(net, act_type = act_type) - } - } - if (convolution) { - conv <- mx.symbol.Convolution(data = net, kernel = kernel_size, stride = stride, - pad = pad_size, num_filter = filter_count, workspace = work_space) - net <- conv - } - if (batch_norm) { - net <- mx.symbol.BatchNorm(net) - } - - if (act_type != "") { - net <- mx.symbol.Activation(net, act_type = act_type) - } - - if (down_pool) { - pool <- mx.symbol.Pooling(net, pool_type = "max", kernel = c(2, 2), stride = c(2, - 2)) - net <- pool - } - print_inferred_shape(net) - return(net) -} - -get_unet <- function() { - data <- mx.symbol.Variable("data") - kernel_size <- c(3, 3) - pad_size <- c(1, 1) - filter_count <- 32 - pool1 <- convolution_module(data, kernel_size, pad_size, filter_count = filter_count, - down_pool = TRUE) - net <- pool1 - pool2 <- convolution_module(net, kernel_size, pad_size, filter_count = filter_count * - 2, down_pool = TRUE) - net <- pool2 - pool3 <- convolution_module(net, kernel_size, pad_size, filter_count = filter_count * - 4, down_pool = TRUE) - net <- pool3 - pool4 <- convolution_module(net, kernel_size, pad_size, filter_count = filter_count * - 4, down_pool = TRUE) - net <- pool4 - net <- 
mx.symbol.Dropout(net) - pool5 <- convolution_module(net, kernel_size, pad_size, filter_count = filter_count * - 8, down_pool = TRUE) - net <- pool5 - net <- convolution_module(net, kernel_size, pad_size, filter_count = filter_count * - 4, up_pool = TRUE) - net <- convolution_module(net, kernel_size, pad_size = c(2, 2), filter_count = filter_count * - 4, up_pool = TRUE) - net <- mx.symbol.Crop(net, pool3, num.args = 2) - net <- mx.symbol.concat(c(pool3, net), num.args = 2) - net <- mx.symbol.Dropout(net) - net <- convolution_module(net, kernel_size, pad_size, filter_count = filter_count * - 4) - net <- convolution_module(net, kernel_size, pad_size, filter_count = filter_count * - 4, up_pool = TRUE) - - net <- mx.symbol.concat(c(pool2, net), num.args = 2) - net <- mx.symbol.Dropout(net) - net <- convolution_module(net, kernel_size, pad_size, filter_count = filter_count * - 4) - net <- convolution_module(net, kernel_size, pad_size, filter_count = filter_count * - 4, up_pool = TRUE) - convolution_module(net, kernel_size, pad_size, filter_count = filter_count * - 4) - net <- mx.symbol.concat(c(pool1, net), num.args = 2) - net <- mx.symbol.Dropout(net) - net <- convolution_module(net, kernel_size, pad_size, filter_count = filter_count * - 2) - net <- convolution_module(net, kernel_size, pad_size, filter_count = filter_count * - 2, up_pool = TRUE) - net <- convolution_module(net, kernel_size, pad_size, filter_count = 1, batch_norm = FALSE, - act_type = "") - net <- mx.symbol.SoftmaxOutput(data = net, name = "sm") - return(net) -} - -context("Image segmentation") - -test_that("UNET", { - list.of.packages <- c("imager") - new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, - "Package"])] - if (length(new.packages)) - install.packages(new.packages, repos = "https://cloud.r-project.org/") - GetISBI_data() - library(imager) - IMG_SIZE <- 168 - files <- list.files(path = "data/ISBI/train-volume/") - a <- "data/ISBI/train-volume/" - filess <- 
paste(a, files, sep = "") - list_of_images <- lapply(filess, function(x) { - x <- load.image(x) - y <- resize(x, size_x = IMG_SIZE, size_y = IMG_SIZE) - }) - - train.x <- do.call("cbind", lapply(list_of_images, as.vector)) - train.array <- train.x - dim(train.array) <- c(IMG_SIZE, IMG_SIZE, 1, 30) - - files <- list.files(path = "data/ISBI/train-labels") - b <- "data/ISBI/train-labels/" - filess <- paste(b, files, sep = "") - list_of_images <- lapply(filess, function(x) { - x <- load.image(x) - y <- resize(x, size_x = IMG_SIZE, size_y = IMG_SIZE) - }) - - train.y <- do.call("cbind", lapply(list_of_images, as.vector)) - - train.y[which(train.y < 0.5)] <- 0 - train.y[which(train.y > 0.5)] <- 1 - train.y.array <- train.y - dim(train.y.array) <- c(IMG_SIZE, IMG_SIZE, 1, 30) - - devices <- mx.ctx.default() - mx.set.seed(0) - - net <- get_unet() - - model <- mx.model.FeedForward.create(net, X = train.array, y = train.y.array, - ctx = devices, num.round = 2, initializer = mx.init.normal(sqrt(2/576)), - learning.rate = 0.05, momentum = 0.99, array.batch.size = 2) -}) diff --git a/R-package/tests/testthat/test_model.R b/R-package/tests/testthat/test_model.R deleted file mode 100644 index e62f334a4ab8..000000000000 --- a/R-package/tests/testthat/test_model.R +++ /dev/null @@ -1,290 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -require(mxnet) - -source("get_data.R") - -context("models") - -if (Sys.getenv("R_GPU_ENABLE") != "" & as.integer(Sys.getenv("R_GPU_ENABLE")) == - 1) { - mx.ctx.default(new = mx.gpu()) - message("Using GPU for testing.") -} - -test_that("MNIST", { - # # Network configuration - GetMNIST_ubyte() - batch.size <- 100 - data <- mx.symbol.Variable("data") - fc1 <- mx.symbol.FullyConnected(data, name = "fc1", num_hidden = 128) - act1 <- mx.symbol.Activation(fc1, name = "relu1", act_type = "relu") - fc2 <- mx.symbol.FullyConnected(act1, name = "fc2", num_hidden = 64) - act2 <- mx.symbol.Activation(fc2, name = "relu2", act_type = "relu") - fc3 <- mx.symbol.FullyConnected(act2, name = "fc3", num_hidden = 10) - softmax <- mx.symbol.Softmax(fc3, name = "sm") - - dtrain <- mx.io.MNISTIter(image = "data/train-images-idx3-ubyte", label = "data/train-labels-idx1-ubyte", - data.shape = c(784), batch.size = batch.size, shuffle = TRUE, flat = TRUE, - silent = 0, seed = 10) - - dtest <- mx.io.MNISTIter(image = "data/t10k-images-idx3-ubyte", label = "data/t10k-labels-idx1-ubyte", - data.shape = c(784), batch.size = batch.size, shuffle = FALSE, flat = TRUE, - silent = 0) - - mx.set.seed(0) - - # create the model - model <- mx.model.FeedForward.create(softmax, X = dtrain, eval.data = dtest, - ctx = mx.ctx.default(), num.round = 1, learning.rate = 0.1, momentum = 0.9, - initializer = mx.init.uniform(0.07), epoch.end.callback = mx.callback.save.checkpoint("chkpt"), - batch.end.callback = mx.callback.log.train.metric(100)) - - # do prediction - pred <- predict(model, dtest) - label <- mx.io.extract(dtest, "label") - dataX <- mx.io.extract(dtest, "data") - # Predict with R's array - pred2 <- predict(model, X = dataX) - - accuracy <- function(label, pred) { - ypred <- max.col(t(as.array(pred))) - return(sum((as.array(label) + 1) == ypred)/length(label)) - } - - 
expect_equal(accuracy(label, pred), accuracy(label, pred2), tolerance = 0.1) - - file.remove("chkpt-0001.params") - file.remove("chkpt-symbol.json") -}) - -test_that("Regression", { - data(BostonHousing, package = "mlbench") - train.ind <- seq(1, 506, 3) - train.x <- data.matrix(BostonHousing[train.ind, -14]) - train.y <- BostonHousing[train.ind, 14] - test.x <- data.matrix(BostonHousing[-train.ind, -14]) - test.y <- BostonHousing[-train.ind, 14] - data <- mx.symbol.Variable("data") - fc1 <- mx.symbol.FullyConnected(data, num_hidden = 1) - lro <- mx.symbol.LinearRegressionOutput(fc1) - - demo.metric.mae <- mx.metric.custom("mae", function(label, pred) { - pred <- mx.nd.reshape(pred, shape = 0) - res <- mx.nd.mean(mx.nd.abs(label - pred)) - return(as.array(res)) - }) - mx.set.seed(0) - model <- mx.model.FeedForward.create(lro, X = train.x, y = train.y, ctx = mx.ctx.default(), - num.round = 5, array.batch.size = 20, learning.rate = 2e-06, momentum = 0.9, - eval.metric = demo.metric.mae) - - train.x <- data.matrix(BostonHousing[train.ind, -(13:14)]) - train.y <- BostonHousing[train.ind, c(13:14)] - test.x <- data.matrix(BostonHousing[-train.ind, -(13:14)]) - test.y <- BostonHousing[-train.ind, c(13:14)] - - data <- mx.symbol.Variable("data") - fc2 <- mx.symbol.FullyConnected(data, num_hidden = 2) - lro2 <- mx.symbol.LinearRegressionOutput(fc2) - - mx.set.seed(0) - train_iter <- mx.io.arrayiter(data = t(train.x), label = t(train.y)) - - model <- mx.model.FeedForward.create(lro2, X = train_iter, ctx = mx.ctx.default(), - num.round = 50, array.batch.size = 20, learning.rate = 2e-06, momentum = 0.9) -}) - - -test_that("Classification", { - data(Sonar, package = "mlbench") - Sonar[, 61] <- as.numeric(Sonar[, 61]) - 1 - train.ind <- c(1:50, 100:150) - train.x <- data.matrix(Sonar[train.ind, 1:60]) - train.y <- Sonar[train.ind, 61] - test.x <- data.matrix(Sonar[-train.ind, 1:60]) - test.y <- Sonar[-train.ind, 61] - mx.set.seed(0) - model <- mx.mlp(train.x, train.y, 
hidden_node = 10, out_node = 2, out_activation = "softmax", - num.round = 5, array.batch.size = 15, learning.rate = 0.07, momentum = 0.9, - eval.metric = mx.metric.accuracy) -}) - -test_that("Fine-tune", { - GetInception() - GetCatDog() - train_iter <- mx.io.ImageRecordIter(path.imgrec = "./data/cats_dogs/cats_dogs_train.rec", - batch.size = 8, data.shape = c(224, 224, 3), rand.crop = TRUE, rand.mirror = TRUE) - val_iter <- mx.io.ImageRecordIter(path.imgrec = "./data/cats_dogs/cats_dogs_val.rec", - batch.size = 8, data.shape = c(224, 224, 3), rand.crop = FALSE, rand.mirror = FALSE) - inception_bn <- mx.model.load("./model/Inception-BN", iteration = 126) - symbol <- inception_bn$symbol - internals <- symbol$get.internals() - outputs <- internals$outputs - - flatten <- internals$get.output(which(outputs == "flatten_output")) - - new_fc <- mx.symbol.FullyConnected(data = flatten, num_hidden = 2, name = "fc1") - new_soft <- mx.symbol.SoftmaxOutput(data = new_fc, name = "softmax") - arg_params_new <- mx.model.init.params(symbol = new_soft, input.shape = list(data = c(224, - 224, 3, 8)), output.shape = NULL, initializer = mx.init.uniform(0.1), ctx = mx.cpu())$arg.params - fc1_weights_new <- arg_params_new[["fc1_weight"]] - fc1_bias_new <- arg_params_new[["fc1_bias"]] - - arg_params_new <- inception_bn$arg.params - - arg_params_new[["fc1_weight"]] <- fc1_weights_new - arg_params_new[["fc1_bias"]] <- fc1_bias_new - - # model <- mx.model.FeedForward.create(symbol = new_soft, X = train_iter, - # eval.data = val_iter, ctx = mx.ctx.default(), eval.metric = mx.metric.accuracy, - # num.round = 2, learning.rate = 0.05, momentum = 0.9, wd = 0.00001, kvstore = - # 'local', batch.end.callback = mx.callback.log.train.metric(50), initializer = - # mx.init.Xavier(factor_type = 'in', magnitude = 2.34), optimizer = 'sgd', - # arg.params = arg_params_new, aux.params = inception_bn$aux.params) -}) - -test_that("Matrix Factorization", { - - # Use fake random data instead of GetMovieLens() 
to remove external dependency - set.seed(123) - user <- sample(943, size = 1e+05, replace = T) - item <- sample(1682, size = 1e+05, replace = T) - score <- sample(5, size = 1e+05, replace = T) - DF <- data.frame(user, item, score) - - max_user <- max(DF$user) - max_item <- max(DF$item) - DF_mat_x <- data.matrix(t(DF[, 1:2])) - DF_y <- DF[, 3] - k <- 64 - user <- mx.symbol.Variable("user") - item <- mx.symbol.Variable("item") - score <- mx.symbol.Variable("score") - user1 <- mx.symbol.Embedding(data = mx.symbol.BlockGrad(user), input_dim = max_user, - output_dim = k, name = "user1") - item1 <- mx.symbol.Embedding(data = mx.symbol.BlockGrad(item), input_dim = max_item, - output_dim = k, name = "item1") - pred <- user1 * item1 - pred1 <- mx.symbol.sum_axis(pred, axis = 1, name = "pred1") - pred2 <- mx.symbol.Flatten(pred1, name = "pred2") - pred3 <- mx.symbol.LinearRegressionOutput(data = pred2, label = score, name = "pred3") - - mx.set.seed(123) - - CustomIter <- setRefClass("CustomIter", fields = c("iter1", "iter2"), contains = "Rcpp_MXArrayDataIter", - methods = list(initialize = function(iter1, iter2) { - .self$iter1 <- iter1 - .self$iter2 <- iter2 - .self - }, value = function() { - user <- .self$iter1$value()$data - item <- .self$iter2$value()$data - score <- .self$iter1$value()$label - list(user = user, item = item, score = score) - }, iter.next = function() { - .self$iter1$iter.next() - .self$iter2$iter.next() - }, reset = function() { - .self$iter1$reset() - .self$iter2$reset() - }, num.pad = function() { - .self$iter1$num.pad() - }, finalize = function() { - .self$iter1$finalize() - .self$iter2$finalize() - })) - - user_iter <- mx.io.arrayiter(data = DF[, 1], label = DF[, 3], batch.size = k) - - item_iter <- mx.io.arrayiter(data = DF[, 2], label = DF[, 3], batch.size = k) - - train_iter <- CustomIter$new(user_iter, item_iter) - - model <- mx.model.FeedForward.create(pred3, X = train_iter, ctx = mx.ctx.default(), - num.round = 5, initializer = 
mx.init.uniform(0.07), learning.rate = 0.07, - eval.metric = mx.metric.rmse, momentum = 0.9, epoch.end.callback = mx.callback.log.train.metric(1), - input.names = c("user", "item"), output.names = "score") -}) - -test_that("Captcha", { - GetCaptcha_data() - data.shape <- c(80, 30, 3) - batch_size <- 40 - train <- mx.io.ImageRecordIter(path.imgrec = "./data/captcha_example/captcha_train.rec", - path.imglist = "./data/captcha_example/captcha_train.lst", batch.size = batch_size, - label.width = 4, data.shape = data.shape, mean.img = "mean.bin") - - val <- mx.io.ImageRecordIter(path.imgrec = "./data/captcha_example/captcha_test.rec", - path.imglist = "./data/captcha_example/captcha_test.lst", batch.size = batch_size, - label.width = 4, data.shape = data.shape, mean.img = "mean.bin") - - data <- mx.symbol.Variable("data") - label <- mx.symbol.Variable("label") - conv1 <- mx.symbol.Convolution(data = data, kernel = c(5, 5), num_filter = 32) - pool1 <- mx.symbol.Pooling(data = conv1, pool_type = "max", kernel = c(2, 2), - stride = c(1, 1)) - relu1 <- mx.symbol.Activation(data = pool1, act_type = "relu") - - conv2 <- mx.symbol.Convolution(data = relu1, kernel = c(5, 5), num_filter = 32) - pool2 <- mx.symbol.Pooling(data = conv2, pool_type = "avg", kernel = c(2, 2), - stride = c(1, 1)) - relu2 <- mx.symbol.Activation(data = pool2, act_type = "relu") - - flatten <- mx.symbol.Flatten(data = relu2) - fc1 <- mx.symbol.FullyConnected(data = flatten, num_hidden = 120) - fc21 <- mx.symbol.FullyConnected(data = fc1, num_hidden = 10) - fc22 <- mx.symbol.FullyConnected(data = fc1, num_hidden = 10) - fc23 <- mx.symbol.FullyConnected(data = fc1, num_hidden = 10) - fc24 <- mx.symbol.FullyConnected(data = fc1, num_hidden = 10) - fc2 <- mx.symbol.concat(c(fc21, fc22, fc23, fc24), dim = 0, num.args = 4) - label <- mx.symbol.transpose(data = label) - label <- mx.symbol.Reshape(data = label, target_shape = c(0)) - captcha_net <- mx.symbol.SoftmaxOutput(data = fc2, label = label, name = 
"softmax") - - mx.metric.acc2 <- mx.metric.custom("accuracy", function(label, pred) { - label <- as.array(label) - pred <- as.array(pred) - ypred <- max.col(t(pred)) - 1 - ypred <- matrix(ypred, nrow = nrow(label), ncol = ncol(label), byrow = TRUE) - return(sum(colSums(label == ypred) == 4)/ncol(label)) - }) - - mx.set.seed(42) - - train$reset() - train$iter.next() - - input.names <- "data" - input.shape <- sapply(input.names, function(n) { - dim(train$value()[[n]]) - }, simplify = FALSE) - arg_names <- arguments(captcha_net) - output.names <- "label" - output.shape <- sapply(output.names, function(n) { - dim(train$value()[[n]]) - }, simplify = FALSE) - params <- mx.model.init.params(captcha_net, input.shape, output.shape, mx.init.Xavier(factor_type = "in", - magnitude = 2.34), mx.cpu()) - - # model <- mx.model.FeedForward.create( X = train, eval.data = val, ctx = - # mx.ctx.default(), symbol = captcha_net, eval.metric = mx.metric.acc2, num.round - # = 1, learning.rate = 1e-04, momentum = 0.9, wd = 1e-05, batch.end.callback = - # mx.callback.log.train.metric(50), initializer = mx.init.Xavier(factor_type = - # 'in', magnitude = 2.34), optimizer = 'sgd', clip_gradient = 10) -}) diff --git a/R-package/tests/testthat/test_optimizer.R b/R-package/tests/testthat/test_optimizer.R deleted file mode 100644 index cbe9575c90ca..000000000000 --- a/R-package/tests/testthat/test_optimizer.R +++ /dev/null @@ -1,251 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -context("optimizer") - -if (Sys.getenv("R_GPU_ENABLE") != "" & as.integer(Sys.getenv("R_GPU_ENABLE")) == - 1) { - mx.ctx.default(new = mx.gpu()) - message("Using GPU for testing.") -} - -test_that("sgd", { - - data <- mx.symbol.Variable("data") - label <- mx.symbol.Variable("label") - fc_weight <- mx.symbol.Variable("fc_weight") - fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T, - name = "fc1", num_hidden = 1) - loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name = "loss") - - x <- mx.nd.array(array(1:6, dim = 2:3)) - y <- mx.nd.array(c(5, 11, 16)) - w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1))) - - exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.ctx.default(), arg.arrays = list(data = x, - fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write", - "null")) - - optimizer <- mx.opt.create("sgd", learning.rate = 1, momentum = 0, wd = 0, rescale.grad = 1, - clip_gradient = -1) - - updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default()) - - mx.exec.forward(exec, is.train = T) - mx.exec.backward(exec) - - arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays) - mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE) - - expect_equal(as.array(arg.blocks[[2]]), array(c(1.4, 2.6), dim = c(2, 1)), tolerance = 0.1) - -}) - - -test_that("rmsprop", { - - data <- mx.symbol.Variable("data") - label <- mx.symbol.Variable("label") - fc_weight <- mx.symbol.Variable("fc_weight") - fc <- mx.symbol.FullyConnected(data = data, weight = 
fc_weight, no.bias = T, - name = "fc1", num_hidden = 1) - loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name = "loss") - - x <- mx.nd.array(array(1:6, dim = 2:3)) - y <- mx.nd.array(c(5, 11, 16)) - w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1))) - - exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.ctx.default(), arg.arrays = list(data = x, - fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write", - "null")) - - optimizer <- mx.opt.create("rmsprop", learning.rate = 1, centered = TRUE, rho = 0.95, - momentum = 0.9, epsilon = 1e-04, wd = 0, rescale.grad = 1, clip_gradient = -1) - - updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default()) - - mx.exec.forward(exec, is.train = T) - mx.exec.backward(exec) - - arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays) - mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE) - - expect_equal(as.array(arg.blocks[[2]]), array(c(5.64, 6.38), dim = c(2, 1)), - tolerance = 0.1) - -}) - - -test_that("adam", { - - data <- mx.symbol.Variable("data") - label <- mx.symbol.Variable("label") - fc_weight <- mx.symbol.Variable("fc_weight") - fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T, - name = "fc1", num_hidden = 1) - loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name = "loss") - - x <- mx.nd.array(array(1:6, dim = 2:3)) - y <- mx.nd.array(c(5, 11, 16)) - w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1))) - - exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.ctx.default(), arg.arrays = list(data = x, - fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write", - "null")) - - optimizer <- mx.opt.create("adam", learning.rate = 1, beta1 = 0.9, beta2 = 0.999, - epsilon = 1e-08, wd = 0, rescale.grad = 1, clip_gradient = -1) - - updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default()) - - mx.exec.forward(exec, is.train = T) - 
mx.exec.backward(exec) - - arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays) - mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE) - - expect_equal(as.array(arg.blocks[[2]]), array(c(4.26, 4.96), dim = c(2, 1)), - tolerance = 0.1) - -}) - - -test_that("adagrad", { - - data <- mx.symbol.Variable("data") - label <- mx.symbol.Variable("label") - fc_weight <- mx.symbol.Variable("fc_weight") - fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T, - name = "fc1", num_hidden = 1) - loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name = "loss") - - x <- mx.nd.array(array(1:6, dim = 2:3)) - y <- mx.nd.array(c(5, 11, 16)) - w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1))) - - exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.ctx.default(), arg.arrays = list(data = x, - fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write", - "null")) - - optimizer <- mx.opt.create("adagrad", learning.rate = 1, epsilon = 1e-08, wd = 0, - rescale.grad = 1, clip_gradient = -1) - - updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default()) - - mx.exec.forward(exec, is.train = T) - mx.exec.backward(exec) - - arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays) - mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE) - - expect_equal(as.array(arg.blocks[[2]]), array(c(2.1, 2.8), dim = c(2, 1)), tolerance = 0.1) - -}) - - -test_that("adadelta", { - - data <- mx.symbol.Variable("data") - label <- mx.symbol.Variable("label") - fc_weight <- mx.symbol.Variable("fc_weight") - fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T, - name = "fc1", num_hidden = 1) - loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name = "loss") - - x <- mx.nd.array(array(1:6, dim = 2:3)) - y <- mx.nd.array(c(5, 11, 16)) - w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1))) - - exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = 
mx.ctx.default(), arg.arrays = list(data = x, - fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write", - "null")) - - optimizer <- mx.opt.create("adadelta", rho = 0.9, epsilon = 1e-05, wd = 0, rescale.grad = 1, - clip_gradient = -1) - - updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default()) - - mx.exec.forward(exec, is.train = T) - mx.exec.backward(exec) - - arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays) - mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE) - - expect_equal(as.array(arg.blocks[[2]]), array(c(1.11, 1.81), dim = c(2, 1)), - tolerance = 0.1) - -}) - - -test_that("nag_no_momentum", { - data <- mx.symbol.Variable("data") - label <- mx.symbol.Variable("label") - fc_weight <- mx.symbol.Variable("fc_weight") - fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T, - name = "fc1", num_hidden = 1) - loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name = "loss") - - x <- mx.nd.array(array(1:6, dim = 2:3)) - y <- mx.nd.array(c(5, 11, 16)) - w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1))) - - exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.ctx.default(), arg.arrays = list(data = x, - fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write", "null")) - - optimizer <- mx.opt.create("nag", learning.rate = 1, momentum = 0, wd = 0, rescale.grad = 1, - clip_gradient = -1) - - updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default()) - - mx.exec.forward(exec, is.train = T) - mx.exec.backward(exec) - - arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays) - mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE) - - expect_equal(as.array(arg.blocks[[2]]), array(c(1.4, 2.6), dim = c(2, 1)), tolerance = 0.05) -}) - - -test_that("nag_momentum", { - data <- mx.symbol.Variable("data") - label <- mx.symbol.Variable("label") - fc_weight <- mx.symbol.Variable("fc_weight") - 
fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = T, - name = "fc1", num_hidden = 1) - loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name = "loss") - - x <- mx.nd.array(array(1:6, dim = 2:3)) - y <- mx.nd.array(c(5, 11, 16)) - w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1))) - - exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.ctx.default(), arg.arrays = list(data = x, - fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write", "null")) - - optimizer <- mx.opt.create("nag", learning.rate = 1, momentum = 0.1, wd = 0, rescale.grad = 1, - clip_gradient = 5) - - updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default()) - - mx.exec.forward(exec, is.train = T) - mx.exec.backward(exec) - - arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays) - mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE) - - expect_equal(as.array(arg.blocks[[2]]), array(c(1.45, 2.65), dim = c(2, 1)), tolerance = 0.1) -}) diff --git a/R-package/tests/testthat/test_symbol.R b/R-package/tests/testthat/test_symbol.R index c93118a7db1f..acad98ac7b1f 100644 --- a/R-package/tests/testthat/test_symbol.R +++ b/R-package/tests/testthat/test_symbol.R @@ -91,17 +91,6 @@ test_that("symbol infer type", { expect_equal(ret, NULL) }) -test_that("symbol save/load", { - data <- mx.symbol.Variable("data") - fc1 <- mx.symbol.FullyConnected(data, num_hidden = 1) - lro <- mx.symbol.LinearRegressionOutput(fc1) - mx.symbol.save(lro, "tmp_r_sym.json") - data2 <- mx.symbol.load("tmp_r_sym.json") - - expect_equal(data2$as.json(), lro$as.json()) - file.remove("tmp_r_sym.json") -}) - test_that("symbol attributes access", { str <- "(1, 1, 1, 1)" x <- mx.symbol.Variable("x") diff --git a/R-package/vignettes/CallbackFunction.Rmd b/R-package/vignettes/CallbackFunction.Rmd deleted file mode 100644 index 12b7e28247e9..000000000000 --- a/R-package/vignettes/CallbackFunction.Rmd +++ /dev/null @@ -1,160 +0,0 @@ 
-# Customized callback function - -This vignette gives users a guideline for using and writing callback functions, -which can be very useful in model training. - -## Model training example - -Let's begin from a small example. We can build and train a model using the following code: - -```{r} -library(mxnet) -data(BostonHousing, package="mlbench") -train.ind = seq(1, 506, 3) -train.x = data.matrix(BostonHousing[train.ind, -14]) -train.y = BostonHousing[train.ind, 14] -test.x = data.matrix(BostonHousing[-train.ind, -14]) -test.y = BostonHousing[-train.ind, 14] -data <- mx.symbol.Variable("data") -fc1 <- mx.symbol.FullyConnected(data, num_hidden=1) -lro <- mx.symbol.LinearRegressionOutput(fc1) -mx.set.seed(0) -model <- mx.model.FeedForward.create( - lro, X=train.x, y=train.y, - eval.data=list(data=test.x, label=test.y), - ctx=mx.cpu(), num.round=10, array.batch.size=20, - learning.rate=2e-6, momentum=0.9, eval.metric=mx.metric.rmse) -``` - -Besides, we provide two optional parameters, `batch.end.callback` and `epoch.end.callback`, which can provide great flexibility in model training. - -## How to use callback functions - - -Two callback functions are provided in this package: - -- `mx.callback.save.checkpoint` is used to save checkpoint to files each period iteration. - -```{r} -model <- mx.model.FeedForward.create( - lro, X=train.x, y=train.y, - eval.data=list(data=test.x, label=test.y), - ctx=mx.cpu(), num.round=10, array.batch.size=20, - learning.rate=2e-6, momentum=0.9, eval.metric=mx.metric.rmse, - epoch.end.callback = mx.callback.save.checkpoint("boston")) -list.files(pattern = "^boston") -``` - - -- `mx.callback.log.train.metric` is used to log training metric each period. -You can use it either as a `batch.end.callback` or a `epoch.end.callback`. 
- -```{r} -model <- mx.model.FeedForward.create( - lro, X=train.x, y=train.y, - eval.data=list(data=test.x, label=test.y), - ctx=mx.cpu(), num.round=10, array.batch.size=20, - learning.rate=2e-6, momentum=0.9, eval.metric=mx.metric.rmse, - batch.end.callback = mx.callback.log.train.metric(5)) -``` - -You can also save the training and evaluation errors for later usage by passing a reference class. - -```{r} -logger <- mx.metric.logger$new() -model <- mx.model.FeedForward.create( - lro, X=train.x, y=train.y, - eval.data=list(data=test.x, label=test.y), - ctx=mx.cpu(), num.round=10, array.batch.size=20, - learning.rate=2e-6, momentum=0.9, eval.metric=mx.metric.rmse, - epoch.end.callback = mx.callback.log.train.metric(5, logger)) -head(logger$train) -head(logger$eval) -``` - -## How to write your own callback functions - - -You can find the source code for two callback functions from [here](https://github.com/dmlc/mxnet/blob/master/R-package/R/callback.R) and they can be used as your template: - -Basically, all callback functions follow the structure below: - -```{r, eval=FALSE} -mx.callback.fun <- function() { - function(iteration, nbatch, env, verbose) { - } -} -``` - -The `mx.callback.save.checkpoint` function below is stateless. It just get the model from environment and save it. - -```{r, eval=FALSE} -mx.callback.save.checkpoint <- function(prefix, period=1) { - function(iteration, nbatch, env, verbose=TRUE) { - if (iteration %% period == 0) { - mx.model.save(env$model, prefix, iteration) - if(verbose) message(sprintf("Model checkpoint saved to %s-%04d.params\n", prefix, iteration)) - } - return(TRUE) - } -} -``` - -The `mx.callback.log.train.metric` is a little more complex. It holds a reference class and update it during the training process. 
- -```{r, eval=FALSE} -mx.callback.log.train.metric <- function(period, logger=NULL) { - function(iteration, nbatch, env, verbose=TRUE) { - if (nbatch %% period == 0 && !is.null(env$metric)) { - result <- env$metric$get(env$train.metric) - if (nbatch != 0 & verbose) - message(paste0("Batch [", nbatch, "] Train-", result$name, "=", result$value)) - if (!is.null(logger)) { - if (class(logger) != "mx.metric.logger") { - stop("Invalid mx.metric.logger.") - } - logger$train <- c(logger$train, result$value) - if (!is.null(env$eval.metric)) { - result <- env$metric$get(env$eval.metric) - if (nbatch != 0 & verbose) - message(paste0("Batch [", nbatch, "] Validation-", result$name, "=", result$value)) - logger$eval <- c(logger$eval, result$value) - } - } - } - return(TRUE) - } -} -``` - -Now you might be curious why both callback functions `return(TRUE)`. -Can we `return(FALSE)`? - -Yes! You can stop the training early by `return(FALSE)`. See the examples below. - -```{r} -mx.callback.early.stop <- function(eval.metric) { - function(iteration, nbatch, env, verbose) { - if (!is.null(env$metric)) { - if (!is.null(eval.metric)) { - result <- env$metric$get(env$eval.metric) - if (result$value < eval.metric) { - return(FALSE) - } - } - } - return(TRUE) - } -} -model <- mx.model.FeedForward.create( - lro, X=train.x, y=train.y, - eval.data=list(data=test.x, label=test.y), - ctx=mx.cpu(), num.round=10, array.batch.size=20, - learning.rate=2e-6, momentum=0.9, eval.metric=mx.metric.rmse, - epoch.end.callback = mx.callback.early.stop(10)) -``` - -You can see once the validation metric goes below the threshold we set, the training process will stop early. - - - diff --git a/R-package/vignettes/CatsDogsFinetune.Rmd b/R-package/vignettes/CatsDogsFinetune.Rmd deleted file mode 100644 index 726bb1a43c77..000000000000 --- a/R-package/vignettes/CatsDogsFinetune.Rmd +++ /dev/null @@ -1,272 +0,0 @@ -# Dogs vs. 
Cats classification with mxnet and R - -## Packages and prerequisites - -In this tutorial, we mainly use the following three packages: - -* `mxnet`: model training -* `imager`: image processing -* `abind`: manipulations with arrays. - -It is an end-to-end R solution for the dogs vs cats Kaggle competition (https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/) -and it can be used as an example for fine-tuning. -All the code has been test on Ubuntu 16.04. - -```{r, echo=FALSE} -knitr::opts_chunk$set(eval = FALSE) -``` - - -```{r} -library(imager) -library(mxnet) -library(abind) -``` - - -## Image processing - -### Renaming train files - -```{r} -files <- list.files("./train/") -old_names <- sapply(files, strsplit, split = ".", fixed = TRUE) -max_length <- max(sapply(old_names, function(x) nchar(x[[2]]))) -zeros <- max_length - sapply(old_names, function(x) nchar(x[[2]])) -zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = "")) -new_names <- Map(function(x, y) {paste0("./train/", x[1], "/", y, x[2], ".jpg")}, - x = old_names, y = zeros) - -# Full names -files <- paste0("./train/", files) - -dir.create("./train/cat") -dir.create("./train/dog") - -# New names will be in 00001.jpg format -Map(function(x, y) file.rename(from = x, to = y), files, new_names) -``` - -### Training images: 224x224, padded with empty space - -```{r} -files <- list.files("./train/", recursive = TRUE) -new_names <- paste0("./train_pad_224x224/", files) -files <- paste0("./train/", files) -dir.create("./train_pad_224x224/") -dir.create("./train_pad_224x224/cat") -dir.create("./train_pad_224x224/dog") - -padImage <- function(x) { - long_side <- max(dim(x)[1:2]) - short_side <- min(dim(x)[1:2]) - pad_img <- pad(x, - nPix = long_side - short_side, - axes = ifelse(dim(x)[1] < dim(x)[2], "x", "y")) - return(pad_img) -} - -Map(function(x, y) { - pad_img <- padImage(load.image(x)) - res_img <- resize(pad_img, size_x = 224, size_y = 224) - imager::save.image(res_img, y) - }, x = files, 
y = new_names) -``` - -### Renaming test files - -```{r} -files <- list.files("./test/") -max_length <- max(sapply(files, nchar)) -zeros <- max_length - sapply(files, nchar) -zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = "")) -newnames <- paste0("./test/", zeros, files) - -files <- paste0("./test/", files) - -Map(function(x, y) file.rename(from = x, to = y), files, newnames) -``` - - -### Test images: 224x224, padded with empty space - -```{r} -files <- list.files("./test/") -new_names <- paste0("./test_pad_224x224/", files) -files <- paste0("./test/", files) -dir.create("./test_pad_224x224/") - -Map(function(x, y) { - pad_img <- padImage(load.image(x)) - res_img <- resize(pad_img, size_x = 224, size_y = 224) - imager::save.image(res_img, y) -}, x = files, y = new_names) -``` - -### Creating .rec files - -```{r} -cat_files <- list.files("train_pad_224x224/cat/", recursive=TRUE) -cat_files <- paste0("cat/", cat_files) - -dog_files <- list.files("train_pad_224x224/dog/", recursive=TRUE) -dog_files <- paste0("dog/", dog_files) - -train_ind <- sample(length(cat_files), length(cat_files) * 0.8) -train_data <- c(1:(length(train_ind) * 2)) -train_data <- cbind(train_data, c(rep(0, length(train_ind)), rep(1, length(train_ind)))) -train_data <- cbind(train_data, c(cat_files[train_ind], dog_files[train_ind])) -train_data <- train_data[sample(nrow(train_data)),] -write.table(train_data, "cats_dogs_train.lst", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE) -im2rec("cats_dogs_train.lst", "train_pad_224x224/", "cats_dogs_train.rec") - -val_ind <- c(1:length(cat_files))[!c(1:length(cat_files)) %in% train_ind] -val_data <- c(1:(length(val_ind) * 2)) -val_data <- cbind(val_data, c(rep(0, length(val_ind)), rep(1, length(val_ind)))) -val_data <- cbind(val_data, c(cat_files[val_ind], dog_files[val_ind])) -val_data <- val_data[sample(nrow(val_data)),] -write.table(val_data, "cats_dogs_val.lst", quote = FALSE, sep = "\t", row.names = FALSE, col.names = 
FALSE) -im2rec("cats_dogs_val.lst", "train_pad_224x224/", "cats_dogs_val.rec") -``` - -## The data iterator - -```{r} -get_iterator <- function(data_shape, train_data, val_data, batch_size = 128) { - train <- mx.io.ImageRecordIter(path.imgrec = train_data, - batch.size = batch_size, - data.shape = data_shape, - rand.crop = TRUE, - rand.mirror = TRUE) - - val <- mx.io.ImageRecordIter(path.imgrec = val_data, - batch.size = batch_size, - data.shape = data_shape, - rand.crop = FALSE, - rand.mirror = FALSE) - - return(list(train = train, val = val)) -} -``` - - -```{r} -data <- get_iterator(data_shape = c(224, 224, 3), - train_data = "cats_dogs_train.rec", - val_data = "cats_dogs_val.rec", - batch_size = 8) -train <- data$train -val <- data$val -``` - - -## Load pretrained model - -Here we use the pretrained model from http://data.mxnet.io/mxnet/data/. -There are 1000 classes in imagenet, -and we need to replace the last fully connected layer with a new layer for 2 classes. - - -```{r} -download.file('http://data.mxnet.io/mxnet/data/Inception.zip', destfile = 'Inception.zip') -unzip("Inception.zip") -inception_bn <- mx.model.load("./Inception-BN", iteration = 126) - -symbol <- inception_bn$symbol -# check symbol$arguments for layer names -internals <- symbol$get.internals() -outputs <- internals$outputs - -flatten <- internals$get.output(which(outputs == "flatten_output")) - -new_fc <- mx.symbol.FullyConnected(data = flatten, - num_hidden = 2, - name = "fc1") -# set name to original name in symbol$arguments -new_soft <- mx.symbol.SoftmaxOutput(data = new_fc, - name = "softmax") -# set name to original name in symbol$arguments - -arg_params_new <- mx.model.init.params(symbol = new_soft, - input.shape = list("data" = c(224, 224, 3, 8)), - output.shape = NULL, - initializer = mx.init.uniform(0.1), - ctx = mx.cpu())$arg.params -fc1_weights_new <- arg_params_new[["fc1_weight"]] -fc1_bias_new <- arg_params_new[["fc1_bias"]] - -arg_params_new <- inception_bn$arg.params - 
-arg_params_new[["fc1_weight"]] <- fc1_weights_new -arg_params_new[["fc1_bias"]] <- fc1_bias_new -``` - - -## Fine-tuning - -```{r} -model <- mx.model.FeedForward.create( - symbol = new_soft, - X = train, - eval.data = val, - ctx = mx.gpu(0), - eval.metric = mx.metric.accuracy, - num.round = 2, - learning.rate = 0.05, - momentum = 0.9, - wd = 0.00001, - kvstore = "local", - array.batch.size = 128, - epoch.end.callback = mx.callback.save.checkpoint("inception_bn"), - batch.end.callback = mx.callback.log.train.metric(150), - initializer = mx.init.Xavier(factor_type = "in", magnitude = 2.34), - optimizer = "sgd", - arg.params = arg_params_new, - aux.params = inception_bn$aux.params -) -``` -## Making predictions - -```{r} -preprocImage<- function(src, # URL or file location - height = 224, - width = 224, - num_channels = 3, # 3 for RGB, 1 for grayscale - mult_by = 1, # set to 255 for normalized image - crop = FALSE) { # no crop by default - im <- load.image(src) - - if (crop) { - shape <- dim(im) - short_edge <- min(shape[1:2]) - xx <- floor((shape[1] - short_edge) / 2) - yy <- floor((shape[2] - short_edge) / 2) - im <- crop.borders(im, xx, yy) - } - - resized <- resize(im, size_x = width, size_y = height) - arr <- as.array(resized) * mult_by - dim(arr) <- c(width, height, num_channels, 1) - return(arr) -} -``` - -```{r} -files <- list.files("./test_pad_224x224/") -files <- paste0("./test_pad_224x224/", files) - -files <- split(files, rep(1:1250, each = 10)) -probs <- lapply(files, function(x) { - images <- lapply(x, preprocImage, mult_by = 255) - images <- do.call(abind, images) - probs <- predict(model, X = images, ctx = mx.gpu(0)) -}) -saveRDS(probs, "probs.rds") -probs <- t(do.call(cbind, probs)) - -preds <- data.frame(id = 1:12500, label = probs[, 2]) -write.csv(preds, "subm.csv", row.names = FALSE, quote = FALSE) -``` - - - diff --git a/R-package/vignettes/CustomIterator.Rmd b/R-package/vignettes/CustomIterator.Rmd deleted file mode 100644 index 
b5a6576a5bc6..000000000000 --- a/R-package/vignettes/CustomIterator.Rmd +++ /dev/null @@ -1,207 +0,0 @@ -# Customized iterator - - -This tutorial provides a guideline on how to use and write custom iterators, which can very useful when having a dataset that does not fit into memory. - -## Getting the data - -The data we are going to use is the [MNIST dataset](http://yann.lecun.com/exdb/mnist/) in CSV format, which can be found from [here](https://www.kaggle.com/c/digit-recognizer/data). - -To download the data: - -```{r} -download.file('https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/mnist_csv.zip', - destfile = 'mnist_csv.zip') -unzip('mnist_csv.zip', exdir = '.') -``` - -You'll get two files, `mnist_train.csv` that contains 60.000 examples of hand written numbers and `mxnist_test.csv` that contains 10.000 examples. The first element of each line in the CSV is the label, which is a number between 0 and 9. The rest of the line are 784 numbers between 0 and 255, corresponding to the levels of grey of a matrix of 28x28. Therefore, each line contains an image of 28x28 pixels of a hand written number and its true label. - -## Custom CSV Iterator - -Next we are going to create a custom CSV Iterator based on the [C++ CSVIterator class](https://github.com/dmlc/mxnet/blob/master/src/io/iter_csv.cc). - -For that we are going to use the R function `mx.io.CSVIter` as a base class. This class has as parameters `data.csv, data.shape, batch.size` and two main functions, `iter.next()` that calls the iterator in the next batch of data and `value()` that returns the train data and the label. - -The R Custom Iterator needs to inherit from the C++ data iterator class, for that we used the class `Rcpp_MXArrayDataIter` extracted with RCPP. Also, it needs to have the same parameters: `data.csv, data.shape, batch.size`. Apart from that, we can also add the field `iter`, which is the CSV Iterator that we are going to expand. 
- -```{r, eval=FALSE} -CustomCSVIter <- setRefClass("CustomCSVIter", - fields=c("iter", "data.csv", "data.shape", "batch.size"), - contains = "Rcpp_MXArrayDataIter", - #... - ) -``` - -The next step is to initialize the class. For that we call the base `mx.io.CSVIter` and fill the rest of the fields. - -```{r, eval=FALSE} -CustomCSVIter <- setRefClass("CustomCSVIter", - fields=c("iter", "data.csv", "data.shape", "batch.size"), - contains = "Rcpp_MXArrayDataIter", - methods=list( - initialize=function(iter, data.csv, data.shape, batch.size){ - feature_len <- data.shape*data.shape + 1 - csv_iter <- mx.io.CSVIter(data.csv=data.csv, data.shape=c(feature_len), batch.size=batch.size) - .self$iter <- csv_iter - .self$data.csv <- data.csv - .self$data.shape <- data.shape - .self$batch.size <- batch.size - .self - }, - #... - ) - ) -``` - -So far there is no difference between the original class and the custom class. Let's implement the function `value()`. In this case what we are going to do is transform the data that comes from the original class as an array of 785 numbers into a matrix of 28x28 and a label. We will also normalize the training data to be between 0 and 1. - -```{r, eval=FALSE} -CustomCSVIter <- setRefClass("CustomCSVIter", - fields=c("iter", "data.csv", "data.shape", "batch.size"), - contains = "Rcpp_MXArrayDataIter", - methods=list( - initialize=function(iter, data.csv, data.shape, batch.size){ - feature_len <- data.shape*data.shape + 1 - csv_iter <- mx.io.CSVIter(data.csv=data.csv, data.shape=c(feature_len), batch.size=batch.size) - .self$iter <- csv_iter - .self$data.csv <- data.csv - .self$data.shape <- data.shape - .self$batch.size <- batch.size - .self - }, - value=function(){ - val <- as.array(.self$iter$value()$data) - val.x <- val[-1,] - val.y <- val[1,] - val.x <- val.x/255 - dim(val.x) <- c(data.shape, data.shape, 1, ncol(val.x)) - val.x <- mx.nd.array(val.x) - val.y <- mx.nd.array(val.y) - list(data=val.x, label=val.y) - }, - #... 
- ) - ) -``` -Finally we are going to add the rest of the functions needed for the training to work correctly. The final `CustomCSVIter` looks like this: - -```{r} -CustomCSVIter <- setRefClass("CustomCSVIter", - fields=c("iter", "data.csv", "data.shape", "batch.size"), - contains = "Rcpp_MXArrayDataIter", - methods=list( - initialize=function(iter, data.csv, data.shape, batch.size){ - feature_len <- data.shape*data.shape + 1 - csv_iter <- mx.io.CSVIter(data.csv=data.csv, data.shape=c(feature_len), batch.size=batch.size) - .self$iter <- csv_iter - .self$data.csv <- data.csv - .self$data.shape <- data.shape - .self$batch.size <- batch.size - .self - }, - value=function(){ - val <- as.array(.self$iter$value()$data) - val.x <- val[-1,] - val.y <- val[1,] - val.x <- val.x/255 - dim(val.x) <- c(data.shape, data.shape, 1, ncol(val.x)) - val.x <- mx.nd.array(val.x) - val.y <- mx.nd.array(val.y) - list(data=val.x, label=val.y) - }, - iter.next=function(){ - .self$iter$iter.next() - }, - reset=function(){ - .self$iter$reset() - }, - num.pad=function(){ - .self$iter$num.pad() - }, - finalize=function(){ - .self$iter$finalize() - } - ) - ) -``` - -To call the class we can just do: - -```{r} -batch.size <- 100 -train.iter <- CustomCSVIter$new(iter = NULL, data.csv = "mnist_train.csv", data.shape = 28, batch.size = batch.size) -``` - -## CNN Model - - -For this tutorial we are going to use the known LeNet architecture: - -```{r} -library(mxnet) -lenet.model <- function(){ - data <- mx.symbol.Variable('data') - conv1 <- mx.symbol.Convolution(data=data, kernel=c(5,5), num_filter=20) #first conv - tanh1 <- mx.symbol.Activation(data=conv1, act_type="tanh") - pool1 <- mx.symbol.Pooling(data=tanh1, pool_type="max", kernel=c(2,2), stride=c(2,2)) - conv2 <- mx.symbol.Convolution(data=pool1, kernel=c(5,5), num_filter=50)# second conv - tanh2 <- mx.symbol.Activation(data=conv2, act_type="tanh") - pool2 <- mx.symbol.Pooling(data=tanh2, pool_type="max", kernel=c(2,2), stride=c(2,2)) - 
flatten <- mx.symbol.Flatten(data=pool2) - fc1 <- mx.symbol.FullyConnected(data=flatten, num_hidden=100) # first fullc - tanh3 <- mx.symbol.Activation(data=fc1, act_type="tanh") - fc2 <- mx.symbol.FullyConnected(data=tanh3, num_hidden=10) # second fullc - network <- mx.symbol.SoftmaxOutput(data=fc2) # loss - network -} -network <- lenet.model() -``` - -## Training with the Custom Iterator - -Finally, we can directly add the custom iterator as the training data source. - -```{r, eval=FALSE} -model <- mx.model.FeedForward.create(symbol=network, - X=train.iter, - ctx=mx.gpu(0), - num.round=10, - array.batch.size=batch.size, - learning.rate=0.1, - momentum=0.9, - eval.metric=mx.metric.accuracy, - wd=0.00001, - batch.end.callback=mx.callback.log.speedometer(batch.size, frequency = 100) - ) -``` - -The last 2 iterations with a K80 GPU looks like this: - -``` -## [8] Train-accuracy=0.998866666666667 -## Batch [100] Speed: 15413.0104454713 samples/sec Train-accuracy=0.999 -## Batch [200] Speed: 16629.3412459049 samples/sec Train-accuracy=0.99935 -## Batch [300] Speed: 18412.6900509319 samples/sec Train-accuracy=0.9995 -## Batch [400] Speed: 16757.2882328335 samples/sec Train-accuracy=0.999425 -## Batch [500] Speed: 17116.6529207406 samples/sec Train-accuracy=0.99946 -## Batch [600] Speed: 19627.589505195 samples/sec Train-accuracy=0.99945 -## [9] Train-accuracy=0.9991 -## Batch [100] Speed: 18971.5745536982 samples/sec Train-accuracy=0.9992 -## Batch [200] Speed: 15554.8822435383 samples/sec Train-accuracy=0.99955 -## Batch [300] Speed: 18327.6950115053 samples/sec Train-accuracy=0.9997 -## Batch [400] Speed: 17103.0705411788 samples/sec Train-accuracy=0.9997 -## Batch [500] Speed: 15104.8656902394 samples/sec Train-accuracy=0.99974 -## Batch [600] Speed: 13818.7899518255 samples/sec Train-accuracy=0.99975 -## [10] Train-accuracy=0.99975 -``` - -## Conclusion - - -We have shown how to create a custom CSV Iterator by extending the class `mx.io.CSVIter`. 
In our class, we iteratively read from a CSV file a batch of data that will be transformed and then processed in the stochastic gradient descent optimization. That way, we are able to manage CSV files that are bigger than the memory of the machine we are using. - -Based of this custom iterator, we can also create data loaders that internally transform or expand the data, allowing to manage files of any size. - - - diff --git a/R-package/vignettes/CustomLossFunction.Rmd b/R-package/vignettes/CustomLossFunction.Rmd deleted file mode 100644 index 85e882567f8e..000000000000 --- a/R-package/vignettes/CustomLossFunction.Rmd +++ /dev/null @@ -1,151 +0,0 @@ -# Customized loss function - -This tutorial provides guidelines for using customized loss function in network construction. - -## Model Training Example - -Let's begin with a small regression example. We can build and train a regression model with the following code: - -```{r} -data(BostonHousing, package = "mlbench") -BostonHousing[, sapply(BostonHousing, is.factor)] <- - as.numeric(as.character(BostonHousing[, sapply(BostonHousing, is.factor)])) -BostonHousing <- data.frame(scale(BostonHousing)) - -test.ind = seq(1, 506, 5) # 1 pt in 5 used for testing -train.x = data.matrix(BostonHousing[-test.ind,-14]) -train.y = BostonHousing[-test.ind, 14] -test.x = data.matrix(BostonHousing[--test.ind,-14]) -test.y = BostonHousing[--test.ind, 14] - -require(mxnet) - -data <- mx.symbol.Variable("data") -label <- mx.symbol.Variable("label") -fc1 <- mx.symbol.FullyConnected(data, num_hidden = 14, name = "fc1") -tanh1 <- mx.symbol.Activation(fc1, act_type = "tanh", name = "tanh1") -fc2 <- mx.symbol.FullyConnected(tanh1, num_hidden = 1, name = "fc2") -lro <- mx.symbol.LinearRegressionOutput(fc2, name = "lro") - -mx.set.seed(0) -model <- mx.model.FeedForward.create(lro, X = train.x, y = train.y, - ctx = mx.cpu(), - num.round = 5, - array.batch.size = 60, - optimizer = "rmsprop", - verbose = TRUE, - array.layout = "rowmajor", - 
batch.end.callback = NULL, - epoch.end.callback = NULL) - -pred <- predict(model, test.x) -sum((test.y - pred[1,])^2) / length(test.y) -``` - -Besides the `LinearRegressionOutput`, we also provide `LogisticRegressionOutput` and `MAERegressionOutput`. -However, this might not be enough for real-world models. You can provide your own loss function -by using `mx.symbol.MakeLoss` when constructing the network. - -## How to Use Your Own Loss Function - -We still use our previous example, but this time we use `mx.symbol.MakeLoss` to minimize the `(pred-label)^2` - -```{r} -data <- mx.symbol.Variable("data") -label <- mx.symbol.Variable("label") -fc1 <- mx.symbol.FullyConnected(data, num_hidden = 14, name = "fc1") -tanh1 <- mx.symbol.Activation(fc1, act_type = "tanh", name = "tanh1") -fc2 <- mx.symbol.FullyConnected(tanh1, num_hidden = 1, name = "fc2") -lro2 <- mx.symbol.MakeLoss(mx.symbol.square(mx.symbol.Reshape(fc2, shape = 0) - label), name="lro2") -``` - -Then we can train the network just as usual. - -```{r} -mx.set.seed(0) -model2 <- mx.model.FeedForward.create(lro2, X = train.x, y = train.y, - ctx = mx.cpu(), - num.round = 5, - array.batch.size = 60, - optimizer = "rmsprop", - verbose = TRUE, - array.layout = "rowmajor", - batch.end.callback = NULL, - epoch.end.callback = NULL) -``` - -We should get very similar results because we are actually minimizing the same loss function. -However, the result is quite different. - -```{r} -pred2 <- predict(model2, test.x) -sum((test.y - pred2)^2) / length(test.y) -``` - -This is because output of `mx.symbol.MakeLoss` is the gradient of loss with respect to the input data. -We can get the real prediction as below. 
- -```{r} -internals = internals(model2$symbol) -fc_symbol = internals[[match("fc2_output", outputs(internals))]] - -model3 <- list(symbol = fc_symbol, - arg.params = model2$arg.params, - aux.params = model2$aux.params) - -class(model3) <- "MXFeedForwardModel" - -pred3 <- predict(model3, test.x) -sum((test.y - pred3[1,])^2) / length(test.y) -``` - -We have provided many operations on the symbols. An example of `|pred-label|` can be found below. - -```{r} -lro_abs <- mx.symbol.MakeLoss(mx.symbol.abs(mx.symbol.Reshape(fc2, shape = 0) - label)) -mx.set.seed(0) -model4 <- mx.model.FeedForward.create(lro_abs, X = train.x, y = train.y, - ctx = mx.cpu(), - num.round = 20, - array.batch.size = 60, - optimizer = "sgd", - learning.rate = 0.001, - verbose = TRUE, - array.layout = "rowmajor", - batch.end.callback = NULL, - epoch.end.callback = NULL) - -internals = internals(model4$symbol) -fc_symbol = internals[[match("fc2_output", outputs(internals))]] - -model5 <- list(symbol = fc_symbol, - arg.params = model4$arg.params, - aux.params = model4$aux.params) - -class(model5) <- "MXFeedForwardModel" - -pred5 <- predict(model5, test.x) -sum(abs(test.y - pred5[1,])) / length(test.y) -``` - - -```{r} -lro_mae <- mx.symbol.MAERegressionOutput(fc2, name = "lro") -mx.set.seed(0) -model6 <- mx.model.FeedForward.create(lro_mae, X = train.x, y = train.y, - ctx = mx.cpu(), - num.round = 20, - array.batch.size = 60, - optimizer = "sgd", - learning.rate = 0.001, - verbose = TRUE, - array.layout = "rowmajor", - batch.end.callback = NULL, - epoch.end.callback = NULL) -pred6 <- predict(model6, test.x) -sum(abs(test.y - pred6[1,])) / length(test.y) -``` - -We got the same result as expected. 
- - diff --git a/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd b/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd deleted file mode 100644 index fb023bb5435f..000000000000 --- a/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd +++ /dev/null @@ -1,173 +0,0 @@ -# Neural Network with MXNet in Five Minutes - -This is the first tutorial for new users of the R package `mxnet`. You will learn to construct a neural network to do regression in 5 minutes. - -We will show you how to do classification and regression tasks respectively. The data we use comes from the package `mlbench`. - -## Classification - -First of all, let us load in the data and preprocess it: - -```{r} -require(mlbench) -require(mxnet) - -data(Sonar, package = "mlbench") - -Sonar[,61] <- as.numeric(Sonar[,61])-1 -train.ind <- c(1:50, 100:150) -train.x <- data.matrix(Sonar[train.ind, 1:60]) -train.y <- Sonar[train.ind, 61] -test.x <- data.matrix(Sonar[-train.ind, 1:60]) -test.y <- Sonar[-train.ind, 61] -``` - -Next we are going to use a multi-layer perceptron (MLP) as our classifier. -In `mxnet`, we have a function called `mx.mlp` so that users can build a general multi-layer neural network to do classification (`out_activation="softmax"`) or regression (`out_activation="rmse"`). -Note for the `softmax` activation, the output is zero-indexed not one-indexed. In the data we use: - -```{r} -table(train.y) -table(test.y) -``` - -There are several parameters we have to feed to `mx.mlp`: - -- Training data and label. -- Number of hidden nodes in each hidden layers. -- Number of nodes in the output layer. -- Type of the activation. -- Type of the output loss. -- The device to train `mx.gpu()` for GPU or `mx.cpu()` for CPU. -- Other parameters for `mx.model.FeedForward.create`. 
- -The following code piece is showing a possible usage of `mx.mlp`: - -```{r} -mx.set.seed(0) -model <- mx.mlp(train.x, train.y, hidden_node=10, out_node=2, out_activation="softmax", - num.round=20, array.batch.size=15, learning.rate=0.07, momentum=0.9, - eval.metric=mx.metric.accuracy) -``` - -Note that `mx.set.seed` is the correct function to control the random process in `mxnet`. You can see the accuracy in each round during training. It is also easy to make prediction and evaluate. - -To get an idea of what is happening, we can easily view the computation graph from R. - -```{r} -graph.viz(model$symbol) -``` - -```{r} -preds <- predict(model, test.x) -pred.label <- max.col(t(preds)) - 1 -table(pred.label, test.y) -``` - -Note for multi-class prediction, mxnet outputs `nclass` x `nexamples`, each each row corresponding to probability of that class. - -## Regression - -Again, let us preprocess the data first. - -```{r} -data(BostonHousing, package="mlbench") - -train.ind <- seq(1, 506, 3) -train.x <- data.matrix(BostonHousing[train.ind, -14]) -train.y <- BostonHousing[train.ind, 14] -test.x <- data.matrix(BostonHousing[-train.ind, -14]) -test.y <- BostonHousing[-train.ind, 14] -``` - -Although we can use `mx.mlp` again to do regression by changing the `out_activation`, this time we are going to introduce a flexible way to configure neural networks in `mxnet`. The configuration is done by the "Symbol" system in `mxnet`, which takes care of the links among nodes, the activation, dropout ratio, etc. 
To configure a multi-layer neural network, we can do it in the following way: - -```{r} -# Define the input data -data <- mx.symbol.Variable("data") -# A fully connected hidden layer -# data: input source -# num_hidden: number of neurons in this hidden layer -fc1 <- mx.symbol.FullyConnected(data, num_hidden=1) - -# Use linear regression for the output layer -lro <- mx.symbol.LinearRegressionOutput(fc1) -``` - -What matters for a regression task is mainly the last function, this enables the new network to optimize for squared loss. We can now train on this simple data set. In this configuration, we dropped the hidden layer so the input layer is directly connected to the output layer. - -next we can make prediction with this structure and other parameters with `mx.model.FeedForward.create`: - -```{r} -mx.set.seed(0) -model <- mx.model.FeedForward.create(lro, X=train.x, y=train.y, - ctx=mx.cpu(), num.round=50, array.batch.size=20, - learning.rate=2e-6, momentum=0.9, eval.metric=mx.metric.rmse) -``` - -It is also easy to make prediction and evaluate - -```{r} -preds <- predict(model, test.x) -sqrt(mean((preds-test.y)^2)) -``` - -Currently we have four pre-defined metrics "accuracy", "rmse", "mae" and "rmsle". One might wonder how to customize the evaluation metric. `mxnet` provides the interface for users to define their own metric of interests: - -```{r} -demo.metric.mae <- mx.metric.custom("mae", function(label, pred) { - res <- mean(abs(label-pred)) - return(res) -}) -``` - -This is an example for mean absolute error. We can simply plug it in the training function: - -```{r} -mx.set.seed(0) -model <- mx.model.FeedForward.create(lro, X=train.x, y=train.y, - ctx=mx.cpu(), num.round=50, array.batch.size=20, - learning.rate=2e-6, momentum=0.9, eval.metric=demo.metric.mae) -``` - -In the previous example, our target is to predict the last column ("medv") in the dataset. -It is also possible to build a regression model with multiple outputs. 
-This time we use the last two columns as the targets: - -```{r} -train.x <- data.matrix(BostonHousing[train.ind, -(13:14)]) -train.y <- BostonHousing[train.ind, c(13:14)] -test.x <- data.matrix(BostonHousing[-train.ind, -(13:14)]) -test.y <- BostonHousing[-train.ind, c(13:14)] -``` - -and build a similar network symbol: - -```{r} -data <- mx.symbol.Variable("data") -fc2 <- mx.symbol.FullyConnected(data, num_hidden=2) -lro2 <- mx.symbol.LinearRegressionOutput(fc2) -``` - -We use `mx.io.arrayiter` to build an iter for our training set and train the model using `mx.model.FeedForward.create`: - -```{r} -mx.set.seed(0) -train_iter = mx.io.arrayiter(data = t(train.x), label = t(train.y)) - -model <- mx.model.FeedForward.create(lro2, X=train_iter, - ctx=mx.cpu(), num.round=50, array.batch.size=20, - learning.rate=2e-6, momentum=0.9) -``` - -After training, we can see that the dimension of the prediction is the same with our target. - -```{r} -preds <- t(predict(model, test.x)) -dim(preds) -dim(test.y) -``` -Congratulations! Now you have learnt the basic for using `mxnet`. Please check the other tutorials for advanced features. - - - diff --git a/R-package/vignettes/mnistCompetition.Rmd b/R-package/vignettes/mnistCompetition.Rmd deleted file mode 100644 index 055f1ae51d7e..000000000000 --- a/R-package/vignettes/mnistCompetition.Rmd +++ /dev/null @@ -1,246 +0,0 @@ -# Handwritten Digits Classification Competition - -[MNIST](http://yann.lecun.com/exdb/mnist/) is a handwritten digits image data set created by Yann LeCun. Every digit is represented by a 28x28 image. It has become a standard data set to test classifiers on simple image input. Neural network is no doubt a strong model for image classification tasks. There's a [long-term hosted competition](https://www.kaggle.com/c/digit-recognizer) on Kaggle using this data set. -We will present the basic usage of [mxnet](https://github.com/dmlc/mxnet/tree/master/R-package) to compete in this challenge. 
- -## Data Loading - -First, let us download the data from [here](https://www.kaggle.com/c/digit-recognizer/data), and put them under the `data/` folder in your working directory. - -Then we can read them in R and convert to matrices. - -```{r, echo=FALSE} -download.file('https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/mnist_csv.zip', destfile = 'mnist_csv.zip') -unzip('mnist_csv.zip', exdir = '.') -``` - - -```{r} -require(mxnet) -train <- read.csv("train.csv", header=TRUE) -test <- read.csv("test.csv", header=TRUE) -train <- data.matrix(train) -test <- data.matrix(test) - -train.x <- train[,-1] -train.y <- train[,1] -``` - -Besides using the csv files from kaggle, you can also read the orginal MNIST dataset into R. - -```{r, eval=FALSE} -load_image_file <- function(filename) { - f = file(filename, 'rb') - readBin(f, 'integer', n = 1, size = 4, endian = 'big') - n = readBin(f,'integer', n = 1, size = 4, endian = 'big') - nrow = readBin(f,'integer', n = 1, size = 4, endian = 'big') - ncol = readBin(f,'integer', n = 1, size = 4, endian = 'big') - x = readBin(f, 'integer', n = n * nrow * ncol, size = 1, signed = F) - x = matrix(x, ncol = nrow * ncol, byrow = T) - close(f) - x -} - -load_label_file <- function(filename) { - f = file(filename, 'rb') - readBin(f,'integer', n = 1, size = 4, endian = 'big') - n = readBin(f,'integer', n = 1, size = 4, endian = 'big') - y = readBin(f,'integer', n = n, size = 1, signed = F) - close(f) - y -} - -train.x <- load_image_file('mnist/train-images-idx3-ubyte') -test.y <- load_image_file('mnist/t10k-images-idx3-ubyte') - -train.y <- load_label_file('mnist/train-labels-idx1-ubyte') -test.y <- load_label_file('mnist/t10k-labels-idx1-ubyte') -``` - -Here every image is represented as a single row in train/test. 
The greyscale of each image falls in the range [0, 255], we can linearly transform it into [0,1] by - -```{r} -train.x <- t(train.x/255) -test <- t(test/255) -``` -We also transpose the input matrix to npixel x nexamples, which is the column major format accepted by mxnet (and the convention of R). - -In the label part, we see the number of each digit is fairly even: - -```{r} -table(train.y) -``` - -## Network Configuration - -Now we have the data. The next step is to configure the structure of our network. - -```{r} -data <- mx.symbol.Variable("data") -fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=128) -act1 <- mx.symbol.Activation(fc1, name="relu1", act_type="relu") -fc2 <- mx.symbol.FullyConnected(act1, name="fc2", num_hidden=64) -act2 <- mx.symbol.Activation(fc2, name="relu2", act_type="relu") -fc3 <- mx.symbol.FullyConnected(act2, name="fc3", num_hidden=10) -softmax <- mx.symbol.SoftmaxOutput(fc3, name="sm") -``` - -1. In `mxnet`, we use its own data type `symbol` to configure the network. `data <- mx.symbol.Variable("data")` use `data` to represent the input data, i.e. the input layer. -2. Then we set the first hidden layer by `fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=128)`. This layer has `data` as the input, its name and the number of hidden neurons. -3. The activation is set by `act1 <- mx.symbol.Activation(fc1, name="relu1", act_type="relu")`. The activation function takes the output from the first hidden layer `fc1`. -4. The second hidden layer takes the result from `act1` as the input, with its name as "fc2" and the number of hidden neurons as 64. -5. the second activation is almost the same as `act1`, except we have a different input source and name. -6. Here comes the output layer. Since there's only 10 digits, we set the number of neurons to 10. -7. Finally we set the activation to softmax to get a probabilistic prediction. 
- -If you are a big fan of the `%>%` operator, you can also define the network as below: - -```{r, eval=FALSE} -library(magrittr) -softmax <- mx.symbol.Variable("data") %>% - mx.symbol.FullyConnected(name = "fc1", num_hidden = 128) %>% - mx.symbol.Activation(name = "relu1", act_type = "relu") %>% - mx.symbol.FullyConnected(name = "fc2", num_hidden = 64) %>% - mx.symbol.Activation(name = "relu2", act_type = "relu") %>% - mx.symbol.FullyConnected(name="fc3", num_hidden=10) %>% - mx.symbol.SoftmaxOutput(name="sm") -``` - -## Training - -We are almost ready for the training process. Before we start the computation, let's decide what device should we use. - -```{r} -devices <- mx.cpu() -``` - -Here we assign CPU to `mxnet`. After all these preparation, you can run the following command to train the neural network! Note that `mx.set.seed` is the correct function to control the random process in `mxnet`. - -```{r} -mx.set.seed(0) -model <- mx.model.FeedForward.create(softmax, X = train.x, y = train.y, - ctx = devices, num.round = 5, - array.batch.size = 100, - learning.rate = 0.07, momentum = 0.9, - eval.metric = mx.metric.accuracy, - initializer = mx.init.uniform(0.07), - batch.end.callback = mx.callback.log.train.metric(100)) -``` - -## Prediction and Submission - -To make prediction, we can simply write - -```{r} -preds <- predict(model, test) -dim(preds) -``` - -It is a matrix with 28000 rows and 10 cols, containing the desired classification probabilities from the output layer. To extract the maximum label for each row, we can use the `max.col` in R: - -```{r} -pred.label <- max.col(t(preds)) - 1 -table(pred.label) -``` - -With a little extra effort in the csv format, we can have our submission to the competition! 
- -```{r, eval = FALSE} -submission <- data.frame(ImageId=1:ncol(test), Label=pred.label) -write.csv(submission, file='submission.csv', row.names=FALSE, quote=FALSE) -``` - -## LeNet - -Next we are going to introduce a new network structure: [LeNet](http://yann.lecun.com/exdb/lenet/). It is proposed by Yann LeCun to recognize handwritten digits. Now we are going to demonstrate how to construct and train an LeNet in `mxnet`. - - -First we construct the network: - -```{r} -require(mxnet) -# input -data <- mx.symbol.Variable('data') -# first conv -conv1 <- mx.symbol.Convolution(data=data, kernel=c(5,5), num_filter=20) -tanh1 <- mx.symbol.Activation(data=conv1, act_type="tanh") -pool1 <- mx.symbol.Pooling(data=tanh1, pool_type="max", - kernel=c(2,2), stride=c(2,2)) -# second conv -conv2 <- mx.symbol.Convolution(data=pool1, kernel=c(5,5), num_filter=50) -tanh2 <- mx.symbol.Activation(data=conv2, act_type="tanh") -pool2 <- mx.symbol.Pooling(data=tanh2, pool_type="max", - kernel=c(2,2), stride=c(2,2)) -# first fullc -flatten <- mx.symbol.Flatten(data=pool2) -fc1 <- mx.symbol.FullyConnected(data=flatten, num_hidden=500) -tanh3 <- mx.symbol.Activation(data=fc1, act_type="tanh") -# second fullc -fc2 <- mx.symbol.FullyConnected(data=tanh3, num_hidden=10) -# loss -lenet <- mx.symbol.SoftmaxOutput(data=fc2) -``` - -Then let us reshape the matrices into arrays: - -```{r} -train.array <- train.x -dim(train.array) <- c(28, 28, 1, ncol(train.x)) -test.array <- test -dim(test.array) <- c(28, 28, 1, ncol(test)) -``` - -Next we are going to compare the training speed on different devices, so the definition of the devices goes first: - -```{r} -n.gpu <- 1 -device.cpu <- mx.cpu() -device.gpu <- lapply(0:(n.gpu-1), function(i) { - mx.gpu(i) -}) -``` - -As you can see, we can pass a list of devices, to ask mxnet to train on multiple GPUs (you can do similar thing for cpu, -but since internal computation of cpu is already multi-threaded, there is less gain than using GPUs). 
- -We start by training on CPU first. Because it takes a bit time to do so, we will only run it for one iteration. - -```{r} -mx.set.seed(0) -tic <- proc.time() -model <- mx.model.FeedForward.create(lenet, X = train.array, y = train.y, - ctx = device.cpu, num.round = 1, - array.batch.size = 100, - learning.rate = 0.05, momentum = 0.9, wd = 0.00001, - eval.metric = mx.metric.accuracy, - batch.end.callback = mx.callback.log.train.metric(100)) -print(proc.time() - tic) -``` - -Training on GPU: - -```{r} -mx.set.seed(0) -tic <- proc.time() -model <- mx.model.FeedForward.create(lenet, X = train.array, y = train.y, - ctx = device.gpu, num.round = 5, - array.batch.size = 100, - learning.rate = 0.05, momentum = 0.9, wd = 0.00001, - eval.metric = mx.metric.accuracy, - batch.end.callback = mx.callback.log.train.metric(100)) -print(proc.time() - tic) -``` - -As you can see by using GPU, we can get a much faster speedup in training! -Finally we can submit the result to Kaggle again to see the improvement of our ranking! - -```{r, eval = FALSE} -preds <- predict(model, test.array) -pred.label <- max.col(t(preds)) - 1 -submission <- data.frame(ImageId=1:ncol(test), Label=pred.label) -write.csv(submission, file='submission.csv', row.names=FALSE, quote=FALSE) -``` - -![](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/knitr/mnistCompetition-kaggle-submission.png) - - diff --git a/R-package/vignettes/symbol.Rmd b/R-package/vignettes/symbol.Rmd deleted file mode 100644 index 228c6b26606c..000000000000 --- a/R-package/vignettes/symbol.Rmd +++ /dev/null @@ -1,103 +0,0 @@ -# Symbol and Automatic Differentiation - -The computational unit `NDArray` requires a way to construct neural networks. MXNet provides a symbolic interface, named Symbol, to do this. Symbol combines both flexibility and efficiency. 
- -## Basic Composition of Symbols - -The following code creates a two-layer perceptron network: - - -```{r} -require(mxnet) -net <- mx.symbol.Variable("data") -net <- mx.symbol.FullyConnected(data=net, name="fc1", num_hidden=128) -net <- mx.symbol.Activation(data=net, name="relu1", act_type="relu") -net <- mx.symbol.FullyConnected(data=net, name="fc2", num_hidden=64) -net <- mx.symbol.Softmax(data=net, name="out") -class(net) -``` - - -Each symbol takes a (unique) string name. *Variable* often defines the inputs, -or free variables. Other symbols take a symbol as the input (*data*), -and may accept other hyper parameters, such as the number of hidden neurons (*num_hidden*) -or the activation type (*act_type*). - -A symbol can be viewed as a function that takes several arguments, whose -names are automatically generated and can be retrieved with the following command: - - -```{r} -arguments(net) -``` - -The arguments are the parameters need by each symbol: - -- *data*: Input data needed by the variable *data* -- *fc1_weight* and *fc1_bias*: The weight and bias for the first fully connected layer, *fc1* -- *fc2_weight* and *fc2_bias*: The weight and bias for the second fully connected layer, *fc2* -- *out_label*: The label needed by the loss - -We can also specify the automatically generated names explicitly: - - -```{r} -data <- mx.symbol.Variable("data") -w <- mx.symbol.Variable("myweight") -net <- mx.symbol.FullyConnected(data=data, weight=w, name="fc1", num_hidden=128) -arguments(net) -``` - -## More Complicated Composition of Symbols - -MXNet provides well-optimized symbols for -commonly used layers in deep learning. You can also define new operators -in Python. 
The following example first performs an element-wise add between two -symbols, then feeds them to the fully connected operator: - - -```{r} -lhs <- mx.symbol.Variable("data1") -rhs <- mx.symbol.Variable("data2") -net <- mx.symbol.FullyConnected(data=lhs + rhs, name="fc1", num_hidden=128) -arguments(net) -``` - -We can construct a symbol more flexibly than by using the single -forward composition, for example: - -```{r} -net <- mx.symbol.Variable("data") -net <- mx.symbol.FullyConnected(data=net, name="fc1", num_hidden=128) -net2 <- mx.symbol.Variable("data2") -net2 <- mx.symbol.FullyConnected(data=net2, name="net2", num_hidden=128) -composed.net <- mx.apply(net, data=net2, name="compose") -arguments(composed.net) -``` - -In the example, *net* is used as a function to apply to an existing symbol -*net*. The resulting *composed.net* will replace the original argument *data* with -*net2* instead. - -## Training a Neural Net - -The [model API](../../../R-package/R/model.R) is a thin wrapper around the symbolic executors to support neural net training. - -We encourage you to read [Symbolic Configuration and Execution in Pictures for python package](../python/symbol_in_pictures.md)for a detailed explanation of concepts in pictures. - -## How Efficient Is the Symbolic API? - -The Symbolic API brings the efficient C++ -operations in powerful toolkits, such as CXXNet and Caffe, together with the -flexible dynamic NDArray operations. All of the memory and computation resources are -allocated statically during bind operations, to maximize runtime performance and memory -utilization. - -The coarse-grained operators are equivalent to CXXNet layers, which are -extremely efficient. We also provide fine-grained operators for more flexible -composition. Because MXNet does more in-place memory allocation, it can -be more memory efficient than CXXNet and gets to the same runtime with -greater flexibility. 
- - - \ No newline at end of file diff --git a/amalgamation/mxnet_predict0.cc b/amalgamation/mxnet_predict0.cc index a18e28fd57de..f9bf45adfa19 100644 --- a/amalgamation/mxnet_predict0.cc +++ b/amalgamation/mxnet_predict0.cc @@ -81,7 +81,6 @@ #include "src/operator/leaky_relu.cc" #include "src/operator/nn/pooling.cc" #include "src/operator/nn/softmax_activation.cc" -#include "src/operator/softmax_output.cc" #include "src/operator/tensor/elemwise_binary_broadcast_op_basic.cc" #include "src/operator/tensor/elemwise_binary_op.cc" #include "src/operator/tensor/elemwise_binary_op_basic.cc" diff --git a/benchmark/opperf/nd_operations/nn_basic_operators.py b/benchmark/opperf/nd_operations/nn_basic_operators.py index f3007bac188c..d669b83d653e 100644 --- a/benchmark/opperf/nd_operations/nn_basic_operators.py +++ b/benchmark/opperf/nd_operations/nn_basic_operators.py @@ -29,11 +29,6 @@ 1. FullyConnected 2. Dropout 3. BatchNorm -4. SoftmaxOutput -5. LinearRegressionOutput -6. LogisticRegressionOutput -7. MAERegressionOutput -8. SVMOutput 9. L2Normalization 10. LayerNorm 11. 
InstanceNorm diff --git a/benchmark/opperf/rules/default_params.py b/benchmark/opperf/rules/default_params.py index 04ec8100b1b4..4e8bb6b6cc6f 100644 --- a/benchmark/opperf/rules/default_params.py +++ b/benchmark/opperf/rules/default_params.py @@ -182,14 +182,7 @@ # L2Normalization DEFAULT_MODE_L2 = ['channel', 'instance', 'spatial'] -# SVMOutput -DEFAULT_LABEL_SVM = [(32, 3, 256), (32, 3, 10000)] - DEFAULT_DATA_SVM_LARGE_TENSOR = [(2**29, 2, 2, 2)] -DEFAULT_LABEL_SVM_LARGE_TENSOR = [(2**29, 2, 2)] - -# SoftmaxOutput -DEFAULT_LABEL_SM = [(32, 3, 256), (32, 3, 10000)] DEFAULT_DATA_SO_LARGE_TENSOR = [(2**29, 2, 2, 2)] DEFAULT_LABEL_SO_LARGE_TENSOR = [(2**29, 2, 2)] @@ -537,16 +530,6 @@ "moving_mean_batchnorm": DEFAULT_MOVING_MEAN, "moving_var_batchnorm": DEFAULT_MOVING_VAR, "axis_batchnorm": DEFAULT_AXIS_BN, - "data_softmaxoutput": DEFAULT_DATA_NN_BASIC, - "label_softmaxoutput": DEFAULT_LABEL_SM, - "data_maeregressionoutput": DEFAULT_DATA_NN_BASIC, - "label_maeregressionoutput": DEFAULT_LABEL_REG, - "data_logisticregressionoutput": DEFAULT_DATA_NN_BASIC, - "label_logisticregressionoutput": DEFAULT_LABEL_REG, - "data_linearregressionoutput": DEFAULT_DATA_NN_BASIC, - "label_linearregressionoutput": DEFAULT_LABEL_REG, - "data_svmoutput": DEFAULT_DATA_NN_BASIC, - "label_svmoutput": DEFAULT_LABEL_SVM, "grad_scale": DEFAULT_GRAD_SCALE, "normalization": DEFAULT_NORMALIZATION, "margin": DEFAULT_MARGIN, @@ -751,16 +734,6 @@ "moving_mean_batchnorm": DEFAULT_MOVING_MEAN_LARGE_TENSOR, "moving_var_batchnorm": DEFAULT_MOVING_VAR_LARGE_TENSOR, "axis_batchnorm": DEFAULT_AXIS_BN, - "data_softmaxoutput": DEFAULT_DATA_SO_LARGE_TENSOR, - "label_softmaxoutput": DEFAULT_LABEL_SO_LARGE_TENSOR, - "data_maeregressionoutput": DEFAULT_DATA_REG_LARGE_TENSOR, - "label_maeregressionoutput": DEFAULT_LABEL_REG_LARGE_TENSOR, - "data_logisticregressionoutput": DEFAULT_DATA_REG_LARGE_TENSOR, - "label_logisticregressionoutput": DEFAULT_LABEL_REG_LARGE_TENSOR, - "data_linearregressionoutput": 
DEFAULT_DATA_REG_LARGE_TENSOR, - "label_linearregressionoutput": DEFAULT_LABEL_REG_LARGE_TENSOR, - "data_svmoutput": DEFAULT_DATA_SVM_LARGE_TENSOR, - "label_svmoutput": DEFAULT_LABEL_SVM_LARGE_TENSOR, "grad_scale": DEFAULT_GRAD_SCALE, "normalization": DEFAULT_NORMALIZATION, "margin": DEFAULT_MARGIN, diff --git a/benchmark/opperf/utils/benchmark_utils.py b/benchmark/opperf/utils/benchmark_utils.py index f2cce0abec09..b3bf8213569a 100644 --- a/benchmark/opperf/utils/benchmark_utils.py +++ b/benchmark/opperf/utils/benchmark_utils.py @@ -182,10 +182,6 @@ def run_performance_test(ops, inputs, run_backward=True, def run_op_benchmarks(ops, dtype, ctx, profiler, int64_tensor, warmup, runs): - # Running SoftmaxOutput backwards on GPU results in errors - # track issue here: https://github.com/apache/incubator-mxnet/issues/880 - gpu_backwards_disabled_ops = ['SoftmaxOutput'] - # Running im2col either forwards or backwards on GPU results in errors # track issue here: https://github.com/apache/incubator-mxnet/issues/17493 gpu_disabled_ops = ['im2col'] @@ -198,7 +194,7 @@ def run_op_benchmarks(ops, dtype, ctx, profiler, int64_tensor, warmup, runs): inputs = prepare_op_inputs(op, op_params, int64_tensor) # setting backward false for ops with known issue - if (ctx == mx.gpu() and op in gpu_backwards_disabled_ops) or op in no_backward: + if op in no_backward: op_params["has_backward"] = False # Run benchmarks diff --git a/benchmark/opperf/utils/op_registry_utils.py b/benchmark/opperf/utils/op_registry_utils.py index 65eb6aab2aac..d3cf1a418334 100644 --- a/benchmark/opperf/utils/op_registry_utils.py +++ b/benchmark/opperf/utils/op_registry_utils.py @@ -121,8 +121,8 @@ def prepare_op_inputs(op, arg_params, int64_tensor): # For ops with args that need to change shape/value for different ops custom_data = {'Activation', 'LeakyReLU', 'Softmax', 'BilinearSampler', 'GridGenerator', 'sample_multinomial', 'linalg_maketrian', 'SpatialTransformer', 'col2im', 'GroupNorm', 'Dropout', 
'FullyConnected', - 'SoftmaxOutput', 'LinearRegressionOutput', 'BatchNorm', 'LogisticRegressionOutput', - 'MAERegressionOutput', 'SVMOutput', 'L2Normalization', 'LayerNorm', 'InstanceNorm', + 'BatchNorm', + 'L2Normalization', 'LayerNorm', 'InstanceNorm', 'Embedding', 'Correlation', 'im2col', 'LRN', 'squeeze', 'fill_element_0index'} custom_data_int64 = {'random_pdf_dirichlet', 'random_pdf_exponential', 'random_pdf_gamma', @@ -366,8 +366,8 @@ def get_all_nn_basic_operators(): ------- {"operator_name": {"has_backward", "nd_op_handle", "params"}} """ - nn_basic_ops = ['FullyConnected', 'Dropout', 'BatchNorm', 'SoftmaxOutput', 'LinearRegressionOutput', - 'LogisticRegressionOutput', 'MAERegressionOutput', 'SVMOutput', 'L2Normalization', + nn_basic_ops = ['FullyConnected', 'Dropout', 'BatchNorm', + 'L2Normalization', 'LayerNorm', 'InstanceNorm', 'Embedding', 'Correlation', 'SpatialTransformer', 'im2col', 'col2im', 'GroupNorm', 'LRN'] diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 62412e092523..6b14a386fa09 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -491,10 +491,6 @@ build_ubuntu_cpu_cmake_asan() { -DMXNET_USE_CPU=ON \ /work/mxnet make -j $(nproc) mxnet - # Disable leak detection but enable ASAN to link with ASAN but not fail with build tooling. 
- ASAN_OPTIONS=detect_leaks=0 \ - LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libasan.so.5 \ - make -j $(nproc) mlp_cpu } build_ubuntu_cpu_gcc8_werror() { @@ -1170,21 +1166,6 @@ unittest_ubuntu_minimal_R() { R_LIBS=/tmp/r-site-library R CMD INSTALL --library=/tmp/r-site-library R-package - # pick mlp as minimal R test - R_LIBS=/tmp/r-site-library \ - Rscript -e "library(mxnet); require(mlbench); \ - data(Sonar, package=\"mlbench\"); \ - Sonar[,61] = as.numeric(Sonar[,61])-1; \ - train.ind = c(1:50, 100:150); \ - train.x = data.matrix(Sonar[train.ind, 1:60]); \ - train.y = Sonar[train.ind, 61]; \ - test.x = data.matrix(Sonar[-train.ind, 1:60]); \ - test.y = Sonar[-train.ind, 61]; \ - model = mx.mlp(train.x, train.y, hidden_node = 10, \ - out_node = 2, out_activation = \"softmax\", \ - learning.rate = 0.1, \ - array.layout = \"rowmajor\"); \ - preds = predict(model, test.x, array.layout = \"rowmajor\")" } unittest_ubuntu_gpu_R() { @@ -1271,39 +1252,12 @@ integrationtest_ubuntu_cpu_onnx() { pytest -n 4 tests/python/unittest/onnx/test_node.py } -integrationtest_ubuntu_cpu_asan() { - set -ex - export DMLC_LOG_STACK_TRACE_DEPTH=10 - - cd /work/mxnet/build/cpp-package/example/ - /work/mxnet/cpp-package/example/get_data.sh - ./mlp_cpu -} - integrationtest_ubuntu_gpu_cpp_package() { set -ex export DMLC_LOG_STACK_TRACE_DEPTH=10 cpp-package/tests/ci_test.sh } -integrationtest_ubuntu_gpu_capi_cpp_package() { - set -ex - export PYTHONPATH=./python/ - export LD_LIBRARY_PATH=/work/mxnet/lib:$LD_LIBRARY_PATH - python3 -c "import mxnet as mx; mx.test_utils.download_model(\"imagenet1k-resnet-18\"); mx.test_utils.download_model(\"imagenet1k-resnet-152\"); mx.test_utils.download_model(\"imagenet1k-resnet-50\");" - # Load symbol, convert symbol to leverage fusion with subgraphs, save the model - python3 -c "import mxnet as mx; x = mx.sym.load(\"imagenet1k-resnet-152-symbol.json\"); x.get_backend_symbol(\"MKLDNN\"); x.save(\"imagenet1k-resnet-152-subgraph-symbol.json\");" - # Copy params file 
with a different name, used in subgraph symbol testing - cp imagenet1k-resnet-152-0000.params imagenet1k-resnet-152-subgraph-0000.params - build/tests/cpp/mxnet_unit_tests --gtest_filter="ThreadSafety.*" - build/tests/cpp/mxnet_unit_tests --gtest_filter="ThreadSafety.*" --thread-safety-with-cpu - # Also run thread safety tests in NaiveEngine mode - export MXNET_ENGINE_TYPE=NaiveEngine - build/tests/cpp/mxnet_unit_tests --gtest_filter="ThreadSafety.*" - build/tests/cpp/mxnet_unit_tests --gtest_filter="ThreadSafety.*" --thread-safety-with-cpu - unset MXNET_ENGINE_TYPE -} - integrationtest_ubuntu_cpu_dist_kvstore() { set -ex pushd . diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index ce97f4a03a74..c7c700210d15 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -984,20 +984,6 @@ def test_unix_cpp_package_gpu(lib_name) { }] } -def test_unix_capi_cpp_package(lib_name) { - return ['capi-cpp-package GPU Makefile': { - node(NODE_LINUX_GPU_G4) { - ws('workspace/it-capi-cpp-package') { - timeout(time: max_time, unit: 'MINUTES') { - utils.unpack_and_init(lib_name, mx_lib_cpp_capi_make) - utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_capi_cpp_package', true) - utils.publish_test_coverage() - } - } - } - }] -} - def test_unix_scala_cpu(lib_name) { return ['Scala: CPU Makefile': { node(NODE_LINUX_CPU) { @@ -1693,17 +1679,6 @@ def docs_publish_beta() { } -def misc_asan_cpu(lib_name) { - return ['CPU ASAN': { - node(NODE_LINUX_CPU) { - ws('workspace/ut-python3-cpu-asan') { - utils.unpack_and_init(lib_name, mx_lib_cpp_examples_cpu) - utils.docker_run('ubuntu_cpu', 'integrationtest_ubuntu_cpu_asan', false) - } - } - }] -} - def sanity_lint() { return ['Lint': { node(NODE_LINUX_CPU) { diff --git a/ci/jenkins/Jenkinsfile_miscellaneous b/ci/jenkins/Jenkinsfile_miscellaneous index d6376a72ddbf..eb6b632cdbd3 100644 --- a/ci/jenkins/Jenkinsfile_miscellaneous +++ b/ci/jenkins/Jenkinsfile_miscellaneous @@ 
-44,7 +44,6 @@ core_logic: { ]) utils.parallel_stage('Tests', [ - custom_steps.misc_asan_cpu('cpu_asan'), custom_steps.misc_test_docker_cache_build() ]) } diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu index 33c0d4daf580..d662a95e014f 100644 --- a/ci/jenkins/Jenkinsfile_unix_gpu +++ b/ci/jenkins/Jenkinsfile_unix_gpu @@ -60,7 +60,6 @@ core_logic: { // TODO(szha): fix and reenable the hanging issue. tracked in #18098 // custom_steps.test_unix_distributed_kvstore_gpu('gpu'), custom_steps.test_unix_byteps_gpu('gpu'), - custom_steps.test_unix_capi_cpp_package('gpu_mkldnn_cpp_test_make'), ]) } , diff --git a/ci/windows/test_py3_gpu.ps1 b/ci/windows/test_py3_gpu.ps1 index 9f200f31b540..5dbc6fc28755 100644 --- a/ci/windows/test_py3_gpu.ps1 +++ b/ci/windows/test_py3_gpu.ps1 @@ -35,11 +35,6 @@ if ($LastExitCode -ne 0) { Throw ("Error running parallel tests, python exited w C:\Python37\python.exe -m pytest -v -m 'serial' --durations=50 --cov-report xml:tests_operator.xml --cov-append tests\python\gpu\test_operator_gpu.py if ($LastExitCode -ne 0) { Throw ("Error running serial tests, python exited with status code " + ('{0:X}' -f $LastExitCode)) } -C:\Python37\python.exe -m pytest -v -m 'not serial' -n 4 --durations=50 --cov-report xml:tests_forward.xml tests\python\gpu\test_forward.py -if ($LastExitCode -ne 0) { Throw ("Error running parallel tests, python exited with status code " + ('{0:X}' -f $LastExitCode)) } -C:\Python37\python.exe -m pytest -v -m 'serial' --durations=50 --cov-report xml:tests_forward.xml --cov-append tests\python\gpu\test_forward.py -if ($LastExitCode -ne 0) { Throw ("Error running serial tests, python exited with status code " + ('{0:X}' -f $LastExitCode)) } - C:\Python37\python.exe -m pytest -v -m 'not serial' -n 4 --durations=50 --cov-report xml:tests_train.xml tests\python\train if ($LastExitCode -ne 0) { Throw ("Error running parallel tests, python exited with status code " + ('{0:X}' -f $LastExitCode)) } 
C:\Python37\python.exe -m pytest -v -m 'serial' --durations=50 --cov-report xml:tests_train.xml --cov-append tests\python\train diff --git a/contrib/clojure-package/examples/bert/.gitignore b/contrib/clojure-package/examples/bert/.gitignore deleted file mode 100644 index 70c55267e7ab..000000000000 --- a/contrib/clojure-package/examples/bert/.gitignore +++ /dev/null @@ -1,18 +0,0 @@ -/target -/classes -/checkouts -pom.xml -pom.xml.asc -*.jar -*.class -/.lein-* -/.nrepl-port -.hgignore -.hg/ -data/* -model/* -*~ -*.params -*.states -*.json - diff --git a/contrib/clojure-package/examples/bert/README.md b/contrib/clojure-package/examples/bert/README.md deleted file mode 100644 index 0681a0694fc8..000000000000 --- a/contrib/clojure-package/examples/bert/README.md +++ /dev/null @@ -1,163 +0,0 @@ - - - - - - - - - - - - - - - - - - -# BERT - -There are two examples showcasing the power of BERT. One is BERT-QA for inference and the other is BERT Sentence Pair Classification which uses fine tuning of the BERT base model. For more information about BERT please read [http://jalammar.github.io/illustrated-bert/](http://jalammar.github.io/illustrated-bert/). - -## bert-qa - -**This example was based off of the Java API one. It shows how to do inference with a pre-trained BERT network that is trained on Questions and Answers using the [SQuAD Dataset](https://rajpurkar.github.io/SQuAD-explorer/)** - -The pretrained model was created using GluonNLP and then exported to the MXNet symbol format. You can find more information in the background section below. - -In this tutorial, we will walk through the BERT QA model trained by MXNet. -Users can provide a question with a paragraph contains answer to the model and -the model will be able to find the best answer from the answer paragraph. - -Example: - -``` -{:input-answer "Steam engines are external combustion engines, where the working fluid is separate from the combustion products. 
Non-combustion heat sources such as solar power, nuclear power or geothermal energy may be used. The ideal thermodynamic cycle used to analyze this process is called the Rankine cycle. In the cycle, water is heated and transforms into steam within a boiler operating at a high pressure. When expanded through pistons or turbines, mechanical work is done. The reduced-pressure steam is then condensed and pumped back into the boiler." - :input-question "Along with geothermal and nuclear, what is a notable non-combustion heat source?" - :ground-truth-answers ["solar" - "solar power" - "solar power, nuclear power or geothermal energy solar"]} -``` - -The prediction in this case would be `solar power` - -### Setup Guide - -Note: If you have trouble with your REPL and cider, please comment out the `lein-jupyter` plugin. There are some conflicts with cider. - -#### Step 1: Download the model - -For this tutorial, you can get the model and vocabulary by running following bash file. This script will use `wget` to download these artifacts from AWS S3. - -From the example directory: - -```bash -./get_bert_data.sh -``` - -Some sample questions and answers are provide in the `squad-sample.edn` file. Some are taken directly from the SQuAD dataset and one was just made up. Feel free to edit the file and add your own! - - -### To run - -* `lein install` in the root of the main project directory -* cd into this project directory and do `lein run`. This will execute the cpu version. - * `lein run` or `lein run :cpu` to run with cpu - * `lein run :gpu` to run with gpu - -### Background - -To learn more about how BERT works in MXNet, please follow this [MXNet Gluon tutorial on NLP using BERT](https://medium.com/apache-mxnet/gluon-nlp-bert-6a489bdd3340). - -The model was extracted from MXNet GluonNLP with static length settings. 
- -[Download link for the script](https://gluon-nlp.mxnet.io/_downloads/bert.zip) - -The original description can be found in the [MXNet GluonNLP model zoo](https://gluon-nlp.mxnet.io/model_zoo/bert/index.html#bert-base-on-squad-1-1). -```bash -python static_finetune_squad.py --optimizer adam --accumulate 2 --batch_size 6 --lr 3e-5 --epochs 2 --gpu 0 --export - -``` -This script will generate `json` and `param` files that are the standard MXNet model files. -By default, this model are using `bert_12_768_12` model with extra layers for QA jobs. - -After that, to be able to use it in Java, we need to export the dictionary from the script to parse the text -to actual indexes. Please add the following lines after [this line](https://github.com/dmlc/gluon-nlp/blob/master/scripts/bert/staticbert/static_finetune_squad.py#L262). -```python -import json -json_str = vocab.to_json() -f = open("vocab.json", "w") -f.write(json_str) -f.close() -``` -This would export the token vocabulary in json format. -Once you have these three files, you will be able to run this example without problems. - -## Fine-tuning Sentence Pair Classification with BERT - -This was based off of the great tutorial for in Gluon-NLP [https://gluon-nlp.mxnet.io/examples/sentence_embedding/bert.html](https://gluon-nlp.mxnet.io/examples/sentence_embedding/bert.html). - -We use the pre-trained BERT model that was exported from GluonNLP via the `scripts/bert/staticbert/static_export_base.py` running `python static_export_base.py --seq_length 128`. For convenience, the model has been downloaded for you by running the get_bert_data.sh file in the root directory of this example. - -It will fine tune the base bert model for use in a classification task for 3 epochs. - - -### Setup Guide - - -## Installation - -Before you run this example, make sure that you have the clojure package installed. -In the main clojure package directory, do `lein install`. Then you can run -`lein install` in this directory. 
- -#### Step 1: Download the model - -For this tutorial, you can get the model and vocabulary by running following bash file. This script will use `wget` to download these artifacts from AWS S3. - -From the example directory: - -```bash -./get_bert_data.sh -``` - -### To run the notebook walkthrough - -There is a Jupyter notebook that uses the `lein jupyter` plugin to be able to execute Clojure code in project setting. The first time that you run it you will need to install the kernel with `lein jupyter install-kernel`. After that you can open the notebook in the project directory with `lein jupyter notebook`. - -There is also an exported copy of the walkthrough to markdown `fine-tune-bert.md`. - - -### To run - -* `lein install` in the root of the main project directory -* cd into this project directory and do `lein run`. This will execute the cpu version. - -`lein run -m bert.bert-sentence-classification :cpu` - to run with cpu -`lein run -m bert.bert-sentence-classification :gpu` - to run with gpu - -By default it will run 3 epochs, you can control the number of epochs with: - -`lein run -m bert.bert-sentence-classification :cpu 1` to run just 1 epoch - - -Sample results from cpu run on OSX -``` -INFO org.apache.mxnet.module.BaseModule: Epoch[1] Train-accuracy=0.65384614 -INFO org.apache.mxnet.module.BaseModule: Epoch[1] Time cost=464187 -INFO org.apache.mxnet.Callback$Speedometer: Epoch[2] Batch [1] Speed: 0.91 samples/sec Train-accuracy=0.656250 -INFO org.apache.mxnet.Callback$Speedometer: Epoch[2] Batch [2] Speed: 0.90 samples/sec Train-accuracy=0.656250 -INFO org.apache.mxnet.Callback$Speedometer: Epoch[2] Batch [3] Speed: 0.91 samples/sec Train-accuracy=0.687500 -INFO org.apache.mxnet.Callback$Speedometer: Epoch[2] Batch [4] Speed: 0.90 samples/sec Train-accuracy=0.693750 -INFO org.apache.mxnet.Callback$Speedometer: Epoch[2] Batch [5] Speed: 0.91 samples/sec Train-accuracy=0.703125 -INFO org.apache.mxnet.Callback$Speedometer: Epoch[2] Batch [6] Speed: 0.92 
samples/sec Train-accuracy=0.696429 -INFO org.apache.mxnet.Callback$Speedometer: Epoch[2] Batch [7] Speed: 0.91 samples/sec Train-accuracy=0.699219 -INFO org.apache.mxnet.Callback$Speedometer: Epoch[2] Batch [8] Speed: 0.90 samples/sec Train-accuracy=0.701389 -INFO org.apache.mxnet.Callback$Speedometer: Epoch[2] Batch [9] Speed: 0.90 samples/sec Train-accuracy=0.690625 -INFO org.apache.mxnet.Callback$Speedometer: Epoch[2] Batch [10] Speed: 0.89 samples/sec Train-accuracy=0.690341 -INFO org.apache.mxnet.Callback$Speedometer: Epoch[2] Batch [11] Speed: 0.90 samples/sec Train-accuracy=0.695313 -INFO org.apache.mxnet.Callback$Speedometer: Epoch[2] Batch [12] Speed: 0.91 samples/sec Train-accuracy=0.701923 -INFO org.apache.mxnet.module.BaseModule: Epoch[2] Train-accuracy=0.7019231 -INFO org.apache.mxnet.module.BaseModule: Epoch[2] Time cost=459809 -```` diff --git a/contrib/clojure-package/examples/bert/fine-tune-bert.ipynb b/contrib/clojure-package/examples/bert/fine-tune-bert.ipynb deleted file mode 100644 index 5934477ea338..000000000000 --- a/contrib/clojure-package/examples/bert/fine-tune-bert.ipynb +++ /dev/null @@ -1,629 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Fine-tuning Sentence Pair Classification with BERT\n", - "\n", - "**This tutorial is based off of the Gluon NLP one here https://gluon-nlp.mxnet.io/examples/sentence_embedding/bert.html**\n", - "\n", - "Pre-trained language representations have been shown to improve many downstream NLP tasks such as question answering, and natural language inference. 
To apply pre-trained representations to these tasks, there are two strategies:\n", - "\n", - " - **feature-based approach**, which uses the pre-trained representations as additional features to the downstream task.\n", - " - **fine-tuning based approach**, which trains the downstream tasks by fine-tuning pre-trained parameters.\n", - " \n", - "While feature-based approaches such as ELMo [1] are effective in improving many downstream tasks, they require task-specific architectures. Devlin, Jacob, et al proposed BERT [2] (Bidirectional Encoder Representations from Transformers), which fine-tunes deep bidirectional representations on a wide range of tasks with minimal task-specific parameters, and obtained state- of-the-art results.\n", - "\n", - "In this tutorial, we will focus on fine-tuning with the pre-trained BERT model to classify semantically equivalent sentence pairs. Specifically, we will:\n", - "\n", - " 1. load the state-of-the-art pre-trained BERT model and attach an additional layer for classification\n", - " 2. process and transform sentence pair data for the task at hand, and \n", - " 3. fine-tune BERT model for sentence classification.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Preparation\n", - "\n", - "To run this tutorial locally, in the example directory:\n", - "\n", - "1. Get the model and supporting data by running `get_bert_data.sh`. \n", - "2. This Jupyter Notebook uses the lein-jupyter plugin to be able to execute Clojure code in project setting. The first time that you run it you will need to install the kernel with`lein jupyter install-kernel`. After that you can open the notebook in the project directory with `lein jupyter notebook`." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load requirements\n", - "\n", - "We need to load up all the namespace requires" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "(ns bert.bert-sentence-classification\n", - " (:require [bert.util :as bert-util]\n", - " [clojure-csv.core :as csv]\n", - " [clojure.java.shell :refer [sh]]\n", - " [clojure.string :as string]\n", - " [org.apache.clojure-mxnet.callback :as callback]\n", - " [org.apache.clojure-mxnet.context :as context]\n", - " [org.apache.clojure-mxnet.dtype :as dtype]\n", - " [org.apache.clojure-mxnet.infer :as infer]\n", - " [org.apache.clojure-mxnet.eval-metric :as eval-metric]\n", - " [org.apache.clojure-mxnet.io :as mx-io]\n", - " [org.apache.clojure-mxnet.layout :as layout]\n", - " [org.apache.clojure-mxnet.module :as m]\n", - " [org.apache.clojure-mxnet.ndarray :as ndarray]\n", - " [org.apache.clojure-mxnet.optimizer :as optimizer]\n", - " [org.apache.clojure-mxnet.symbol :as sym]))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "# Use the Pre-trained BERT Model\n", - "\n", - "In this tutorial we will use the pre-trained BERT model that was exported from GluonNLP via the `scripts/bert/staticbert/static_export_base.py`. For convenience, the model has been downloaded for you by running the `get_bert_data.sh` file in the root directory of this example." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Get BERT\n", - "\n", - "Let’s first take a look at the BERT model architecture for sentence pair classification below:\n", - "\n", - "![bert](https://gluon-nlp.mxnet.io/_images/bert-sentence-pair.png)\n", - "\n", - "where the model takes a pair of sequences and *pools* the representation of the first token in the sequence. 
Note that the original BERT model was trained for masked language model and next sentence prediction tasks, which includes layers for language model decoding and classification. These layers will not be used for fine-tuning sentence pair classification.\n", - "\n", - "Let's load the pre-trained BERT using the module API in MXNet." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "#'bert.bert-sentence-classification/bert-base" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(def model-path-prefix \"data/static_bert_base_net\")\n", - "\n", - ";; the vocabulary used in the model\n", - "(def vocab (bert-util/get-vocab))\n", - "\n", - ";; the maximum length of the sequence\n", - "(def seq-length 128)\n", - "\n", - "(def batch-size 32)\n", - "\n", - "(def bert-base (m/load-checkpoint {:prefix model-path-prefix :epoch 0}))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Model Definition for Sentence Pair Classification\n", - "\n", - "Now that we have loaded the BERT model, we only need to attach an additional layer for classification. We can do this by defining a fine tune model from the symbol of the base BERT model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "#'bert.bert-sentence-classification/model-sym" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(defn fine-tune-model\n", - " \"msymbol: the pretrained network symbol\n", - " num-classes: the number of classes for the fine-tune datasets\n", - " dropout: the dropout rate\"\n", - " [msymbol {:keys [num-classes dropout]}]\n", - " (as-> msymbol data\n", - " (sym/dropout {:data data :p dropout})\n", - " (sym/fully-connected \"fc-finetune\" {:data data :num-hidden num-classes})\n", - " (sym/softmax-output \"softmax\" {:data data})))\n", - "\n", - "(def model-sym (fine-tune-model (m/symbol bert-base) {:num-classes 2 :dropout 0.1}))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Data Preprocessing for BERT\n", - "\n", - "## Dataset\n", - "\n", - "For demonstration purpose, we use the dev set of the Microsoft Research Paraphrase Corpus dataset. The file is named ‘dev.tsv’ and was downloaded as part of the data script. Let’s take a look at the raw dataset." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Quality\t#1 ID\t#2 ID\t#1 String\t#2 String\n", - "1\t1355540\t1355592\tHe said the foodservice pie business doesn 't fit the company 's long-term growth strategy .\t\" The foodservice pie business does not fit our long-term growth strategy .\n", - "0\t2029631\t2029565\tMagnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war .\tHis wife said he was \" 100 percent behind George Bush \" and looked forward to using his years of training in the war .\n", - "0\t487993\t487952\tThe dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat .\tThe dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent .\n", - "1\t1989515\t1989458\tThe AFL-CIO is waiting until October to decide if it will endorse a candidate .\tThe AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries .\n", - "\n" - ] - } - ], - "source": [ - "(-> (sh \"head\" \"-n\" \"5\" \"data/dev.tsv\") \n", - " :out\n", - " println)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The file contains 5 columns, separated by tabs (i.e. ‘\n", - "\n", - "\\t\n", - "‘). The first line of the file explains each of these columns: 0. the label indicating whether the two sentences are semantically equivalent 1. the id of the first sentence in this sample 2. the id of the second sentence in this sample 3. the content of the first sentence 4. the content of the second sentence\n", - "\n", - "For our task, we are interested in the 0th, 3rd and 4th columns. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "He said the foodservice pie business doesn 't fit the company 's long-term growth strategy .\n", - " The foodservice pie business does not fit our long-term growth strategy .\n", - "1\n" - ] - } - ], - "source": [ - "(def raw-file \n", - " (csv/parse-csv (string/replace (slurp \"data/dev.tsv\") \"\\\"\" \"\")\n", - " :delimiter \\tab\n", - " :strict true))\n", - "\n", - "(def data-train-raw (->> raw-file\n", - " (mapv #(vals (select-keys % [3 4 0])))\n", - " (rest) ; drop header\n", - " (into [])))\n", - "\n", - "(def sample (first data-train-raw))\n", - "(println (nth sample 0)) ;;;sentence a\n", - "(println (nth sample 1)) ;; sentence b\n", - "(println (nth sample 2)) ;; 1 means equivalent, 0 means not equivalent" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To use the pre-trained BERT model, we need to preprocess the data in the same way it was trained. 
The following figure shows the input representation in BERT:\n", - "\n", - "![bert-input](https://gluon-nlp.mxnet.io/_images/bert-embed.png)\n", - "\n", - "We will do pre-processing on the inputs to get them in the right format and to perform the following transformations:\n", - "- tokenize the input sequences\n", - "- insert [CLS] at the beginning\n", - "- insert [SEP] between sentence one and sentence two, and at the end - generate segment ids to indicate whether a token belongs to the first sequence or the second sequence.\n", - "- generate valid length" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train Count is = 408\n", - "[PAD] token id = 1\n", - "[CLS] token id = 2\n", - "[SEP] token id = 3\n", - "token ids = \n", - " [2 2002 2056 1996 0 11345 2449 2987 0 4906 1996 2194 0 0 3930 5656 0 1012 3 0 1996 0 11345 2449 2515 2025 4906 2256 0 3930 5656 0 1012 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]\n", - "segment ids = \n", - " [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n", - "valid length = \n", - " [31]\n", - "label = \n", - " [0]\n" - ] - } - ], - "source": [ - "(defn pre-processing\n", - " \"Preprocesses the sentences in the format that BERT is expecting\"\n", - " [idx->token token->idx train-item]\n", - " (let [[sentence-a sentence-b label] train-item\n", - " ;;; pre-processing tokenize sentence\n", - " token-1 (bert-util/tokenize (string/lower-case sentence-a))\n", - " token-2 (bert-util/tokenize (string/lower-case sentence-b))\n", - " valid-length (+ (count 
token-1) (count token-2))\n", - " ;;; generate token types [0000...1111...0000]\n", - " qa-embedded (into (bert-util/pad [] 0 (count token-1))\n", - " (bert-util/pad [] 1 (count token-2)))\n", - " token-types (bert-util/pad qa-embedded 0 seq-length)\n", - " ;;; make BERT pre-processing standard\n", - " token-2 (conj token-2 \"[SEP]\")\n", - " token-1 (into [] (concat [\"[CLS]\"] token-1 [\"[SEP]\"] token-2))\n", - " tokens (bert-util/pad token-1 \"[PAD]\" seq-length)\n", - " ;;; pre-processing - token to index translation\n", - " indexes (bert-util/tokens->idxs token->idx tokens)]\n", - " {:input-batch [indexes\n", - " token-types\n", - " [valid-length]]\n", - " :label (if (= \"0\" label)\n", - " [0]\n", - " [1])\n", - " :tokens tokens\n", - " :train-item train-item}))\n", - "\n", - "(def idx->token (:idx->token vocab))\n", - "(def token->idx (:token->idx vocab))\n", - "(def dev (context/default-context))\n", - "(def processed-datas (mapv #(pre-processing idx->token token->idx %) data-train-raw))\n", - "(def train-count (count processed-datas))\n", - "(println \"Train Count is = \" train-count)\n", - "(println \"[PAD] token id = \" (get token->idx \"[PAD]\"))\n", - "(println \"[CLS] token id = \" (get token->idx \"[CLS]\"))\n", - "(println \"[SEP] token id = \" (get token->idx \"[SEP]\"))\n", - "(println \"token ids = \\n\"(-> (first processed-datas) :input-batch first)) \n", - "(println \"segment ids = \\n\"(-> (first processed-datas) :input-batch second)) \n", - "(println \"valid length = \\n\" (-> (first processed-datas) :input-batch last)) \n", - "(println \"label = \\n\" (-> (second processed-datas) :label)) \n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now that we have all the input-batches for each row, we are going to slice them up column-wise and create NDArray Iterators that we can use in training" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false - }, - "outputs": [ - 
{ - "data": { - "text/plain": [ - "#object[org.apache.mxnet.io.NDArrayIter 0x2583097d \"non-empty iterator\"]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(defn slice-inputs-data\n", - " \"Each sentence pair had to be processed as a row. This breaks all\n", - " the rows up into a column for creating a NDArray\"\n", - " [processed-datas n]\n", - " (->> processed-datas\n", - " (mapv #(nth (:input-batch %) n))\n", - " (flatten)\n", - " (into [])))\n", - "\n", - "(def prepared-data {:data0s (slice-inputs-data processed-datas 0)\n", - " :data1s (slice-inputs-data processed-datas 1)\n", - " :data2s (slice-inputs-data processed-datas 2)\n", - " :labels (->> (mapv :label processed-datas)\n", - " (flatten)\n", - " (into []))\n", - " :train-num (count processed-datas)})\n", - "\n", - "(def train-data\n", - " (let [{:keys [data0s data1s data2s labels train-num]} prepared-data\n", - " data-desc0 (mx-io/data-desc {:name \"data0\"\n", - " :shape [train-num seq-length]\n", - " :dtype dtype/FLOAT32\n", - " :layout layout/NT})\n", - " data-desc1 (mx-io/data-desc {:name \"data1\"\n", - " :shape [train-num seq-length]\n", - " :dtype dtype/FLOAT32\n", - " :layout layout/NT})\n", - " data-desc2 (mx-io/data-desc {:name \"data2\"\n", - " :shape [train-num]\n", - " :dtype dtype/FLOAT32\n", - " :layout layout/N})\n", - " label-desc (mx-io/data-desc {:name \"softmax_label\"\n", - " :shape [train-num]\n", - " :dtype dtype/FLOAT32\n", - " :layout layout/N})]\n", - " (mx-io/ndarray-iter {data-desc0 (ndarray/array data0s [train-num seq-length]\n", - " {:ctx dev})\n", - " data-desc1 (ndarray/array data1s [train-num seq-length]\n", - " {:ctx dev})\n", - " data-desc2 (ndarray/array data2s [train-num]\n", - " {:ctx dev})}\n", - " {:label {label-desc (ndarray/array labels [train-num]\n", - " {:ctx dev})}\n", - " :data-batch-size batch-size})))\n", - "train-data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ 
- "# Fine-tune BERT Model\n", - "\n", - "Putting everything together, now we can fine-tune the model with a few epochs. For demonstration, we use a fixed learning rate and skip validation steps." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Speedometer: epoch 0 count 1 metric [accuracy 0.609375]\n", - "Speedometer: epoch 0 count 2 metric [accuracy 0.6041667]\n", - "Speedometer: epoch 0 count 3 metric [accuracy 0.5703125]\n", - "Speedometer: epoch 0 count 4 metric [accuracy 0.55625]\n", - "Speedometer: epoch 0 count 5 metric [accuracy 0.5625]\n", - "Speedometer: epoch 0 count 6 metric [accuracy 0.55803573]\n", - "Speedometer: epoch 0 count 7 metric [accuracy 0.5625]\n", - "Speedometer: epoch 0 count 8 metric [accuracy 0.5798611]\n", - "Speedometer: epoch 0 count 9 metric [accuracy 0.584375]\n", - "Speedometer: epoch 0 count 10 metric [accuracy 0.57670456]\n", - "Speedometer: epoch 0 count 11 metric [accuracy 0.5807292]\n", - "Speedometer: epoch 0 count 12 metric [accuracy 0.5793269]\n", - "Speedometer: epoch 1 count 1 metric [accuracy 0.5625]\n", - "Speedometer: epoch 1 count 2 metric [accuracy 0.5520833]\n", - "Speedometer: epoch 1 count 3 metric [accuracy 0.5859375]\n", - "Speedometer: epoch 1 count 4 metric [accuracy 0.59375]\n", - "Speedometer: epoch 1 count 5 metric [accuracy 0.6145833]\n", - "Speedometer: epoch 1 count 6 metric [accuracy 0.625]\n", - "Speedometer: epoch 1 count 7 metric [accuracy 0.640625]\n", - "Speedometer: epoch 1 count 8 metric [accuracy 0.6527778]\n", - "Speedometer: epoch 1 count 9 metric [accuracy 0.653125]\n", - "Speedometer: epoch 1 count 10 metric [accuracy 0.6448864]\n", - "Speedometer: epoch 1 count 11 metric [accuracy 0.640625]\n", - "Speedometer: epoch 1 count 12 metric [accuracy 0.6418269]\n", - "Speedometer: epoch 2 count 1 metric [accuracy 0.671875]\n", - "Speedometer: epoch 2 count 2 metric 
[accuracy 0.7083333]\n", - "Speedometer: epoch 2 count 3 metric [accuracy 0.7109375]\n", - "Speedometer: epoch 2 count 4 metric [accuracy 0.725]\n", - "Speedometer: epoch 2 count 5 metric [accuracy 0.7239583]\n", - "Speedometer: epoch 2 count 6 metric [accuracy 0.71875]\n", - "Speedometer: epoch 2 count 7 metric [accuracy 0.734375]\n", - "Speedometer: epoch 2 count 8 metric [accuracy 0.7361111]\n", - "Speedometer: epoch 2 count 9 metric [accuracy 0.721875]\n", - "Speedometer: epoch 2 count 10 metric [accuracy 0.71022725]\n", - "Speedometer: epoch 2 count 11 metric [accuracy 0.6979167]\n", - "Speedometer: epoch 2 count 12 metric [accuracy 0.7019231]\n" - ] - }, - { - "data": { - "text/plain": [ - "#object[org.apache.mxnet.module.Module 0x73c42ae5 \"org.apache.mxnet.module.Module@73c42ae5\"]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(def num-epoch 3)\n", - "\n", - "(def fine-tune-model (m/module model-sym {:contexts [dev]\n", - " :data-names [\"data0\" \"data1\" \"data2\"]}))\n", - "\n", - "(m/fit fine-tune-model {:train-data train-data :num-epoch num-epoch\n", - " :fit-params (m/fit-params {:allow-missing true\n", - " :arg-params (m/arg-params bert-base)\n", - " :aux-params (m/aux-params bert-base)\n", - " :optimizer (optimizer/adam {:learning-rate 5e-6 :episilon 1e-9})\n", - " :batch-end-callback (callback/speedometer batch-size 1)})})\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Explore results from the fine-tuned model\n", - "\n", - "Now that our model is fitted, we can use it to infer semantic equivalence of arbitrary sentence pairs. Note that for demonstration purpose we skipped the warmup learning rate schedule and validation on dev dataset used in the original implementation. This means that our model's performance will be significantly less than optimal. 
Please visit [here](https://gluon-nlp.mxnet.io/model_zoo/bert/index.html) for the complete fine-tuning scripts (using Python and GluonNLP).\n", - "\n", - "To do inference with our model we need a predictor. It must have a batch size of 1 so we can feed the model a single sentence pair." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "#'bert.bert-sentence-classification/fine-tuned-predictor" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(def fine-tuned-prefix \"fine-tune-sentence-bert\")\n", - "\n", - "(m/save-checkpoint fine-tune-model {:prefix fine-tuned-prefix :epoch 3})\n", - "\n", - "(def fine-tuned-predictor\n", - " (infer/create-predictor (infer/model-factory fine-tuned-prefix\n", - " [{:name \"data0\" :shape [1 seq-length] :dtype dtype/FLOAT32 :layout layout/NT}\n", - " {:name \"data1\" :shape [1 seq-length] :dtype dtype/FLOAT32 :layout layout/NT}\n", - " {:name \"data2\" :shape [1] :dtype dtype/FLOAT32 :layout layout/N}])\n", - " {:epoch 3}))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we can write a function that feeds a sentence pair to the fine-tuned model:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "#'bert.bert-sentence-classification/predict-equivalence" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(defn predict-equivalence\n", - " [predictor sentence1 sentence2]\n", - " (let [vocab (bert.util/get-vocab)\n", - " processed-test-data (mapv #(pre-processing (:idx->token vocab)\n", - " (:token->idx vocab) %)\n", - " [[sentence1 sentence2]])\n", - " prediction (infer/predict-with-ndarray predictor\n", - " [(ndarray/array (slice-inputs-data processed-test-data 0) [1 seq-length])\n", - " (ndarray/array (slice-inputs-data 
processed-test-data 1) [1 seq-length])\n", - " (ndarray/array (slice-inputs-data processed-test-data 2) [1])])]\n", - " (ndarray/->vec (first prediction))))" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0.2633881 0.7366119]" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - ";; Modify an existing sentence pair to test:\n", - ";; [\"1\"\n", - ";; \"69773\"\n", - ";; \"69792\"\n", - ";; \"Cisco pared spending to compensate for sluggish sales .\"\n", - ";; \"In response to sluggish sales , Cisco pared spending .\"]\n", - "(predict-equivalence fine-tuned-predictor\n", - " \"The company cut spending to compensate for weak sales .\"\n", - " \"In response to poor sales results, the company cut spending .\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## References\n", - "\n", - "[1] Peters, Matthew E., et al. “Deep contextualized word representations.” arXiv preprint arXiv:1802.05365 (2018).\n", - "\n", - "[2] Devlin, Jacob, et al. “Bert: Pre-training of deep bidirectional transformers for language understanding.” arXiv preprint arXiv:1810.04805 (2018)." 
- ] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Lein-Clojure", - "language": "clojure", - "name": "lein-clojure" - }, - "language_info": { - "file_extension": ".clj", - "mimetype": "text/x-clojure", - "name": "clojure", - "version": "1.9.0" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/contrib/clojure-package/examples/bert/fine-tune-bert.md b/contrib/clojure-package/examples/bert/fine-tune-bert.md deleted file mode 100644 index 4e6681e7aade..000000000000 --- a/contrib/clojure-package/examples/bert/fine-tune-bert.md +++ /dev/null @@ -1,371 +0,0 @@ - - - - - - - - - - - - - - - - - - -# Fine-tuning Sentence Pair Classification with BERT - -**This tutorial is based off of the Gluon NLP one here https://gluon-nlp.mxnet.io/examples/sentence_embedding/bert.html** - -Pre-trained language representations have been shown to improve many downstream NLP tasks such as question answering, and natural language inference. To apply pre-trained representations to these tasks, there are two strategies: - -feature-based approach, which uses the pre-trained representations as additional features to the downstream task. -fine-tuning based approach, which trains the downstream tasks by fine-tuning pre-trained parameters. -While feature-based approaches such as ELMo [3] (introduced in the previous tutorial) are effective in improving many downstream tasks, they require task-specific architectures. Devlin, Jacob, et al proposed BERT [1] (Bidirectional Encoder Representations from Transformers), which fine-tunes deep bidirectional representations on a wide range of tasks with minimal task-specific parameters, and obtained state- of-the-art results. - -In this tutorial, we will focus on fine-tuning with the pre-trained BERT model to classify semantically equivalent sentence pairs. 
Specifically, we will: - -load the state-of-the-art pre-trained BERT model and attach an additional layer for classification, -process and transform sentence pair data for the task at hand, and -fine-tune BERT model for sentence classification. - - - -## Preparation - -To run this tutorial locally, in the example directory: - -1. Get the model and supporting data by running `get_bert_data.sh`. -2. This Jupyter Notebook uses the lein-jupyter plugin to be able to execute Clojure code in project setting. The first time that you run it you will need to install the kernel with`lein jupyter install-kernel`. After that you can open the notebook in the project directory with `lein jupyter notebook`. - -## Load requirements - -We need to load up all the namespace requires - - -```clojure -(ns bert.bert-sentence-classification - (:require [bert.util :as bert-util] - [clojure-csv.core :as csv] - [clojure.java.shell :refer [sh]] - [clojure.string :as string] - [org.apache.clojure-mxnet.callback :as callback] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.dtype :as dtype] - [org.apache.clojure-mxnet.eval-metric :as eval-metric] - [org.apache.clojure-mxnet.io :as mx-io] - [org.apache.clojure-mxnet.layout :as layout] - [org.apache.clojure-mxnet.module :as m] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.optimizer :as optimizer] - [org.apache.clojure-mxnet.symbol :as sym])) - -``` - -# Use the Pre-trained BERT Model - -In this tutorial we will use the pre-trained BERT model that was exported from GluonNLP via the `scripts/bert/staticbert/static_export_base.py`. For convenience, the model has been downloaded for you by running the `get_bert_data.sh` file in the root directory of this example. 
- -## Get BERT - -Let’s first take a look at the BERT model architecture for sentence pair classification below: - -![bert](https://gluon-nlp.mxnet.io/_images/bert-sentence-pair.png) - -where the model takes a pair of sequences and pools the representation of the first token in the sequence. Note that the original BERT model was trained for masked language model and next sentence prediction tasks, which includes layers for language model decoding and classification. These layers will not be used for fine-tuning sentence pair classification. - -Let's load the pre-trained BERT using the module API in MXNet. - - -```clojure -(def model-path-prefix "data/static_bert_base_net") -;; the vocabulary used in the model -(def vocab (bert-util/get-vocab)) -;; the input question -;; the maximum length of the sequence -(def seq-length 128) - -(def bert-base (m/load-checkpoint {:prefix model-path-prefix :epoch 0})) -``` - - - - - #'bert.bert-sentence-classification/bert-base - - - -## Model Definition for Sentence Pair Classification - -Now that we have loaded the BERT model, we only need to attach an additional layer for classification. We can do this by defining a fine tune model from the symbol of the base BERT model. - - -```clojure -(defn fine-tune-model - "msymbol: the pretrained network symbol - num-classes: the number of classes for the fine-tune datasets - dropout: the dropout rate" - [msymbol {:keys [num-classes dropout]}] - (as-> msymbol data - (sym/dropout {:data data :p dropout}) - (sym/fully-connected "fc-finetune" {:data data :num-hidden num-classes}) - (sym/softmax-output "softmax" {:data data}))) - -(def model-sym (fine-tune-model (m/symbol bert-base) {:num-classes 2 :dropout 0.1})) -``` - - - - - #'bert.bert-sentence-classification/model-sym - - - -# Data Preprocessing for BERT - -## Dataset - -For demonstration purpose, we use the dev set of the Microsoft Research Paraphrase Corpus dataset. 
The file is named ‘dev.tsv’ and was downloaded as part of the data script. Let’s take a look at the raw dataset. - - -```clojure -(-> (sh "head" "-n" "5" "data/dev.tsv") - :out - println) -``` - - Quality #1 ID #2 ID #1 String #2 String - 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy . - 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war . - 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent . - 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . - - - -The file contains 5 columns, separated by tabs (i.e. ‘ - -\t -‘). The first line of the file explains each of these columns: 0. the label indicating whether the two sentences are semantically equivalent 1. the id of the first sentence in this sample 2. the id of the second sentence in this sample 3. the content of the first sentence 4. the content of the second sentence - -For our task, we are interested in the 0th, 3rd and 4th columns. 
- - -```clojure -(def raw-file - (csv/parse-csv (string/replace (slurp "data/dev.tsv") "\"" "") - :delimiter \tab - :strict true)) - -(def data-train-raw (->> raw-file - (mapv #(vals (select-keys % [3 4 0]))) - (rest) ; drop header - (into []))) - -(def sample (first data-train-raw)) -(println (nth sample 0)) ;;;sentence a -(println (nth sample 1)) ;; sentence b -(println (nth sample 2)) ;; 1 means equivalent, 0 means not equivalent -``` - - He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . - The foodservice pie business does not fit our long-term growth strategy . - 1 - - -To use the pre-trained BERT model, we need to preprocess the data in the same way it was trained. The following figure shows the input representation in BERT: - -![bert-input](https://gluon-nlp.mxnet.io/_images/bert-embed.png) - -We will do pre-processing on the inputs to get them in the right format and to perform the following transformations: -- tokenize the input sequences -- insert [CLS] at the beginning -- insert [SEP] between sentence one and sentence two, and at the end - generate segment ids to indicate whether a token belongs to the first sequence or the second sequence. 
-- generate valid length - - -```clojure -(defn pre-processing - "Preprocesses the sentences in the format that BERT is expecting" - [ctx idx->token token->idx train-item] - (let [[sentence-a sentence-b label] train-item - ;;; pre-processing tokenize sentence - token-1 (bert-util/tokenize (string/lower-case sentence-a)) - token-2 (bert-util/tokenize (string/lower-case sentence-b)) - valid-length (+ (count token-1) (count token-2)) - ;;; generate token types [0000...1111...0000] - qa-embedded (into (bert-util/pad [] 0 (count token-1)) - (bert-util/pad [] 1 (count token-2))) - token-types (bert-util/pad qa-embedded 0 seq-length) - ;;; make BERT pre-processing standard - token-2 (conj token-2 "[SEP]") - token-1 (into [] (concat ["[CLS]"] token-1 ["[SEP]"] token-2)) - tokens (bert-util/pad token-1 "[PAD]" seq-length) - ;;; pre-processing - token to index translation - indexes (bert-util/tokens->idxs token->idx tokens)] - {:input-batch [indexes - token-types - [valid-length]] - :label (if (= "0" label) - [0] - [1]) - :tokens tokens - :train-item train-item})) - -(def idx->token (:idx->token vocab)) -(def token->idx (:token->idx vocab)) -(def dev (context/default-context)) -(def processed-datas (mapv #(pre-processing dev idx->token token->idx %) data-train-raw)) -(def train-count (count processed-datas)) -(println "Train Count is = " train-count) -(println "[PAD] token id = " (get token->idx "[PAD]")) -(println "[CLS] token id = " (get token->idx "[CLS]")) -(println "[SEP] token id = " (get token->idx "[SEP]")) -(println "token ids = \n"(-> (first processed-datas) :input-batch first)) -(println "segment ids = \n"(-> (first processed-datas) :input-batch second)) -(println "valid length = \n" (-> (first processed-datas) :input-batch last)) -(println "label = \n" (-> (second processed-datas) :label)) - - -``` - - Train Count is = 408 - [PAD] token id = 1 - [CLS] token id = 2 - [SEP] token id = 3 - token ids = - [2 2002 2056 1996 0 11345 2449 2987 0 4906 1996 2194 0 0 3930 
5656 0 1012 3 0 1996 0 11345 2449 2515 2025 4906 2256 0 3930 5656 0 1012 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] - segment ids = - [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] - valid length = - [31] - label = - [0] - - -Now that we have all the input-batches for each row, we are going to slice them up column-wise and create NDArray Iterators that we can use in training - - -```clojure -(defn slice-inputs-data - "Each sentence pair had to be processed as a row. This breaks all - the rows up into a column for creating a NDArray" - [processed-datas n] - (->> processed-datas - (mapv #(nth (:input-batch %) n)) - (flatten) - (into []))) - -(def prepared-data {:data0s (slice-inputs-data processed-datas 0) - :data1s (slice-inputs-data processed-datas 1) - :data2s (slice-inputs-data processed-datas 2) - :labels (->> (mapv :label processed-datas) - (flatten) - (into [])) - :train-num (count processed-datas)}) - -(def batch-size 32) - -(def train-data - (let [{:keys [data0s data1s data2s labels train-num]} prepared-data - data-desc0 (mx-io/data-desc {:name "data0" - :shape [train-num seq-length] - :dtype dtype/FLOAT32 - :layout layout/NT}) - data-desc1 (mx-io/data-desc {:name "data1" - :shape [train-num seq-length] - :dtype dtype/FLOAT32 - :layout layout/NT}) - data-desc2 (mx-io/data-desc {:name "data2" - :shape [train-num] - :dtype dtype/FLOAT32 - :layout layout/N}) - label-desc (mx-io/data-desc {:name "softmax_label" - :shape [train-num] - :dtype dtype/FLOAT32 - :layout layout/N})] - (mx-io/ndarray-iter {data-desc0 (ndarray/array data0s [train-num seq-length] - {:ctx dev}) - data-desc1 (ndarray/array data1s 
[train-num seq-length] - {:ctx dev}) - data-desc2 (ndarray/array data2s [train-num] - {:ctx dev})} - {:label {label-desc (ndarray/array labels [train-num] - {:ctx dev})} - :data-batch-size batch-size}))) -train-data -``` - - - - - #object[org.apache.mxnet.io.NDArrayIter 0x2583097d "non-empty iterator"] - - - -# Fine-tune BERT Model - -Putting everything together, now we can fine-tune the model with a few epochs. For demonstration, we use a fixed learning rate and skip validation steps. - - -```clojure -(def num-epoch 3) - -(def fine-tune-model (m/module model-sym {:contexts [dev] - :data-names ["data0" "data1" "data2"]})) - -(m/fit fine-tune-model {:train-data train-data :num-epoch num-epoch - :fit-params (m/fit-params {:allow-missing true - :arg-params (m/arg-params bert-base) - :aux-params (m/aux-params bert-base) - :optimizer (optimizer/adam {:learning-rate 5e-6 :episilon 1e-9}) - :batch-end-callback (callback/speedometer batch-size 1)})}) - -``` - - Speedometer: epoch 0 count 1 metric [accuracy 0.609375] - Speedometer: epoch 0 count 2 metric [accuracy 0.6041667] - Speedometer: epoch 0 count 3 metric [accuracy 0.5703125] - Speedometer: epoch 0 count 4 metric [accuracy 0.55625] - Speedometer: epoch 0 count 5 metric [accuracy 0.5625] - Speedometer: epoch 0 count 6 metric [accuracy 0.55803573] - Speedometer: epoch 0 count 7 metric [accuracy 0.5625] - Speedometer: epoch 0 count 8 metric [accuracy 0.5798611] - Speedometer: epoch 0 count 9 metric [accuracy 0.584375] - Speedometer: epoch 0 count 10 metric [accuracy 0.57670456] - Speedometer: epoch 0 count 11 metric [accuracy 0.5807292] - Speedometer: epoch 0 count 12 metric [accuracy 0.5793269] - Speedometer: epoch 1 count 1 metric [accuracy 0.5625] - Speedometer: epoch 1 count 2 metric [accuracy 0.5520833] - Speedometer: epoch 1 count 3 metric [accuracy 0.5859375] - Speedometer: epoch 1 count 4 metric [accuracy 0.59375] - Speedometer: epoch 1 count 5 metric [accuracy 0.6145833] - Speedometer: epoch 1 count 6 metric 
[accuracy 0.625] - Speedometer: epoch 1 count 7 metric [accuracy 0.640625] - Speedometer: epoch 1 count 8 metric [accuracy 0.6527778] - Speedometer: epoch 1 count 9 metric [accuracy 0.653125] - Speedometer: epoch 1 count 10 metric [accuracy 0.6448864] - Speedometer: epoch 1 count 11 metric [accuracy 0.640625] - Speedometer: epoch 1 count 12 metric [accuracy 0.6418269] - Speedometer: epoch 2 count 1 metric [accuracy 0.671875] - Speedometer: epoch 2 count 2 metric [accuracy 0.7083333] - Speedometer: epoch 2 count 3 metric [accuracy 0.7109375] - Speedometer: epoch 2 count 4 metric [accuracy 0.725] - Speedometer: epoch 2 count 5 metric [accuracy 0.7239583] - Speedometer: epoch 2 count 6 metric [accuracy 0.71875] - Speedometer: epoch 2 count 7 metric [accuracy 0.734375] - Speedometer: epoch 2 count 8 metric [accuracy 0.7361111] - Speedometer: epoch 2 count 9 metric [accuracy 0.721875] - Speedometer: epoch 2 count 10 metric [accuracy 0.71022725] - Speedometer: epoch 2 count 11 metric [accuracy 0.6979167] - Speedometer: epoch 2 count 12 metric [accuracy 0.7019231] - - - - - - #object[org.apache.mxnet.module.Module 0x73c42ae5 "org.apache.mxnet.module.Module@73c42ae5"] - - diff --git a/contrib/clojure-package/examples/bert/get_bert_data.sh b/contrib/clojure-package/examples/bert/get_bert_data.sh deleted file mode 100755 index 10ed8e9a1f8e..000000000000 --- a/contrib/clojure-package/examples/bert/get_bert_data.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e - -data_path=data - -if [ ! -d "$data_path" ]; then - mkdir -p "$data_path" - curl https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/BertQA/vocab.json -o $data_path/vocab.json - curl https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/BertQA/static_bert_qa-0002.params -o $data_path/static_bert_qa-0002.params - curl https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/BertQA/static_bert_qa-symbol.json -o $data_path/static_bert_qa-symbol.json - curl https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/BertQA/static_bert_base_net-symbol.json -o $data_path/static_bert_base_net-symbol.json - curl https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/BertQA/static_bert_base_net-0000.params -o $data_path/static_bert_base_net-0000.params - curl https://raw.githubusercontent.com/dmlc/gluon-nlp/master/docs/examples/sentence_embedding/dev.tsv -o $data_path/dev.tsv -fi diff --git a/contrib/clojure-package/examples/bert/project.clj b/contrib/clojure-package/examples/bert/project.clj deleted file mode 100644 index b53e0875951f..000000000000 --- a/contrib/clojure-package/examples/bert/project.clj +++ /dev/null @@ -1,32 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. 
You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - - -(defproject bert "0.1.0-SNAPSHOT" - :description "BERT Examples" - :plugins [[lein-cljfmt "0.5.7"] - ;;; lein-jupyter seems to have some incompatibilities with dependencies with cider - ;;; so if you run into trouble please delete the `lein-juptyter` plugin - [lein-jupyter "0.1.16" :exclusions [org.clojure/tools.nrepl org.clojure/clojure org.codehaus.plexus/plexus-utils org.clojure/tools.reader]]] - :dependencies [[org.clojure/clojure "1.9.0"] - [org.apache.mxnet.contrib.clojure/clojure-mxnet "2.0.0-SNAPSHOT"] - [cheshire "5.8.1"] - [clojure-csv/clojure-csv "2.0.1"]] - :pedantic? :skip - :java-source-paths ["src/java"] - :main bert.infer - :repl-options {:init-ns bert.infer}) diff --git a/contrib/clojure-package/examples/bert/squad-samples.edn b/contrib/clojure-package/examples/bert/squad-samples.edn deleted file mode 100644 index e99a181f7d17..000000000000 --- a/contrib/clojure-package/examples/bert/squad-samples.edn +++ /dev/null @@ -1,39 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. 
You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - - -[{:input-answer "Computational complexity theory is a branch of the theory of computation in theoretical computer science that focuses on classifying computational problems according to their inherent difficulty, and relating those classes to each other. A computational problem is understood to be a task that is in principle amenable to being solved by a computer, which is equivalent to stating that the problem may be solved by mechanical application of mathematical steps, such as an algorithm." - :input-question "By what main attribute are computational problems classified utilizing computational complexity theory?" - :ground-truth-answers ["Computational complexity theory" - "Computational complexity theory" - "complexity theory"]} - {:input-answer "Steam engines are external combustion engines, where the working fluid is separate from the combustion products. Non-combustion heat sources such as solar power, nuclear power or geothermal energy may be used. The ideal thermodynamic cycle used to analyze this process is called the Rankine cycle. In the cycle, water is heated and transforms into steam within a boiler operating at a high pressure. When expanded through pistons or turbines, mechanical work is done. The reduced-pressure steam is then condensed and pumped back into the boiler." - :input-question "Along with geothermal and nuclear, what is a notable non-combustion heat source?" 
- :ground-truth-answers ["solar" - "solar power" - "solar power, nuclear power or geothermal energysolar"]} - {:input-answer "In the 1960s, a series of discoveries, the most important of which was seafloor spreading, showed that the Earth's lithosphere, which includes the crust and rigid uppermost portion of the upper mantle, is separated into a number of tectonic plates that move across the plastically deforming, solid, upper mantle, which is called the asthenosphere. There is an intimate coupling between the movement of the plates on the surface and the convection of the mantle: oceanic plate motions and mantle convection currents always move in the same direction, because the oceanic lithosphere is the rigid upper thermal boundary layer of the convecting mantle. This coupling between rigid plates moving on the surface of the Earth and the convecting mantle is called plate tectonics." - :input-question "What was the most important discovery that led to the understanding that Earth's lithosphere is separated into tectonic plates?" - :ground-truth-answers ["seafloor spreading"]} - ;;; totally made up - {:input-answer "Susan had a cat named Sammy when she lived in the green house." - :input-question "What was Susan's cat named?" - :ground-truth-answers ["Sammy" "sammy"]} - ;;; more or less from wikipedia on clojure - {:input-answer "Rich Hickey is the creator of the Clojure language. Before Clojure, he developed dotLisp, a similar project based on the .NET platform, and three earlier attempts to provide interoperability between Lisp and Java: a Java foreign language interface for Common Lisp, A Foreign Object Interface for Lisp, and a Lisp-friendly interface to Java Servlets." - :input-question "Who created Clojure?" 
- :ground-truth-answers ["rich" "hickey"]}] diff --git a/contrib/clojure-package/examples/bert/src/bert/bert_sentence_classification.clj b/contrib/clojure-package/examples/bert/src/bert/bert_sentence_classification.clj deleted file mode 100644 index 6ec4d586ad17..000000000000 --- a/contrib/clojure-package/examples/bert/src/bert/bert_sentence_classification.clj +++ /dev/null @@ -1,225 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns bert.bert-sentence-classification - "Fine-tuning Sentence Pair Classification with BERT - This tutorial focuses on fine-tuning with the pre-trained BERT model to classify semantically equivalent sentence pairs. - - Specifically, we will: - 1. load the state-of-the-art pre-trained BERT model - 2. attach an additional layer for classification - 3. process and transform sentence pair data for the task at hand - 4. 
fine-tune BERT model for sentence classification" - (:require [bert.util :as bert-util] - [clojure-csv.core :as csv] - [clojure.string :as string] - [org.apache.clojure-mxnet.callback :as callback] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.dtype :as dtype] - [org.apache.clojure-mxnet.infer :as infer] - [org.apache.clojure-mxnet.io :as mx-io] - [org.apache.clojure-mxnet.layout :as layout] - [org.apache.clojure-mxnet.module :as m] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.optimizer :as optimizer] - [org.apache.clojure-mxnet.symbol :as sym])) - -;; Pre-trained language representations have been shown to improve -;; many downstream NLP tasks such as question answering, and natural -;; language inference. To apply pre-trained representations to these -;; tasks, there are two strategies: - -;; * feature-based approach, which uses the pre-trained representations as additional features to the downstream task. -;; * fine-tuning based approach, which trains the downstream tasks by fine-tuning pre-trained parameters. - -;; While feature-based approaches such as ELMo are effective in -;; improving many downstream tasks, they require task-specific -;; architectures. Devlin, Jacob, et al proposed BERT (Bidirectional -;; Encoder Representations from Transformers), which fine-tunes deep -;; bidirectional representations on a wide range of tasks with minimal -;; task-specific parameters, and obtained state-of-the-art results. 
- -(def model-path-prefix "data/static_bert_base_net") - -(def fine-tuned-prefix "fine-tune-sentence-bert") - -;; the maximum length of the sequence -(def seq-length 128) - -(defn pre-processing - "Preprocesses the sentences in the format that BERT is expecting" - [idx->token token->idx train-item] - (let [[sentence-a sentence-b label] train-item - ;; pre-processing tokenize sentence - token-1 (bert-util/tokenize (string/lower-case sentence-a)) - token-2 (bert-util/tokenize (string/lower-case sentence-b)) - valid-length (+ (count token-1) (count token-2)) - ;; generate token types [0000...1111...0000] - qa-embedded (into (bert-util/pad [] 0 (count token-1)) - (bert-util/pad [] 1 (count token-2))) - token-types (bert-util/pad qa-embedded 0 seq-length) - ;; make BERT pre-processing standard - token-2 (conj token-2 "[SEP]") - token-1 (into [] (concat ["[CLS]"] token-1 ["[SEP]"] token-2)) - tokens (bert-util/pad token-1 "[PAD]" seq-length) - ;; pre-processing - token to index translation - indexes (bert-util/tokens->idxs token->idx tokens)] - {:input-batch [indexes - token-types - [valid-length]] - :label (if (= "0" label) - [0] - [1]) - :tokens tokens - :train-item train-item})) - -(defn fine-tune-model - "msymbol: the pretrained network symbol - num-classes: the number of classes for the fine-tune datasets - dropout: The dropout rate amount" - [msymbol {:keys [num-classes dropout]}] - (as-> msymbol data - (sym/dropout {:data data :p dropout}) - (sym/fully-connected "fc-finetune" {:data data :num-hidden num-classes}) - (sym/softmax-output "softmax" {:data data}))) - -(defn slice-inputs-data - "Each sentence pair had to be processed as a row. 
This breaks all - the rows up into a column for creating a NDArray" - [processed-datas n] - (->> processed-datas - (mapv #(nth (:input-batch %) n)) - (flatten) - (into []))) - -(defn get-raw-data [] - (csv/parse-csv (string/replace (slurp "data/dev.tsv") "\"" "") - :delimiter \tab - :strict true)) - -(defn prepare-data - "This prepares the sentence pairs into NDArrays for use in NDArrayIterator" - [raw-data] - (let [vocab (bert-util/get-vocab) - idx->token (:idx->token vocab) - token->idx (:token->idx vocab) - data-train-raw (->> raw-data - (mapv #(vals (select-keys % [3 4 0]))) - (rest) ; drop header - (into [])) - processed-datas (mapv #(pre-processing idx->token token->idx %) data-train-raw)] - {:data0s (slice-inputs-data processed-datas 0) - :data1s (slice-inputs-data processed-datas 1) - :data2s (slice-inputs-data processed-datas 2) - :labels (->> (mapv :label processed-datas) - (flatten) - (into [])) - :train-num (count processed-datas)})) - -(defn train - "Trains (fine tunes) the sentence pairs for a classification task on the BERT Base model" - [dev num-epoch] - (let [bert-base (m/load-checkpoint {:prefix model-path-prefix :epoch 0}) - model-sym (fine-tune-model (m/symbol bert-base) {:num-classes 2 :dropout 0.1}) - {:keys [data0s data1s data2s labels train-num]} (prepare-data (get-raw-data)) - batch-size 32 - data-desc0 (mx-io/data-desc {:name "data0" - :shape [train-num seq-length] - :dtype dtype/FLOAT32 - :layout layout/NT}) - data-desc1 (mx-io/data-desc {:name "data1" - :shape [train-num seq-length] - :dtype dtype/FLOAT32 - :layout layout/NT}) - data-desc2 (mx-io/data-desc {:name "data2" - :shape [train-num] - :dtype dtype/FLOAT32 - :layout layout/N}) - label-desc (mx-io/data-desc {:name "softmax_label" - :shape [train-num] - :dtype dtype/FLOAT32 - :layout layout/N}) - train-data (mx-io/ndarray-iter {data-desc0 (ndarray/array data0s [train-num seq-length] - {:ctx dev}) - data-desc1 (ndarray/array data1s [train-num seq-length] - {:ctx dev}) - data-desc2 
(ndarray/array data2s [train-num] - {:ctx dev})} - {:label {label-desc (ndarray/array labels [train-num] - {:ctx dev})} - :data-batch-size batch-size}) - fitted-model (m/fit (m/module model-sym {:contexts [dev] - :data-names ["data0" "data1" "data2"]}) - {:train-data train-data :num-epoch num-epoch - :fit-params (m/fit-params {:allow-missing true - :arg-params (m/arg-params bert-base) - :aux-params (m/aux-params bert-base) - :optimizer (optimizer/adam {:learning-rate 5e-6 :epsilon 1e-9}) - :batch-end-callback (callback/speedometer batch-size 1)})})] - (m/save-checkpoint fitted-model {:prefix fine-tuned-prefix :epoch num-epoch}) - fitted-model)) - -(defn -main [& args] - (let [[dev-arg num-epoch-arg] args - dev (if (= dev-arg ":gpu") (context/gpu) (context/cpu)) - num-epoch (if num-epoch-arg (Integer/parseInt num-epoch-arg) 3)] - (println "Running example with " dev " and " num-epoch " epochs ") - (train dev num-epoch))) - -;; For evaluating the model -(defn predict-equivalence - "Get the fine-tuned model's opinion on whether two sentences are equivalent:" - [predictor sentence1 sentence2] - (let [vocab (bert.util/get-vocab) - processed-test-data (mapv #(pre-processing (:idx->token vocab) - (:token->idx vocab) %) - [[sentence1 sentence2]]) - prediction (infer/predict-with-ndarray predictor - [(ndarray/array (slice-inputs-data processed-test-data 0) [1 seq-length]) - (ndarray/array (slice-inputs-data processed-test-data 1) [1 seq-length]) - (ndarray/array (slice-inputs-data processed-test-data 2) [1])])] - (ndarray/->vec (first prediction)))) - -(comment - - (train (context/cpu 0) 3) - - (m/save-checkpoint model {:prefix fine-tuned-prefix :epoch 3}) - - - ;;;; Explore results from the fine-tuned model - - ;; We need a predictor with a batch size of 1, so we can feed the - ;; model a single sentence pair. 
- (def fine-tuned-predictor - (infer/create-predictor (infer/model-factory fine-tuned-prefix - [{:name "data0" :shape [1 seq-length] :dtype dtype/FLOAT32 :layout layout/NT} - {:name "data1" :shape [1 seq-length] :dtype dtype/FLOAT32 :layout layout/NT} - {:name "data2" :shape [1] :dtype dtype/FLOAT32 :layout layout/N}]) - {:epoch 3})) - - ;; Modify an existing sentence pair to test: - ;; ["1" - ;; "69773" - ;; "69792" - ;; "Cisco pared spending to compensate for sluggish sales ." - ;; "In response to sluggish sales , Cisco pared spending ."] - (predict-equivalence fine-tuned-predictor - "The company cut spending to compensate for weak sales ." - "In response to poor sales results, the company cut spending .") - - ) diff --git a/contrib/clojure-package/examples/bert/src/bert/infer.clj b/contrib/clojure-package/examples/bert/src/bert/infer.clj deleted file mode 100644 index 2a08dab36f85..000000000000 --- a/contrib/clojure-package/examples/bert/src/bert/infer.clj +++ /dev/null @@ -1,129 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - - - -(ns bert.infer - (:require [bert.util :as bert-util] - [clojure.pprint :as pprint] - [clojure.string :as string] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.dtype :as dtype] - [org.apache.clojure-mxnet.infer :as infer] - [org.apache.clojure-mxnet.layout :as layout] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.util :as util])) - -(def model-path-prefix "data/static_bert_qa") -;; epoch number of the model -(def epoch 2) -;; the maximum length of the sequence -(def seq-length 384) - -;;; data helpers - -(defn post-processing [result tokens] - (let [output1 (ndarray/slice-axis result 2 0 1) - output2 (ndarray/slice-axis result 2 1 2) - ;;; get the formatted logits result - start-logits (ndarray/reshape output1 [0 -3]) - end-logits (ndarray/reshape output2 [0 -3]) - start-prob (ndarray/softmax start-logits) - end-prob (ndarray/softmax end-logits) - start-idx (-> (ndarray/argmax start-prob 1) - (ndarray/->vec) - (first)) - end-idx (-> (ndarray/argmax end-prob 1) - (ndarray/->vec) - (first))] - (if (> end-idx start-idx) - (subvec tokens start-idx (inc end-idx)) - (subvec tokens end-idx (inc end-idx))))) - -(defn make-predictor [ctx] - (let [input-descs [{:name "data0" - :shape [1 seq-length] - :dtype dtype/FLOAT32 - :layout layout/NT} - {:name "data1" - :shape [1 seq-length] - :dtype dtype/FLOAT32 - :layout layout/NT} - {:name "data2" - :shape [1] - :dtype dtype/FLOAT32 - :layout layout/N}] - factory (infer/model-factory model-path-prefix input-descs)] - (infer/create-predictor - factory - {:contexts [ctx] - :epoch epoch}))) - -(defn pre-processing [ctx idx->token token->idx qa-map] - (let [{:keys [input-question input-answer ground-truth-answers]} qa-map - ;;; pre-processing tokenize sentence - token-q (bert-util/tokenize (string/lower-case input-question)) - token-a (bert-util/tokenize (string/lower-case input-answer)) - valid-length (+ (count token-q) (count token-a)) - ;;; generate token types 
[0000...1111...0000] - qa-embedded (into (bert-util/pad [] 0 (count token-q)) - (bert-util/pad [] 1 (count token-a))) - token-types (bert-util/pad qa-embedded 0 seq-length) - ;;; make BERT pre-processing standard - token-a (conj token-a "[SEP]") - token-q (into [] (concat ["[CLS]"] token-q ["[SEP]"] token-a)) - tokens (bert-util/pad token-q "[PAD]" seq-length) - ;;; pre-processing - token to index translation - - indexes (bert-util/tokens->idxs token->idx tokens)] - {:input-batch [(ndarray/array indexes [1 seq-length] {:context ctx}) - (ndarray/array token-types [1 seq-length] {:context ctx}) - (ndarray/array [valid-length] [1] {:context ctx})] - :tokens tokens - :qa-map qa-map})) - -(defn infer - ([] (infer (context/default-context))) - ([ctx] - (let [predictor (make-predictor ctx) - {:keys [idx->token token->idx]} (bert-util/get-vocab) - ;;; samples taken from https://rajpurkar.github.io/SQuAD-explorer/explore/v2.0/dev/ - question-answers (clojure.edn/read-string (slurp "squad-samples.edn"))] - (doseq [qa-map question-answers] - (let [{:keys [input-batch tokens qa-map]} (pre-processing ctx idx->token token->idx qa-map) - result (first (infer/predict-with-ndarray predictor input-batch)) - answer (post-processing result tokens)] - (println "===============================") - (println " Question Answer Data") - (pprint/pprint qa-map) - (println) - (println " Predicted Answer: " answer) - (println "===============================")))))) - -(defn -main [& args] - (let [[dev] args] - (if (= dev ":gpu") - (infer (context/gpu)) - (infer (context/cpu))))) - -(comment - - (infer) - - (infer (context/gpu)) - - ) diff --git a/contrib/clojure-package/examples/bert/src/bert/util.clj b/contrib/clojure-package/examples/bert/src/bert/util.clj deleted file mode 100644 index 061e12b4e8de..000000000000 --- a/contrib/clojure-package/examples/bert/src/bert/util.clj +++ /dev/null @@ -1,52 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; 
contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns bert.util - (:require [clojure.java.io :as io] - [clojure.string :as string] - [cheshire.core :as json])) - -(defn break-out-punctuation [s str-match] - (->> (string/split (str s "") (re-pattern (str "\\" str-match))) - (map #(string/replace % "" str-match)))) - -(defn break-out-punctuations [s] - (if-let [target-char (first (re-seq #"[.,?!]" s))] - (break-out-punctuation s target-char) - [s])) - -(defn tokenize [s] - (->> (string/split s #"\s+") - (mapcat break-out-punctuations) - (into []))) - -(defn pad [tokens pad-item num] - (if (>= (count tokens) num) - tokens - (into tokens (repeat (- num (count tokens)) pad-item)))) - -(defn get-vocab [] - (let [vocab (json/parse-stream (io/reader "data/vocab.json"))] - {:idx->token (get vocab "idx_to_token") - :token->idx (get vocab "token_to_idx")})) - -(defn tokens->idxs [token->idx tokens] - (let [unk-idx (get token->idx "[UNK]")] - (mapv #(get token->idx % unk-idx) tokens))) - -(defn idxs->tokens [idx->token idxs] - (mapv #(get idx->token %) idxs)) diff --git a/contrib/clojure-package/examples/bert/test/bert/bert_sentence_classification_test.clj b/contrib/clojure-package/examples/bert/test/bert/bert_sentence_classification_test.clj deleted file mode 100644 index c26301e34fe6..000000000000 --- 
a/contrib/clojure-package/examples/bert/test/bert/bert_sentence_classification_test.clj +++ /dev/null @@ -1,104 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - - -(ns bert.bert-sentence-classification-test - (:require [bert.bert-sentence-classification :refer :all] - [clojure-csv.core :as csv] - [clojure.java.io :as io] - [clojure.java.shell :refer [sh]] - [clojure.test :refer :all] - [org.apache.clojure-mxnet.callback :as callback] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.dtype :as dtype] - [org.apache.clojure-mxnet.eval-metric :as eval-metric] - [org.apache.clojure-mxnet.infer :as infer] - [org.apache.clojure-mxnet.io :as mx-io] - [org.apache.clojure-mxnet.layout :as layout] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.optimizer :as optimizer] - [org.apache.clojure-mxnet.module :as m])) - -(def model-dir "data/") - -(def test-prefix "test-fine-tuning-bert-sentence-pairs") - -(when-not (.exists (io/file (str model-dir "static_bert_qa-0002.params"))) - (println "Downloading bert qa data") - (sh "./get_bert_data.sh")) - -(defn get-slim-raw-data [] - (take 32 (csv/parse-csv (slurp "data/dev.tsv") :delimiter \tab))) - -(deftest train-test - 
(with-redefs [get-raw-data get-slim-raw-data] - (let [dev (context/default-context) - num-epoch 1 - bert-base (m/load-checkpoint {:prefix model-path-prefix :epoch 0}) - model-sym (fine-tune-model (m/symbol bert-base) {:num-classes 2 :dropout 0.1}) - {:keys [data0s data1s data2s labels train-num]} (prepare-data (get-raw-data)) - batch-size 32 - data-desc0 (mx-io/data-desc {:name "data0" - :shape [train-num seq-length] - :dtype dtype/FLOAT32 - :layout layout/NT}) - data-desc1 (mx-io/data-desc {:name "data1" - :shape [train-num seq-length] - :dtype dtype/FLOAT32 - :layout layout/NT}) - data-desc2 (mx-io/data-desc {:name "data2" - :shape [train-num] - :dtype dtype/FLOAT32 - :layout layout/N}) - label-desc (mx-io/data-desc {:name "softmax_label" - :shape [train-num] - :dtype dtype/FLOAT32 - :layout layout/N}) - train-data (mx-io/ndarray-iter {data-desc0 (ndarray/array data0s [train-num seq-length] - {:ctx dev}) - data-desc1 (ndarray/array data1s [train-num seq-length] - {:ctx dev}) - data-desc2 (ndarray/array data2s [train-num] - {:ctx dev})} - {:label {label-desc (ndarray/array labels [train-num] - {:ctx dev})} - :data-batch-size batch-size}) - model (m/module model-sym {:contexts [dev] - :data-names ["data0" "data1" "data2"]})] - (m/fit model {:train-data train-data :num-epoch num-epoch - :fit-params (m/fit-params {:allow-missing true - :arg-params (m/arg-params bert-base) - :aux-params (m/aux-params bert-base) - :optimizer (optimizer/adam {:learning-rate 5e-6 :episilon 1e-9}) - :batch-end-callback (callback/speedometer batch-size 1)})}) - (m/save-checkpoint model {:prefix test-prefix :epoch num-epoch}) - (testing "accuracy" - (is (< 0.5 (last (m/score model {:eval-data train-data :eval-metric (eval-metric/accuracy)}))))) - (testing "prediction" - (let [test-predictor (infer/create-predictor (infer/model-factory test-prefix - [{:name "data0" :shape [1 seq-length] :dtype dtype/FLOAT32 :layout layout/NT} - {:name "data1" :shape [1 seq-length] :dtype dtype/FLOAT32 
:layout layout/NT} - {:name "data2" :shape [1] :dtype dtype/FLOAT32 :layout layout/N}]) - {:epoch num-epoch}) - prediction (predict-equivalence test-predictor - "The company cut spending to compensate for weak sales ." - "In response to poor sales results, the company cut spending .")] - ;; We can't say much about how the model will find this prediction, so we test only the prediction's shape. - (is (vector? prediction)) - (is (number? (first prediction))) - (is (number? (second prediction))) - (is (= 2 (count prediction)))))))) diff --git a/contrib/clojure-package/examples/bert/test/bert/infer_test.clj b/contrib/clojure-package/examples/bert/test/bert/infer_test.clj deleted file mode 100644 index 48ee3a89b177..000000000000 --- a/contrib/clojure-package/examples/bert/test/bert/infer_test.clj +++ /dev/null @@ -1,43 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - - -(ns bert.infer-test - (:require [bert.infer :refer :all] - [bert.util :as util] - [clojure.java.io :as io] - [clojure.java.shell :refer [sh]] - [clojure.test :refer :all] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.infer :as infer])) - -(def model-dir "data/") - -(when-not (.exists (io/file (str model-dir "static_bert_qa-0002.params"))) - (println "Downloading bert qa data") - (sh "./get_bert_data.sh")) - -(deftest infer-test - (let [ctx (context/default-context) - predictor (make-predictor ctx) - {:keys [idx->token token->idx]} (util/get-vocab) - ;;; samples taken from https://rajpurkar.github.io/SQuAD-explorer/explore/v2.0/dev/ - question-answers (clojure.edn/read-string (slurp "squad-samples.edn"))] - (let [qa-map (last question-answers) - {:keys [input-batch tokens qa-map]} (pre-processing ctx idx->token token->idx qa-map) - result (first (infer/predict-with-ndarray predictor input-batch))] - (is (= ["rich" "hickey"] (post-processing result tokens)))))) diff --git a/contrib/clojure-package/examples/captcha/.gitignore b/contrib/clojure-package/examples/captcha/.gitignore deleted file mode 100644 index e1569bd89020..000000000000 --- a/contrib/clojure-package/examples/captcha/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/.lein-* -/.nrepl-port -images/* diff --git a/contrib/clojure-package/examples/captcha/README.md b/contrib/clojure-package/examples/captcha/README.md deleted file mode 100644 index be71e07b2120..000000000000 --- a/contrib/clojure-package/examples/captcha/README.md +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - - - - - - - - - - - - -# Captcha - -This is the clojure version of [captcha recognition](https://github.com/xlvector/learning-dl/tree/master/mxnet/ocr) -example by xlvector and mirrors the R captcha example. It can be used as an -example of multi-label training. For the following captcha example, we consider it as an -image with 4 labels and train a CNN over the data set. 
- -![captcha example](captcha_example.png) - -## Installation - -Before you run this example, make sure that you have the clojure package -installed. In the main clojure package directory, do `lein install`. -Then you can run `lein install` in this directory. - -## Usage - -### Training - -First the OCR model needs to be trained based on [labeled data](https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/captcha_example.zip). -The training can be started using the following: -``` -$ lein train [:cpu|:gpu] [num-devices] -``` -This downloads the training/evaluation data using the `get_data.sh` script -before starting training. - -It is possible that you will encounter some out-of-memory issues while training using :gpu on Ubuntu -linux (18.04). However, the command `lein train` (training on one CPU) may resolve the issue. - -The training runs for 10 iterations by default and saves the model with the -prefix `ocr-`. The model achieved an exact match accuracy of ~0.954 and -~0.628 on training and validation data respectively. - -### Inference - -Once the model has been saved, it can be used for prediction. This can be done -by running: -``` -$ lein infer -INFO MXNetJVM: Try loading mxnet-scala from native path. -INFO MXNetJVM: Try loading mxnet-scala-linux-x86_64-gpu from native path. -INFO MXNetJVM: Try loading mxnet-scala-linux-x86_64-cpu from native path. -WARN MXNetJVM: MXNet Scala native library not found in path. Copying native library from the archive. Consider installing the library somewhere in the path (for Windows: PATH, for Linux: LD_LIBRARY_PATH), or specifying by Java cmd option -Djava.library.path=[lib path]. 
-WARN org.apache.mxnet.DataDesc: Found Undefined Layout, will use default index 0 for batch axis -INFO org.apache.mxnet.infer.Predictor: Latency increased due to batchSize mismatch 8 vs 1 -WARN org.apache.mxnet.DataDesc: Found Undefined Layout, will use default index 0 for batch axis -WARN org.apache.mxnet.DataDesc: Found Undefined Layout, will use default index 0 for batch axis -CAPTCHA output: 6643 -INFO org.apache.mxnet.util.NativeLibraryLoader: Deleting /tmp/mxnet6045308279291774865/libmxnet.so -INFO org.apache.mxnet.util.NativeLibraryLoader: Deleting /tmp/mxnet6045308279291774865/mxnet-scala -INFO org.apache.mxnet.util.NativeLibraryLoader: Deleting /tmp/mxnet6045308279291774865 -``` -The model runs on `captcha_example.png` by default. - -It can be run on other generated captcha images as well. The script -`gen_captcha.py` generates random captcha images for length 4. -Before running the python script, you will need to install the [captcha](https://pypi.org/project/captcha/) -library using `pip3 install --user captcha`. The captcha images are generated -in the `images/` folder and we can run the prediction using -`lein infer images/7534.png`. diff --git a/contrib/clojure-package/examples/captcha/captcha_example.png b/contrib/clojure-package/examples/captcha/captcha_example.png deleted file mode 100644 index 09b84f7190fa..000000000000 Binary files a/contrib/clojure-package/examples/captcha/captcha_example.png and /dev/null differ diff --git a/contrib/clojure-package/examples/captcha/gen_captcha.py b/contrib/clojure-package/examples/captcha/gen_captcha.py deleted file mode 100644 index 43e0d26fb961..000000000000 --- a/contrib/clojure-package/examples/captcha/gen_captcha.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python3 - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from captcha.image import ImageCaptcha -import os -import random - -length = 4 -width = 160 -height = 60 -IMAGE_DIR = "images" - - -def random_text(): - return ''.join(str(random.randint(0, 9)) - for _ in range(length)) - - -if __name__ == '__main__': - image = ImageCaptcha(width=width, height=height) - captcha_text = random_text() - if not os.path.exists(IMAGE_DIR): - os.makedirs(IMAGE_DIR) - image.write(captcha_text, os.path.join(IMAGE_DIR, captcha_text + ".png")) diff --git a/contrib/clojure-package/examples/captcha/get_data.sh b/contrib/clojure-package/examples/captcha/get_data.sh deleted file mode 100755 index baa7f9eb818f..000000000000 --- a/contrib/clojure-package/examples/captcha/get_data.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -evx - -EXAMPLE_ROOT=$(cd "$(dirname $0)"; pwd) - -data_path=$EXAMPLE_ROOT - -if [ ! -f "$data_path/captcha_example.zip" ]; then - wget https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/captcha_example.zip -P $data_path -fi - -if [ ! -f "$data_path/captcha_example/captcha_train.rec" ]; then - unzip $data_path/captcha_example.zip -d $data_path -fi diff --git a/contrib/clojure-package/examples/captcha/project.clj b/contrib/clojure-package/examples/captcha/project.clj deleted file mode 100644 index 7bc862c29e3d..000000000000 --- a/contrib/clojure-package/examples/captcha/project.clj +++ /dev/null @@ -1,28 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(defproject captcha "0.1.0-SNAPSHOT" - :description "Captcha recognition via multi-label classification" - :plugins [[lein-cljfmt "0.5.7"]] - :dependencies [[org.clojure/clojure "1.9.0"] - [org.apache.mxnet.contrib.clojure/clojure-mxnet "2.0.0-SNAPSHOT"]] - :main ^:skip-aot captcha.train-ocr - :profiles {:train {:main captcha.train-ocr} - :infer {:main captcha.infer-ocr} - :uberjar {:aot :all}} - :aliases {"train" ["with-profile" "train" "run"] - "infer" ["with-profile" "infer" "run"]}) diff --git a/contrib/clojure-package/examples/captcha/src/captcha/consts.clj b/contrib/clojure-package/examples/captcha/src/captcha/consts.clj deleted file mode 100644 index 318e0d806873..000000000000 --- a/contrib/clojure-package/examples/captcha/src/captcha/consts.clj +++ /dev/null @@ -1,27 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;;
-
-;; Shared constants for the captcha example (used by both the training and
-;; the inference namespaces).
-(ns captcha.consts)
-
-;; Number of samples per batch; passed to the record iterators and used in
-;; the predictor's input shapes.
-(def batch-size 8)
-;; Image dimensions, combined below in [channels height width] order.
-(def channels 3)
-(def height 30)
-(def width 80)
-(def data-shape [channels height width])
-;; Output size of each per-character classification head (digits 0-9).
-(def num-labels 10)
-;; Number of label values per sample, i.e. characters per captcha.
-(def label-width 4)
-;; Checkpoint prefix used when saving the trained model and loading it for inference.
-(def model-prefix "ocr")
diff --git a/contrib/clojure-package/examples/captcha/src/captcha/infer_ocr.clj b/contrib/clojure-package/examples/captcha/src/captcha/infer_ocr.clj
deleted file mode 100644
index f6a648e9867b..000000000000
--- a/contrib/clojure-package/examples/captcha/src/captcha/infer_ocr.clj
+++ /dev/null
@@ -1,56 +0,0 @@
-;;
-;; Licensed to the Apache Software Foundation (ASF) under one or more
-;; contributor license agreements. See the NOTICE file distributed with
-;; this work for additional information regarding copyright ownership.
-;; The ASF licenses this file to You under the Apache License, Version 2.0
-;; (the "License"); you may not use this file except in compliance with
-;; the License. You may obtain a copy of the License at
-;;
-;;    http://www.apache.org/licenses/LICENSE-2.0
-;;
-;; Unless required by applicable law or agreed to in writing, software
-;; distributed under the License is distributed on an "AS IS" BASIS,
-;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-;; See the License for the specific language governing permissions and
-;; limitations under the License.
-;;
-
-(ns captcha.infer-ocr
-  (:require [captcha.consts :refer :all]
-            [org.apache.clojure-mxnet.dtype :as dtype]
-            [org.apache.clojure-mxnet.infer :as infer]
-            [org.apache.clojure-mxnet.layout :as layout]
-            [org.apache.clojure-mxnet.ndarray :as ndarray]))
-
-(defn create-predictor
-  "Builds an inference predictor for the checkpoint stored under
-  `model-prefix`, declaring both the image input and the label input."
-  []
-  (let [image-input {:name "data"
-                     :shape [batch-size channels height width]
-                     :layout layout/NCHW
-                     :dtype dtype/FLOAT32}
-        label-input {:name "label"
-                     :shape [batch-size label-width]
-                     :layout layout/NT
-                     :dtype dtype/FLOAT32}]
-    (infer/create-predictor
-     (infer/model-factory model-prefix [image-input label-input]))))
-
-(defn -main
-  "Predicts the digits of one captcha image and prints them.
-  The first command-line argument is the image file; it defaults to
-  \"captcha_example.png\"."
-  [& args]
-  (let [image-path (or (first args) "captcha_example.png")
-        resized (infer/reshape-image (infer/load-image-from-file image-path)
-                                     width
-                                     height)
-        pixels (infer/buffered-image-to-pixels resized [channels height width])
-        ;; add a leading batch dimension of size 1
-        image-batch (ndarray/expand-dims pixels 0)
-        ;; the network declares a label input, so feed it zeros
-        dummy-labels (ndarray/zeros [1 label-width])
-        predictor (create-predictor)
-        scores (first (infer/predict-with-ndarray predictor
-                                                  [image-batch dummy-labels]))
-        digits (ndarray/->vec (ndarray/argmax scores 1))]
-    (println "CAPTCHA output:" (apply str (mapv int digits)))))
diff --git a/contrib/clojure-package/examples/captcha/src/captcha/train_ocr.clj b/contrib/clojure-package/examples/captcha/src/captcha/train_ocr.clj
deleted file mode 100644
index 91ec2fff3af7..000000000000
--- a/contrib/clojure-package/examples/captcha/src/captcha/train_ocr.clj
+++ /dev/null
@@ -1,156 +0,0 @@
-;;
-;; Licensed to the Apache Software Foundation (ASF) under one or more
-;; contributor license agreements. See the NOTICE file distributed with
-;; this work for additional information regarding copyright ownership.
-;; The ASF licenses this file to You under the Apache License, Version 2.0
-;; (the "License"); you may not use this file except in compliance with
-;; the License.
You may obtain a copy of the License at
-;;
-;;    http://www.apache.org/licenses/LICENSE-2.0
-;;
-;; Unless required by applicable law or agreed to in writing, software
-;; distributed under the License is distributed on an "AS IS" BASIS,
-;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-;; See the License for the specific language governing permissions and
-;; limitations under the License.
-;;
-
-(ns captcha.train-ocr
-  (:require [captcha.consts :refer :all]
-            [clojure.java.io :as io]
-            [clojure.java.shell :refer [sh]]
-            [org.apache.clojure-mxnet.callback :as callback]
-            [org.apache.clojure-mxnet.context :as context]
-            [org.apache.clojure-mxnet.eval-metric :as eval-metric]
-            [org.apache.clojure-mxnet.initializer :as initializer]
-            [org.apache.clojure-mxnet.io :as mx-io]
-            [org.apache.clojure-mxnet.module :as m]
-            [org.apache.clojure-mxnet.ndarray :as ndarray]
-            [org.apache.clojure-mxnet.optimizer :as optimizer]
-            [org.apache.clojure-mxnet.symbol :as sym])
-  (:gen-class))
-
-;; Fetch the example data at namespace load time when it is not present yet.
-(when-not (.exists (io/file "captcha_example/captcha_train.lst"))
-  (sh "./get_data.sh"))
-
-;; Shuffled (fixed seed) iterator over the training records.
-(defonce train-data
-  (mx-io/image-record-iter {:path-imgrec "captcha_example/captcha_train.rec"
-                            :path-imglist "captcha_example/captcha_train.lst"
-                            :batch-size batch-size
-                            :label-width label-width
-                            :data-shape data-shape
-                            :shuffle true
-                            :seed 42}))
-
-;; Iterator over the held-out records, in file order.
-(defonce eval-data
-  (mx-io/image-record-iter {:path-imgrec "captcha_example/captcha_test.rec"
-                            :path-imglist "captcha_example/captcha_test.lst"
-                            :batch-size batch-size
-                            :label-width label-width
-                            :data-shape data-shape}))
-
-(defn accuracy
-  "Computes the accuracy of one batch of predictions.
-
-  `label` is a [samples characters] NDArray of digit labels. `pred` holds one
-  row of class scores per (character position, sample) pair, ordered
-  position-major - the order obtained by transposing and flattening `label`.
-
-  With `:by-character true` returns the fraction of individual characters
-  predicted correctly; otherwise (the default) returns the fraction of
-  samples whose characters were all predicted correctly. Returns a float."
-  [label pred & {:keys [by-character]
-                 :or {by-character false}}]
-  (let [[nr nc] (ndarray/shape-vec label)
-        pred-context (ndarray/context pred)
-        ;; flatten labels position-major so they line up with the rows of `pred`
-        label-t (-> label
-                    ndarray/transpose
-                    (ndarray/reshape [-1])
-                    (ndarray/as-in-context pred-context))
-        pred-label (ndarray/argmax pred 1)
-        matches (ndarray/equal label-t pred-label)
-        [digit-matches] (-> matches
-                            ndarray/sum
-                            ndarray/->vec)
-        [complete-matches] (-> matches
-                               (ndarray/reshape [nc nr])
-                               (ndarray/sum 0)
-                               ;; A sample is fully correct when all `nc` of its
-                               ;; characters match. Compare against the label
-                               ;; array's own width rather than the global
-                               ;; `label-width` so other widths work too.
-                               (ndarray/equal (long nc))
-                               ndarray/sum
-                               ndarray/->vec)]
-    (if by-character
-      (float (/ digit-matches nr nc))
-      (float (/ complete-matches nr)))))
-
-(defn get-data-symbol
-  "Builds the scoring part of the network: four convolution/pooling/relu
-  stages, a 256-unit fully-connected layer and four `num-labels`-wide heads
-  (one per character position) concatenated along dim 0."
-  []
-  (let [data (sym/variable "data")
-        ;; normalize the input pixels
-        scaled (sym/div (sym/- data 127) 128)
-
-        conv1 (sym/convolution {:data scaled :kernel [5 5] :num-filter 32})
-        pool1 (sym/pooling {:data conv1 :pool-type "max" :kernel [2 2] :stride [1 1]})
-        relu1 (sym/activation {:data pool1 :act-type "relu"})
-
-        conv2 (sym/convolution {:data relu1 :kernel [5 5] :num-filter 32})
-        pool2 (sym/pooling {:data conv2 :pool-type "avg" :kernel [2 2] :stride [1 1]})
-        relu2 (sym/activation {:data pool2 :act-type "relu"})
-
-        conv3 (sym/convolution {:data relu2 :kernel [3 3] :num-filter 32})
-        pool3 (sym/pooling {:data conv3 :pool-type "avg" :kernel [2 2] :stride [1 1]})
-        relu3 (sym/activation {:data pool3 :act-type "relu"})
-
-        conv4 (sym/convolution {:data relu3 :kernel [3 3] :num-filter 32})
-        pool4 (sym/pooling {:data conv4 :pool-type "avg" :kernel [2 2] :stride [1 1]})
-        relu4 (sym/activation {:data pool4 :act-type "relu"})
-
-        flattened (sym/flatten {:data relu4})
-        fc1 (sym/fully-connected {:data flattened :num-hidden 256})
-        fc21 (sym/fully-connected {:data fc1 :num-hidden num-labels})
-        fc22 (sym/fully-connected {:data fc1 :num-hidden num-labels})
-        fc23 (sym/fully-connected {:data fc1 :num-hidden num-labels})
-        fc24 (sym/fully-connected {:data fc1 :num-hidden num-labels})]
-    (sym/concat "concat" nil [fc21 fc22 fc23 fc24] {:dim 0})))
-
-(defn get-label-symbol
-  "Builds the label symbol: the \"label\" input transposed and flattened so
-  its order matches the concatenated heads of `get-data-symbol`."
-  []
-  (as-> (sym/variable "label") label
-    (sym/transpose {:data label})
-    (sym/reshape {:data label :shape [-1]})))
-
-(defn create-captcha-net
-  "Returns the full training symbol: softmax output over the head scores
-  against the flattened labels."
-  []
-  (let [scores (get-data-symbol)
-        labels (get-label-symbol)]
-    (sym/softmax-output {:data scores :label labels})))
-
-;; Adam optimizer with weight decay and gradient clipping, used by `train-ocr`.
-(def optimizer
-  (optimizer/adam
-   {:learning-rate 0.0002
-    :wd 0.00001
-    :clip-gradient 10}))
-
-(defn train-ocr
-  "Fits the captcha network on `train-data` for 10 epochs on the contexts
-  `devs`, evaluating on `eval-data` with the whole-captcha `accuracy` metric.
-  Returns the trained module."
-  [devs]
-  (println "Starting the captcha training ...")
-  (let [model (m/module
-               (create-captcha-net)
-               {:data-names ["data"] :label-names ["label"]
-                :contexts devs})]
-    (m/fit model {:train-data train-data
-                  :eval-data eval-data
-                  :num-epoch 10
-                  :fit-params (m/fit-params
-                               {:kvstore "local"
-                                :batch-end-callback
-                                (callback/speedometer batch-size 100)
-                                :initializer
-                                (initializer/xavier {:factor-type "in"
-                                                     :magnitude 2.34})
-                                :optimizer optimizer
-                                :eval-metric (eval-metric/custom-metric
-                                              #(accuracy %1 %2)
-                                              "accuracy")})})
-    (println "Finished the fit")
-    model))
-
-(defn -main
-  "Trains the model and saves a checkpoint under `model-prefix` (epoch 0).
-  Arguments: device kind (\":gpu\" selects GPUs, anything else CPUs) and the
-  number of devices (defaults to 1)."
-  [& args]
-  (let [[dev dev-num] args
-        num-devices (Integer/parseInt (or dev-num "1"))
-        devs (if (= dev ":gpu")
-               (mapv #(context/gpu %) (range num-devices))
-               (mapv #(context/cpu %) (range num-devices)))
-        model (train-ocr devs)]
-    (m/save-checkpoint model {:prefix model-prefix :epoch 0})))
diff --git a/contrib/clojure-package/examples/captcha/test/captcha/train_ocr_test.clj b/contrib/clojure-package/examples/captcha/test/captcha/train_ocr_test.clj
deleted file mode 100644
index ab785f7fedf2..000000000000
--- a/contrib/clojure-package/examples/captcha/test/captcha/train_ocr_test.clj
+++ /dev/null
@@ -1,119 +0,0 @@
-;;
-;; Licensed to the Apache Software Foundation (ASF) under one or more
-;; contributor license agreements. See the NOTICE file distributed with
-;; this work for additional information regarding copyright ownership.
-;; The ASF licenses this file to You under the Apache License, Version 2.0
-;; (the "License"); you may not use this file except in compliance with
-;; the License. You may obtain a copy of the License at
-;;
-;;    http://www.apache.org/licenses/LICENSE-2.0
-;;
-;; Unless required by applicable law or agreed to in writing, software
-;; distributed under the License is distributed on an "AS IS" BASIS,
-;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns captcha.train-ocr-test - (:require [clojure.test :refer :all] - [captcha.consts :refer :all] - [captcha.train-ocr :refer :all] - [org.apache.clojure-mxnet.io :as mx-io] - [org.apache.clojure-mxnet.module :as m] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.shape :as shape] - [org.apache.clojure-mxnet.util :as util])) - -(deftest test-consts - (is (= 8 batch-size)) - (is (= [3 30 80] data-shape)) - (is (= 4 label-width)) - (is (= 10 num-labels))) - -(deftest test-labeled-data - (let [train-batch (mx-io/next train-data) - eval-batch (mx-io/next eval-data) - allowed-labels (into #{} (map float (range 10)))] - (is (= 8 (-> train-batch mx-io/batch-index count))) - (is (= 8 (-> eval-batch mx-io/batch-index count))) - (is (= [8 3 30 80] (-> train-batch - mx-io/batch-data - first - ndarray/shape-vec))) - (is (= [8 3 30 80] (-> eval-batch - mx-io/batch-data - first - ndarray/shape-vec))) - (is (every? #(<= 0 % 255) (-> train-batch - mx-io/batch-data - first - ndarray/->vec))) - (is (every? #(<= 0 % 255) (-> eval-batch - mx-io/batch-data - first - ndarray/->vec))) - (is (= [8 4] (-> train-batch - mx-io/batch-label - first - ndarray/shape-vec))) - (is (= [8 4] (-> eval-batch - mx-io/batch-label - first - ndarray/shape-vec))) - (is (every? allowed-labels (-> train-batch - mx-io/batch-label - first - ndarray/->vec))) - (is (every? 
allowed-labels (-> eval-batch - mx-io/batch-label - first - ndarray/->vec))))) - -(deftest test-model - (let [batch (mx-io/next train-data) - model (m/module (create-captcha-net) - {:data-names ["data"] :label-names ["label"]}) - _ (m/bind model - {:data-shapes (mx-io/provide-data-desc train-data) - :label-shapes (mx-io/provide-label-desc train-data)}) - _ (m/init-params model) - _ (m/forward-backward model batch) - output-shapes (-> model - m/output-shapes - util/coerce-return-recursive) - outputs (-> model - m/outputs-merged - first) - grads (->> model m/grad-arrays (map first))] - (is (= [["softmaxoutput0_output" (shape/->shape [8 10])]] - output-shapes)) - (is (= [32 10] (-> outputs ndarray/shape-vec))) - (is (every? #(<= 0.0 % 1.0) (-> outputs ndarray/->vec))) - (is (= [[32 3 5 5] [32] ; convolution1 weights+bias - [32 32 5 5] [32] ; convolution2 weights+bias - [32 32 3 3] [32] ; convolution3 weights+bias - [32 32 3 3] [32] ; convolution4 weights+bias - [256 28672] [256] ; fully-connected1 weights+bias - [10 256] [10] ; 1st label scores - [10 256] [10] ; 2nd label scores - [10 256] [10] ; 3rd label scores - [10 256] [10]] ; 4th label scores - (map ndarray/shape-vec grads))))) - -(deftest test-accuracy - (let [labels (ndarray/array [1 2 3 4, - 5 6 7 8] - [2 4]) - pred-labels (ndarray/array [1 0, - 2 6, - 3 0, - 4 8] - [8]) - preds (ndarray/one-hot pred-labels 10)] - (is (float? (accuracy labels preds))) - (is (float? (accuracy labels preds :by-character false))) - (is (float? 
(accuracy labels preds :by-character true))) - (is (= 0.5 (accuracy labels preds))) - (is (= 0.5 (accuracy labels preds :by-character false))) - (is (= 0.75 (accuracy labels preds :by-character true))))) diff --git a/contrib/clojure-package/examples/cnn-text-classification/.gitignore b/contrib/clojure-package/examples/cnn-text-classification/.gitignore deleted file mode 100644 index c53038ec0e3d..000000000000 --- a/contrib/clojure-package/examples/cnn-text-classification/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -/target -/classes -/checkouts -pom.xml -pom.xml.asc -*.jar -*.class -/.lein-* -/.nrepl-port -.hgignore -.hg/ diff --git a/contrib/clojure-package/examples/cnn-text-classification/README.md b/contrib/clojure-package/examples/cnn-text-classification/README.md deleted file mode 100644 index 152ee4f10189..000000000000 --- a/contrib/clojure-package/examples/cnn-text-classification/README.md +++ /dev/null @@ -1,97 +0,0 @@ - - - - - - - - - - - - - - - - - -# cnn-text-classification - -An example of text classification using CNN - -To use you must download the MR polarity dataset and put it in the path specified in the mr-dataset-path -The dataset can be obtained here: [CNN_sentence](https://github.com/yoonkim/CNN_sentence). The two files `rt-polarity.neg` -and `rt-polarity.pos` must be put in a directory. For example, `data/mr-data/rt-polarity.neg`. - -You also must download the glove word embeddings. The suggested one to use is the smaller 50 dimension one -`glove.6B.50d.txt` which is contained in the download file here: [GloVe](https://nlp.stanford.edu/projects/glove/) - - -## Installation - -Before you run this example, make sure that you have the clojure package installed. -In the main clojure package directory, do `lein install`. Then you can run -`lein install` in this directory. 
-
-## Usage
-
-You can run the example from the REPL with
-`(train-convnet {:devs [(context/default-context)] :embedding-size 50 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :glove})`
-or from the command line with
-`JVM_OPTS="-Xmx1g" lein run` (cpu)
-
-You can control the devices you run on by passing the device type and count:
-
-- `lein run :cpu 2` - This will run on 2 cpu devices
-- `lein run :gpu 1` - This will run on 1 gpu device
-- `lein run :gpu 2` - This will run on 2 gpu devices
-
-
-The `:max-examples` option limits loading to the first 1000 positive and the first 1000 negative reviews, to keep time and memory usage down. To run on all the examples,
-change the call in `-main` to `(train-convnet {:embedding-size 50 :batch-size 100 :test-size 1000 :num-epoch 10 :pretrained-embedding :glove})`
-
-and then run
-
-- `lein uberjar`
-- `java -Xms1024m -Xmx2048m -jar target/cnn-text-classification-0.1.0-SNAPSHOT-standalone.jar`
-
-## Usage with fastText
-
-Using fastText instead of GloVe is fairly straightforward, as the pretrained embedding format is very similar.
-
-Download the 'Simple English' pretrained wiki word vectors (text) from the fastText
-[site](https://fasttext.cc/docs/en/pretrained-vectors.html) and place them in the
-`data/fasttext` directory. Alternatively, just run `./get_fasttext_data.sh`.
-
-Then you can run training on a subset of examples through the REPL using:
-```
-(train-convnet {:devs [(context/default-context)] :embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :fasttext})
-```
-
-Expect a validation accuracy of `~0.67` with the above parameters.
-
-## Usage with word2vec
-
-You can also use word2vec embeddings to train the text classification model.
-Before training, you will need to download [GoogleNews-vectors-negative300.bin](https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM/edit?usp=sharing).
-Once you've downloaded the embeddings (which are in a gzipped format), -you'll need to unzip them and place them in the `contrib/clojure-package/data` directory. - -Then you can run training on a subset of examples through the repl using: -``` -(train-convnet {:devs [(context/default-context)] :embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :word2vec}) -``` -Note that loading word2vec embeddings consumes memory and takes some time. - -You can also train them using `JVM_OPTS="-Xmx8g" lein run` once you've modified -the parameters to `train-convnet` (see above) in `src/cnn_text_classification/classifier.clj`. -In order to run training with word2vec on the complete data set, you will need to run: -``` -(train-convnet {:devs [(context/default-context)] :embedding-size 300 :batch-size 100 :test-size 1000 :num-epoch 10 :pretrained-embedding :word2vec}) -``` -You should be able to achieve an accuracy of `~0.78` using the parameters above. - -## Usage with learned embeddings - -Lastly, similar to the python CNN text classification example, you can learn the embeddings based on training data. -This can be achieved by setting `:pretrained-embedding nil` (or omitting that parameter altogether). diff --git a/contrib/clojure-package/examples/cnn-text-classification/get_data.sh b/contrib/clojure-package/examples/cnn-text-classification/get_data.sh deleted file mode 100755 index 7bbd9ce72142..000000000000 --- a/contrib/clojure-package/examples/cnn-text-classification/get_data.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -evx - -mkdir -p data/mr-data -cd data/mr-data -wget https://raw.githubusercontent.com/yoonkim/CNN_sentence/master/rt-polarity.neg -wget https://raw.githubusercontent.com/yoonkim/CNN_sentence/master/rt-polarity.pos -cd ../.. -mkdir -p data/glove -cd data/glove -wget http://nlp.stanford.edu/data/glove.6B.zip -unzip *.zip -cd ../.. diff --git a/contrib/clojure-package/examples/cnn-text-classification/get_fasttext_data.sh b/contrib/clojure-package/examples/cnn-text-classification/get_fasttext_data.sh deleted file mode 100755 index 2bfe96659402..000000000000 --- a/contrib/clojure-package/examples/cnn-text-classification/get_fasttext_data.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -evx - -mkdir -p data/fasttext -cd data/fasttext -wget https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.simple.vec diff --git a/contrib/clojure-package/examples/cnn-text-classification/project.clj b/contrib/clojure-package/examples/cnn-text-classification/project.clj deleted file mode 100644 index 5235027a71fb..000000000000 --- a/contrib/clojure-package/examples/cnn-text-classification/project.clj +++ /dev/null @@ -1,24 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(defproject cnn-text-classification "0.1.0-SNAPSHOT" - :description "CNN text classification with MXNet" - :plugins [[lein-cljfmt "0.5.7"]] - :dependencies [[org.clojure/clojure "1.9.0"] - [org.apache.mxnet.contrib.clojure/clojure-mxnet "2.0.0-SNAPSHOT"]] - :pedantic? 
:skip - :main cnn-text-classification.classifier) diff --git a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/classifier.clj b/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/classifier.clj deleted file mode 100644 index 3c0288c9c343..000000000000 --- a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/classifier.clj +++ /dev/null @@ -1,135 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;;
-
-;; CNN sentence classifier for the MR polarity dataset: data splitting,
-;; network construction and the training entry points.
-(ns cnn-text-classification.classifier
-  (:require [clojure.java.io :as io]
-            [clojure.java.shell :refer [sh]]
-            [cnn-text-classification.data-helper :as data-helper]
-            [org.apache.clojure-mxnet.eval-metric :as eval-metric]
-            [org.apache.clojure-mxnet.io :as mx-io]
-            [org.apache.clojure-mxnet.module :as m]
-            [org.apache.clojure-mxnet.ndarray :as ndarray]
-            [org.apache.clojure-mxnet.optimizer :as optimizer]
-            [org.apache.clojure-mxnet.symbol :as sym]
-            [org.apache.clojure-mxnet.context :as context])
-  (:gen-class))
-
-(def data-dir "data/")
-(def mr-dataset-path "data/mr-data") ;; the MR polarity dataset path
-;; number of convolution filters created for each filter size
-(def num-filter 100)
-;; number of output classes of the final fully-connected layer
-(def num-label 2)
-;; dropout probability before the final layer; a non-positive value disables it
-(def dropout 0.5)
-
-;; Runs ./get_data.sh at namespace load time when the data directory is missing.
-(when-not (.exists (io/file (str data-dir)))
-  (do (println "Retrieving data for cnn text classification...") (sh "./get_data.sh")))
-
-;; Pairs every sentence with its label, shuffles the pairs and splits them:
-;; the last `test-num` pairs form the test set, the rest the training set.
-;; Returns {:training {:data .. :label ..} :test {:data .. :label ..}} as
-;; NDArrays. The data shape carries a trailing `embedding-size` dimension only
-;; when `pretrained-embedding` is truthy.
-(defn shuffle-data [test-num {:keys [data label sentence-count sentence-size vocab-size embedding-size pretrained-embedding]}]
-  (println "Shuffling the data and splitting into training and test sets")
-  (println {:sentence-count sentence-count
-            :sentence-size sentence-size
-            :vocab-size vocab-size
-            :embedding-size embedding-size
-            :pretrained-embedding pretrained-embedding})
-  (let [shuffled (shuffle (map #(vector %1 %2) data label))
-        train-num (- (count shuffled) test-num)
-        training (into [] (take train-num shuffled))
-        test (into [] (drop train-num shuffled))
-        ;; has to be channel x y
-        train-data-shape (if pretrained-embedding
-                           [train-num 1 sentence-size embedding-size]
-                           [train-num 1 sentence-size])
-        ;; has to be channel x y
-        test-data-shape (if pretrained-embedding
-                          [test-num 1 sentence-size embedding-size]
-                          [test-num 1 sentence-size])]
-    {:training {:data (ndarray/array (into [] (flatten (mapv first training)))
-                                     train-data-shape)
-                :label (ndarray/array (into [] (flatten (mapv last training)))
-                                      [train-num])}
-     :test {:data (ndarray/array (into [] (flatten (mapv first test)))
-                                 test-data-shape)
-            :label (ndarray/array (into [] (flatten (mapv last test)))
-                                  [test-num])}}))
-
-;; Returns the network input symbol. With a pretrained embedding the "data"
-;; variable is used directly; otherwise an embedding layer ("vocab_embed") is
-;; added on top of it and its output is reshaped to
-;; [batch-size 1 sentence-size num-embed].
-(defn get-data-symbol [num-embed sentence-size batch-size vocab-size pretrained-embedding]
-  (if pretrained-embedding
-    (sym/variable "data")
-    (as-> (sym/variable "data") data
-      (sym/embedding "vocab_embed" {:data data :input-dim vocab-size :output-dim num-embed})
-      (sym/reshape {:data data :target-shape [batch-size 1 sentence-size num-embed]}))))
-
-;; Builds one convolution branch for the given `filter-size`: a convolution
-;; whose kernel is `filter-size` x `num-embed`, a relu, and a max-pooling
-;; whose kernel height is (sentence-size - filter-size + 1).
-(defn make-filter-layers [{:keys [input-x num-embed sentence-size] :as config}
-                          filter-size]
-  (as-> (sym/convolution {:data input-x
-                          :kernel [filter-size num-embed]
-                          :num-filter num-filter}) data
-    (sym/activation {:data data :act-type "relu"})
-    (sym/pooling {:data data
-                  :pool-type "max"
-                  :kernel [(inc (- sentence-size filter-size)) 1]
-                  :stride [1 1]})))
-
-;;; convnet with multiple filter sizes
-;; from Convolutional Neural Networks for Sentence Classification by Yoon Kim
-;; One branch per filter size (3, 4 and 5) is built with `make-filter-layers`;
-;; the branch outputs (`polled-outputs`, i.e. the pooled outputs) are
-;; concatenated along dim 1, reshaped to [batch-size total-filters], passed
-;; through dropout when `dropout` is positive, and fed to a fully-connected
-;; layer with `num-label` outputs followed by a softmax output.
-(defn get-multi-filter-convnet [num-embed sentence-size batch-size vocab-size pretrained-embedding]
-  (let [filter-list [3 4 5]
-        input-x (get-data-symbol num-embed sentence-size batch-size vocab-size pretrained-embedding)
-        polled-outputs (mapv #(make-filter-layers {:input-x input-x :num-embed num-embed :sentence-size sentence-size} %) filter-list)
-        total-filters (* num-filter (count filter-list))
-        concat (sym/concat "concat" nil polled-outputs {:dim 1})
-        hpool (sym/reshape "hpool" {:data concat :target-shape [batch-size total-filters]})
-        hdrop (if (pos? dropout) (sym/dropout "hdrop" {:data hpool :p dropout}) hpool)
-        fc (sym/fully-connected "fc1" {:data hdrop :num-hidden num-label})]
-    (sym/softmax-output "softmax" {:data fc})))
-
-;; Loads the dataset (optionally limited by `max-examples`) together with the
-;; requested embeddings, splits off `test-size` examples for evaluation, wraps
-;; both splits in NDArray iterators (last batch padded) and fits the
-;; multi-filter convnet on the contexts `devs` for `num-epoch` epochs with Adam.
-(defn train-convnet [{:keys [devs embedding-size batch-size test-size
-                             num-epoch max-examples pretrained-embedding]}]
-  (let [ms-dataset (data-helper/load-ms-with-embeddings mr-dataset-path max-examples embedding-size {:pretrained-embedding pretrained-embedding})
-        sentence-size (:sentence-size ms-dataset)
-        vocab-size (:vocab-size ms-dataset)
-        shuffled (shuffle-data test-size ms-dataset)
-        train-data (mx-io/ndarray-iter [(get-in shuffled [:training :data])]
-                                       {:label [(get-in shuffled [:training :label])]
-                                        :label-name "softmax_label"
-                                        :data-batch-size batch-size
-                                        :last-batch-handle "pad"})
-        test-data (mx-io/ndarray-iter [(get-in shuffled [:test :data])]
-                                      {:label [(get-in shuffled [:test :label])]
-                                       :label-name "softmax_label"
-                                       :data-batch-size batch-size
-                                       :last-batch-handle "pad"})]
-    (let [mod (m/module (get-multi-filter-convnet embedding-size sentence-size batch-size vocab-size pretrained-embedding) {:contexts devs})]
-      (println "Getting ready to train for " num-epoch " epochs")
-      (println "===========")
-      (m/fit mod {:train-data train-data :eval-data test-data :num-epoch num-epoch
-                  :fit-params (m/fit-params {:optimizer (optimizer/adam)})}))))
-
-;; Command-line entry point. Arguments: device kind (":gpu" selects GPUs,
-;; anything else CPUs) and number of devices (defaults to 1).
-(defn -main [& args]
-  (let [[dev dev-num] args
-        devs (if (= dev ":gpu")
-               (mapv #(context/gpu %) (range (Integer/parseInt (or dev-num "1"))))
-               (mapv #(context/cpu %) (range (Integer/parseInt (or dev-num "1")))))]
-    ;;; omit max-examples if you want to run all the examples in the movie review dataset
-    ;; to limit mem consumption set to something like 1000 and adjust test size to 100
-    (println "Running with context devices of" devs)
-    (train-convnet {:devs devs :embedding-size 50 :batch-size 10 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :glove})
-    ;; runs all the examples
-    ;; (the form below is discarded by the reader via #_ and never runs)
-    #_(train-convnet {:embedding-size 50 :batch-size 100 :test-size 1000 :num-epoch 10})))
-
-;; REPL scratch form; `devs` must be bound by the caller before evaluating it.
-(comment
-  (train-convnet {:devs devs :embedding-size 50 :batch-size 10 :test-size 100 :num-epoch 10 :max-examples 1000}))
-
diff --git a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj b/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
deleted file mode 100644
index df132c3167cd..000000000000
--- a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
+++ /dev/null
@@ -1,222 +0,0 @@
-;;
-;; Licensed to the Apache Software Foundation (ASF) under one or more
-;; contributor license agreements. See the NOTICE file distributed with
-;; this work for additional information regarding copyright ownership.
-;; The ASF licenses this file to You under the Apache License, Version 2.0
-;; (the "License"); you may not use this file except in compliance with
-;; the License. You may obtain a copy of the License at
-;;
-;;    http://www.apache.org/licenses/LICENSE-2.0
-;;
-;; Unless required by applicable law or agreed to in writing, software
-;; distributed under the License is distributed on an "AS IS" BASIS,
-;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-;; See the License for the specific language governing permissions and
-;; limitations under the License.
-;; - -(ns cnn-text-classification.data-helper - (:require [clojure.java.io :as io] - [clojure.string :as string] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.random :as random]) - (:import (java.io DataInputStream) - (java.nio ByteBuffer ByteOrder)) - (:gen-class)) - -(def w2v-file-path "../../data/GoogleNews-vectors-negative300.bin") ;; the word2vec file path -(def EOS "") ;; end of sentence word - -(defn glove-file-path - "Returns the file path to GloVe embedding of the input size" - [embedding-size] - (format "data/glove/glove.6B.%dd.txt" embedding-size)) - -(def fasttext-file-path "data/fasttext/wiki.simple.vec") - -(defn r-string - "Reads a string from the given DataInputStream `dis` until a space or newline is reached." - [dis] - (loop [b (.readByte dis) - bs []] - (if (and (not= 32 b) (not= 10 b)) - (recur (.readByte dis) (conj bs b)) - (new String (byte-array bs))))) - -(defn get-float [bs] - (-> (ByteBuffer/wrap bs) - (.order ByteOrder/LITTLE_ENDIAN) - (.getFloat))) - -(defn read-float [is] - (let [bs (byte-array 4)] - (do (.read is bs) - (get-float bs)))) - -(defn- load-w2v-vectors - "Lazily loads the word2vec vectors given a data input stream `dis`, - number of words `nwords` and dimensionality `embedding-size`." - [dis embedding-size num-vectors] - (if (= 0 num-vectors) - (list) - (let [word (r-string dis) - vect (mapv (fn [_] (read-float dis)) (range embedding-size))] - (cons [word vect] (lazy-seq (load-w2v-vectors dis embedding-size (dec num-vectors))))))) - -(defn load-word2vec-model! - "Loads the word2vec model stored in a binary format from the given `path`. - By default only the first 100 embeddings are loaded." 
- ([path embedding-size opts] - (println "Loading the word2vec model from binary ...") - (with-open [bis (io/input-stream path) - dis (new DataInputStream bis)] - (let [word-size (Integer/parseInt (r-string dis)) - dim (Integer/parseInt (r-string dis)) - {:keys [max-vectors vocab] :or {max-vectors word-size}} opts - _ (println "Processing with " {:dim dim :word-size word-size} " loading max vectors " max-vectors) - _ (if (not= embedding-size dim) - (throw (ex-info "Mismatch in embedding size" - {:input-embedding-size embedding-size - :word2vec-embedding-size dim}))) - vectors (load-w2v-vectors dis dim max-vectors) - word2vec (if vocab - (->> vectors - (filter (fn [[w _]] (contains? vocab w))) - (into {})) - (->> vectors - (take max-vectors) - (into {})))] - (println "Finished") - {:num-embed dim :word2vec word2vec}))) - ([path embedding-size] - (load-word2vec-model! path embedding-size {:max-vectors 100}))) - -(defn read-text-embedding-pairs [pairs] - (for [^String line pairs - :let [fields (.split line " ")]] - [(aget fields 0) - (mapv #(Float/parseFloat ^String %) (rest fields))])) - -(defn load-glove! [glove-file-path] - (println "Loading the glove pre-trained word embeddings from " glove-file-path) - (->> (io/reader glove-file-path) - line-seq - read-text-embedding-pairs - (into {}))) - -(def remove-fasttext-metadata rest) - -(defn load-fasttext! [fasttext-file-path] - (println "Loading the fastText pre-trained word embeddings from " fasttext-file-path) - (->> (io/reader fasttext-file-path) - line-seq - remove-fasttext-metadata - read-text-embedding-pairs - (into {}))) - -(defn clean-str [s] - (-> s - (string/replace #"[^A-Za-z0-9(),!?'`]" " ") - (string/replace #"'s" " 's") - (string/replace #"'ve" " 've") - (string/replace #"n't" " n't") - (string/replace #"'re" " 're") - (string/replace #"'d" " 'd") - (string/replace #"'ll" " 'll") - (string/replace #"," " , ") - (string/replace #"!" " ! 
") - (string/replace #"\(" " ( ") - (string/replace #"\)" " ) ") - (string/replace #"\?" " ? ") - (string/replace #" {2,}" " ") - (string/trim))) - -(defn load-mr-data-and-labels - "Loads MR polarity data from files, splits the data into words and generates labels. - Returns split sentences and labels." - [path max-examples] - (println "Loading all the movie reviews from " path) - (let [positive-examples (mapv #(string/trim %) (-> (slurp (str path "/rt-polarity.pos")) - (string/split #"\n"))) - negative-examples (mapv #(string/trim %) (-> (slurp (str path "/rt-polarity.neg")) - (string/split #"\n"))) - positive-examples (into [] (if max-examples (take max-examples positive-examples) positive-examples)) - negative-examples (into [] (if max-examples (take max-examples negative-examples) negative-examples)) - ;; split by words - x-text (->> (into positive-examples negative-examples) - (mapv clean-str) - (mapv #(string/split % #" "))) - - ;; generate labels - positive-labels (mapv (constantly 1) positive-examples) - negative-labels (mapv (constantly 0) negative-examples)] - {:sentences x-text :labels (into positive-labels negative-labels)})) - -(defn pad-sentences - "Pads all sentences to the same length where the length is defined by the longest sentence. Returns padded sentences." - [sentences] - (let [padding-word EOS - sequence-len (apply max (mapv count sentences))] - (mapv (fn [s] (let [diff (- sequence-len (count s))] - (if (pos? diff) - (into s (repeat diff padding-word)) - s))) - sentences))) - -(defn build-vocab-embeddings - "Returns the subset of `embeddings` for words from the `vocab`. - Embeddings for words not in the vocabulary are initialized randomly - from a uniform distribution." 
- [vocab embedding-size embeddings] - (into {} - (mapv (fn [[word _]] - [word (or (get embeddings word) - (ndarray/->vec (random/uniform -0.25 0.25 [embedding-size])))]) - vocab))) - -(defn build-input-data-with-embeddings - "Map sentences and labels to vectors based on a pretrained embeddings." - [sentences embeddings] - (mapv (fn [sent] (mapv #(embeddings %) sent)) sentences)) - -(defn build-vocab - "Creates a vocabulary for the data set based on frequency of words. - Returns a map from words to unique indices." - [sentences] - (let [words (flatten sentences) - wc (reduce - (fn [m w] (update-in m [w] (fnil inc 0))) - {} - words) - sorted-wc (sort-by second > wc) - sorted-w (map first sorted-wc)] - (into {} (map vector sorted-w (range (count sorted-w)))))) - -(defn load-ms-with-embeddings - "Loads the movie review sentences data set for the given - `:pretrained-embedding` (e.g. `nil`, `:glove` or `:word2vec`)" - [path max-examples embedding-size {:keys [pretrained-embedding] - :or {pretrained-embedding nil} - :as opts}] - (let [{:keys [sentences labels]} (load-mr-data-and-labels path max-examples) - sentences-padded (pad-sentences sentences) - vocab (build-vocab sentences-padded) - vocab-embeddings (case pretrained-embedding - :glove (->> (load-glove! (glove-file-path embedding-size)) - (build-vocab-embeddings vocab embedding-size)) - :fasttext (->> (load-fasttext! fasttext-file-path) - (build-vocab-embeddings vocab embedding-size)) - :word2vec (->> (load-word2vec-model! 
w2v-file-path embedding-size {:vocab vocab}) - (:word2vec) - (build-vocab-embeddings vocab embedding-size)) - vocab) - data (build-input-data-with-embeddings sentences-padded vocab-embeddings)] - {:data data - :label labels - :sentence-count (count data) - :sentence-size (count (first data)) - :embedding-size embedding-size - :vocab-size (count vocab) - :pretrained-embedding pretrained-embedding})) - diff --git a/contrib/clojure-package/examples/cnn-text-classification/test/cnn_text_classification/classifier_test.clj b/contrib/clojure-package/examples/cnn-text-classification/test/cnn_text_classification/classifier_test.clj deleted file mode 100644 index 744307e3e363..000000000000 --- a/contrib/clojure-package/examples/cnn-text-classification/test/cnn_text_classification/classifier_test.clj +++ /dev/null @@ -1,48 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns cnn-text-classification.classifier-test - (:require [clojure.test :refer :all] - [org.apache.clojure-mxnet.module :as module] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.util :as util] - [org.apache.clojure-mxnet.context :as context] - [cnn-text-classification.classifier :as classifier])) - -(deftest classifier-with-embeddings-test - (let [train (classifier/train-convnet - {:devs [(context/default-context)] - :embedding-size 50 - :batch-size 10 - :test-size 100 - :num-epoch 1 - :max-examples 1000 - :pretrained-embedding :glove})] - (is (= ["data"] (util/scala-vector->vec (module/data-names train)))) - (is (= 20 (count (ndarray/->vec (-> train module/outputs ffirst))))))) - -(deftest classifier-without-embeddings-test - (let [train (classifier/train-convnet - {:devs [(context/default-context)] - :embedding-size 50 - :batch-size 10 - :test-size 100 - :num-epoch 1 - :max-examples 1000 - :pretrained-embedding nil})] - (is (= ["data"] (util/scala-vector->vec (module/data-names train)))) - (is (= 20 (count (ndarray/->vec (-> train module/outputs ffirst))))))) diff --git a/contrib/clojure-package/examples/gan/.gitignore b/contrib/clojure-package/examples/gan/.gitignore deleted file mode 100644 index ea8013148d11..000000000000 --- a/contrib/clojure-package/examples/gan/.gitignore +++ /dev/null @@ -1,12 +0,0 @@ -/target -/classes -/checkouts -pom.xml -pom.xml.asc -*.jar -*.class -/.lein-* -/.nrepl-port -.hgignore -.hg/ -results \ No newline at end of file diff --git a/contrib/clojure-package/examples/gan/README.md b/contrib/clojure-package/examples/gan/README.md deleted file mode 100644 index 30ecafb266db..000000000000 --- a/contrib/clojure-package/examples/gan/README.md +++ /dev/null @@ -1,40 +0,0 @@ - - - - - - - - - - - - - - - - - -# gan - -This is an example of how to do a GAN with the MNIST data - -## Installation - -Before you run this example, make sure that you have the clojure package installed. 
-In the main clojure package directory, do `lein install`. Then you can run -`lein install` in this directory. - - -## Usage - -Do `lein run` and the images generated will be in the `results` directory. The gout* images are the ones generated, the diff* images are the visualization of the input gradient difference fed to the generator - -`lein run :gpu` will run on gpu - -If you are running on AWS you will need to set up X11 for graphics -`sudo apt install xauth x11-apps` - -then log in again with `ssh -X -i creds ubuntu@yourinstance` - - diff --git a/contrib/clojure-package/examples/gan/project.clj b/contrib/clojure-package/examples/gan/project.clj deleted file mode 100644 index 7ec497787680..000000000000 --- a/contrib/clojure-package/examples/gan/project.clj +++ /dev/null @@ -1,26 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(defproject gan-origami "0.1.0-SNAPSHOT" - :description "GAN MNIST with MXNet" - :plugins [[lein-cljfmt "0.5.7"]] - :repositories [["vendredi" {:url "https://repository.hellonico.info/repository/hellonico/"}]] - :dependencies [[org.clojure/clojure "1.9.0"] - [org.apache.mxnet.contrib.clojure/clojure-mxnet "2.0.0-SNAPSHOT"] - [origami "4.0.0-3"] - ] - :main gan.gan-mnist) diff --git a/contrib/clojure-package/examples/gan/src/gan/gan_mnist.clj b/contrib/clojure-package/examples/gan/src/gan/gan_mnist.clj deleted file mode 100644 index 944791bce604..000000000000 --- a/contrib/clojure-package/examples/gan/src/gan/gan_mnist.clj +++ /dev/null @@ -1,219 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns gan.gan-mnist - (:require [clojure.java.io :as io] - [clojure.java.shell :refer [sh]] - [org.apache.clojure-mxnet.executor :as executor] - [org.apache.clojure-mxnet.eval-metric :as eval-metric] - [org.apache.clojure-mxnet.io :as mx-io] - [org.apache.clojure-mxnet.initializer :as init] - [org.apache.clojure-mxnet.module :as m] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.optimizer :as opt] - [org.apache.clojure-mxnet.symbol :as sym] - [org.apache.clojure-mxnet.shape :as mx-shape] - [org.apache.clojure-mxnet.util :as util] - [gan.viz :as viz] - [org.apache.clojure-mxnet.context :as context]) - (:gen-class)) - -;; based off of https://medium.com/@julsimon/generative-adversarial-networks-on-apache-mxnet-part-1-b6d39e6b5df1 - - -(def data-dir "data/") -(def output-path "results/") -(def batch-size 100) -(def num-epoch 10) - -(io/make-parents (str output-path "gout")) - -(when-not (.exists (io/file (str data-dir "train-images-idx3-ubyte"))) - (sh "../../scripts/get_mnist_data.sh")) - -(defonce mnist-iter (mx-io/mnist-iter {:image (str data-dir "train-images-idx3-ubyte") - :label (str data-dir "train-labels-idx1-ubyte") - :input-shape [1 28 28] - :batch-size batch-size - :shuffle true})) - -(def rand-noise-iter (mx-io/rand-iter [batch-size 100 1 1])) - -(comment - - ;;This is for figuring out the convolution and deconvolution layers to convert the image sizes - - (defn conv-output-size [input-size kernel-size padding stride] - (float (inc (/ (- (+ input-size (* 2 padding)) kernel-size) stride)))) - - ;; Calcing the layer sizes for discriminator - (conv-output-size 28 4 3 2) ;=> 16 - (conv-output-size 16 4 1 2) ;=> 8 - (conv-output-size 8 4 1 2) ;=> 4.0 - (conv-output-size 4 4 0 1) ;=> 1 - - ;; Calcing the layer sizes for generator - (defn deconv-output-size [input-size kernel-size padding stride] - (- - (+ (* stride (- input-size 1)) - kernel-size) - (* 2 padding))) - - (deconv-output-size 1 4 0 1) ;=> 4 - (deconv-output-size 4 4 
1 2) ;=> 8 - (deconv-output-size 8 4 1 2) ;=> 16 - (deconv-output-size 16 4 3 2)) ;=> 28 - - -(def ndf 28) ;; image height /width -(def nc 1) ;; number of channels -(def eps (float (+ 1e-5 1e-12))) -(def lr 0.0005) ;; learning rate -(def beta1 0.5) - -(def label (sym/variable "label")) - -(defn discriminator [] - (as-> (sym/variable "data") data - (sym/convolution "d1" {:data data :kernel [4 4] :pad [3 3] :stride [2 2] :num-filter ndf :no-bias true}) - (sym/batch-norm "dbn1" {:data data :fix-gamma true :eps eps}) - (sym/leaky-re-lu "dact1" {:data data :act-type "leaky" :slope 0.2}) - - (sym/convolution "d2" {:data data :kernel [4 4] :pad [1 1] :stride [2 2] :num-filter (* 2 ndf) :no-bias true}) - (sym/batch-norm "dbn2" {:data data :fix-gamma true :eps eps}) - (sym/leaky-re-lu "dact2" {:data data :act-type "leaky" :slope 0.2}) - - (sym/convolution "d3" {:data data :kernel [4 4] :pad [1 1] :stride [2 2] :num-filter (* 3 ndf) :no-bias true}) - (sym/batch-norm "dbn3" {:data data :fix-gamma true :eps eps}) - (sym/leaky-re-lu "dact3" {:data data :act-type "leaky" :slope 0.2}) - - (sym/convolution "d4" {:data data :kernel [4 4] :pad [0 0] :stride [1 1] :num-filter (* 4 ndf) :no-bias true}) - (sym/flatten "flt" {:data data}) - - (sym/fully-connected "fc" {:data data :num-hidden 1 :no-bias false}) - (sym/logistic-regression-output "dloss" {:data data :label label}))) - -(defn generator [] - (as-> (sym/variable "rand") data - (sym/deconvolution "g1" {:data data :kernel [4 4] :pad [0 0] :stride [1 1] :num-filter (* 4 ndf) :no-bias true}) - (sym/batch-norm "gbn1" {:data data :fix-gamma true :eps eps}) - (sym/activation "gact1" {:data data :act-type "relu"}) - - (sym/deconvolution "g2" {:data data :kernel [4 4] :pad [1 1] :stride [2 2] :num-filter (* 2 ndf) :no-bias true}) - (sym/batch-norm "gbn2" {:data data :fix-gamma true :eps eps}) - (sym/activation "gact2" {:data data :act-type "relu"}) - - (sym/deconvolution "g3" {:data data :kernel [4 4] :pad [1 1] :stride [2 2] 
:num-filter ndf :no-bias true}) - (sym/batch-norm "gbn3" {:data data :fix-gamma true :eps eps}) - (sym/activation "gact3" {:data data :act-type "relu"}) - - (sym/deconvolution "g4" {:data data :kernel [4 4] :pad [3 3] :stride [2 2] :num-filter nc :no-bias true}) - (sym/activation "gact4" {:data data :act-type "tanh"}))) - -(let [data [(ndarray/ones [batch-size 100 1 1])] - label [(ndarray/ones [batch-size 100 1 1])]] - (def my-iter (mx-io/ndarray-iter data))) - -(defn save-img-gout [i n x] - (do - (viz/im-sav {:title (str "gout-" i "-" n) - :output-path output-path - :x x - :flip false}))) - -(defn save-img-diff [i n x] - (do (viz/im-sav {:title (str "diff-" i "-" n) - :output-path output-path - :x x - :flip false}))) - -(defn save-img-data [i n batch] - (do (viz/im-sav {:title (str "data-" i "-" n) - :output-path output-path - :x (first (mx-io/batch-data batch)) - :flip false}))) - -(defn calc-diff [i n diff-d] - (let [diff (ndarray/copy diff-d) - arr (ndarray/->vec diff) - mean (/ (apply + arr) (count arr)) - std (let [tmp-a (map #(* (- % mean) (- % mean)) arr)] - (float (Math/sqrt (/ (apply + tmp-a) (count tmp-a)))))] - (let [calc-diff (ndarray/+ (ndarray/div (ndarray/- diff mean) std) 0.5)] - - (save-img-diff i n calc-diff)))) - -(defn train - ([devs] (train devs num-epoch)) - ([devs num-epoch] - (let [mod-d (-> (m/module (discriminator) {:contexts devs :data-names ["data"] :label-names ["label"]}) - (m/bind {:data-shapes (mx-io/provide-data-desc mnist-iter) - :label-shapes (mx-io/provide-label-desc mnist-iter) - :inputs-need-grad true}) - (m/init-params {:initializer (init/normal 0.02)}) - (m/init-optimizer {:optimizer (opt/adam {:learning-rate lr :wd 0.0 :beta1 beta1})})) - mod-g (-> (m/module (generator) {:contexts devs :data-names ["rand"] :label-names nil}) - (m/bind {:data-shapes (mx-io/provide-data-desc rand-noise-iter)}) - (m/init-params {:initializer (init/normal 0.02)}) - (m/init-optimizer {:optimizer (opt/adam {:learning-rate lr :wd 0.0 :beta1 
beta1})}))] - - (println "Training for " num-epoch " epochs...") - (doseq [i (range num-epoch)] - (mx-io/reduce-batches mnist-iter - (fn [n batch] - (let [rbatch (mx-io/next rand-noise-iter) - out-g (-> mod-g - (m/forward rbatch) - (m/outputs)) - ;; update the discriminiator on the fake - grads-f (mapv #(ndarray/copy (first %)) (-> mod-d - (m/forward {:data (first out-g) :label [(ndarray/zeros [batch-size])]}) - (m/backward) - (m/grad-arrays))) - ;; update the discrimintator on the real - grads-r (-> mod-d - (m/forward {:data (mx-io/batch-data batch) :label [(ndarray/ones [batch-size])]}) - (m/backward) - (m/grad-arrays)) - _ (mapv (fn [real fake] (let [r (first real)] - (ndarray/set r (ndarray/+ r fake)))) grads-r grads-f) - _ (m/update mod-d) - ;; update the generator - diff-d (-> mod-d - (m/forward {:data (first out-g) :label [(ndarray/ones [batch-size])]}) - (m/backward) - (m/input-grads)) - _ (-> mod-g - (m/backward (first diff-d)) - (m/update))] - (when (zero? (mod n 100)) - (println "iteration = " i "number = " n) - (save-img-gout i n (ndarray/copy (ffirst out-g))) - (save-img-data i n batch) - (calc-diff i n (ffirst diff-d))) - (inc n)))))))) - -(defn -main [& args] - (let [[dev dev-num] args - devs (if (= dev ":gpu") - (mapv #(context/gpu %) (range (Integer/parseInt (or dev-num "1")))) - (mapv #(context/cpu %) (range (Integer/parseInt (or dev-num "1")))))] - (println "Running with context devices of" devs) - (train devs))) - -(comment - (train [(context/cpu)])) diff --git a/contrib/clojure-package/examples/gan/src/gan/viz.clj b/contrib/clojure-package/examples/gan/src/gan/viz.clj deleted file mode 100644 index 08da53cb2382..000000000000 --- a/contrib/clojure-package/examples/gan/src/gan/viz.clj +++ /dev/null @@ -1,59 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. 
-;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns gan.viz - (:require [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.shape :as mx-shape] - [org.apache.clojure-mxnet.io :as mx-io] - [opencv4.utils :as cvu] - [opencv4.core :as cv :refer [CV_8UC1 new-matofbyte flip! imwrite new-size hconcat! vconcat! new-mat merge!]])) - -(defn clip [x] - (->> x - (mapv #(* 255 %)) - (mapv #(cond - (< % 0) 0 - (> % 255) 255 - :else (int %))) - (mapv #(.byteValue %)))) - -(defn get-img [raw-data channels height width flip] - (let [img (if (> channels 1) - (throw (Exception. "Image with 3 channels (RGB) not supported")) - ;; gray image - (cv/>> (new-mat height width CV_8UC1) (byte-array raw-data)))] - (if flip (flip! img 0) img))) - -(defn im-sav [{:keys [title output-path x flip] - :or {flip false} :as g-mod}] - (let [shape (mx-shape/->vec (ndarray/shape x)) - _ (assert (== 4 (count shape))) - [n c h w] shape - totals (* h w) - raw-data (byte-array (clip (ndarray/to-array x))) - row (.intValue (Math/sqrt n)) - col row - line-arrs (into [] (partition (* col c totals) raw-data)) - line-mats (mapv (fn [line] - (let [img-arr (into [] (partition (* c totals) line)) - src (mapv (fn [arr] (get-img (into [] arr) c h w flip)) img-arr)] - (hconcat! src))) - line-arrs)] - (-> line-mats - (vconcat!) 
- (cvu/resize-by 1.5) - (imwrite (str output-path title ".jpg"))))) \ No newline at end of file diff --git a/contrib/clojure-package/examples/gan/test/gan/gan_test.clj b/contrib/clojure-package/examples/gan/test/gan/gan_test.clj deleted file mode 100644 index 71b9126cae25..000000000000 --- a/contrib/clojure-package/examples/gan/test/gan/gan_test.clj +++ /dev/null @@ -1,25 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns gan.gan_test - (:require - [gan.gan-mnist :refer :all] - [org.apache.clojure-mxnet.context :as context] - [clojure.test :refer :all])) - -(deftest check-pdf - (train [(context/cpu)] 1)) \ No newline at end of file diff --git a/contrib/clojure-package/examples/imclassification/.gitignore b/contrib/clojure-package/examples/imclassification/.gitignore deleted file mode 100644 index c53038ec0e3d..000000000000 --- a/contrib/clojure-package/examples/imclassification/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -/target -/classes -/checkouts -pom.xml -pom.xml.asc -*.jar -*.class -/.lein-* -/.nrepl-port -.hgignore -.hg/ diff --git a/contrib/clojure-package/examples/imclassification/README.md b/contrib/clojure-package/examples/imclassification/README.md deleted file mode 100644 index d5ab99ced90d..000000000000 --- a/contrib/clojure-package/examples/imclassification/README.md +++ /dev/null @@ -1,46 +0,0 @@ - - - - - - - - - - - - - - - - - -# imclassification - -This shows off how to do image classification with the module api - -There is an example of the high level training api fit and also how to use multiple cpus/gpus - -To see more examples of how to use different parts of the module api look at the module example - -An example of using the profiler. - -## Installation - -Before you run this example, make sure that you have the clojure package installed. -In the main clojure package directory, do `lein install`. Then you can run -`lein install` in this directory. - - -## Usage - -To run the example you must do - -* `lein install` in the root of the main project directory -* cd into this project directory and do `lein run`. This will execute the cpu version. 
- -You can control the devices you run on by doing: - -`lein run :cpu 2` - This will run on 2 cpu devices -`lein run :gpu 1` - This will run on 1 gpu device -`lein run :gpu 2` - This will run on 2 gpu devices diff --git a/contrib/clojure-package/examples/imclassification/project.clj b/contrib/clojure-package/examples/imclassification/project.clj deleted file mode 100644 index 471c7f760de2..000000000000 --- a/contrib/clojure-package/examples/imclassification/project.clj +++ /dev/null @@ -1,24 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(defproject imclassification "0.1.0-SNAPSHOT" - :description "Clojure examples for image classification" - :plugins [[lein-cljfmt "0.5.7"]] - :dependencies [[org.clojure/clojure "1.9.0"] - [org.apache.mxnet.contrib.clojure/clojure-mxnet "2.0.0-SNAPSHOT"]] - :pedantic? 
:skip - :main imclassification.train-mnist) diff --git a/contrib/clojure-package/examples/imclassification/src/imclassification/train_mnist.clj b/contrib/clojure-package/examples/imclassification/src/imclassification/train_mnist.clj deleted file mode 100644 index 164b5f2620f2..000000000000 --- a/contrib/clojure-package/examples/imclassification/src/imclassification/train_mnist.clj +++ /dev/null @@ -1,121 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns imclassification.train-mnist - (:require [clojure.java.io :as io] - [clojure.java.shell :refer [sh]] - [org.apache.clojure-mxnet.io :as mx-io] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.module :as m] - [org.apache.clojure-mxnet.symbol :as sym] - [org.apache.clojure-mxnet.kvstore :as kvstore] - [org.apache.clojure-mxnet.kvstore-server :as kvstore-server] - [org.apache.clojure-mxnet.optimizer :as optimizer] - [org.apache.clojure-mxnet.eval-metric :as eval-metric] - [org.apache.clojure-mxnet.resource-scope :as resource-scope]) - (:gen-class)) - -(def data-dir "data/") ;; the data directory to store the mnist data -(def batch-size 10) ;; the batch size -(def optimizer (optimizer/sgd {:learning-rate 0.01 :momentum 0.0})) -(def eval-metric (eval-metric/accuracy)) -(def num-epoch 1) ;; the number of training epochs -(def kvstore "local") ;; the kvstore type -;;; Note to run distributed you might need to compile the engine with an option set -(def role "worker") ;; scheduler/server/worker -(def scheduler-host nil) ;; scheduler hostname / IP address -(def scheduler-port 0) ;; scheduler port -(def num-workers 1) ;; # of workers -(def num-servers 1) ;; # of servers - - -(def envs (cond-> {"DMLC_ROLE" role} - scheduler-host (merge {"DMLC_PS_ROOT_URI" scheduler-host - "DMLC_PS_ROOT_PORT" (str scheduler-port) - "DMLC_NUM_WORKER" (str num-workers) - "DMLC_NUM_SERVER" (str num-servers)}))) - -(when-not (.exists (io/file (str data-dir "train-images-idx3-ubyte"))) - (sh "../../scripts/get_mnist_data.sh")) - -(defn get-symbol [] - (as-> (sym/variable "data") data - (sym/fully-connected "fc1" {:data data :num-hidden 128}) - (sym/activation "relu1" {:data data :act-type "relu"}) - (sym/fully-connected "fc2" {:data data :num-hidden 64}) - (sym/activation "relu2" {:data data :act-type "relu"}) - (sym/fully-connected "fc3" {:data data :num-hidden 10}) - (sym/softmax-output "softmax" {:data data}))) - - -(defn train-data [] - (mx-io/mnist-iter 
{:image (str data-dir "train-images-idx3-ubyte") - :label (str data-dir "train-labels-idx1-ubyte") - :label-name "softmax_label" - :input-shape [784] - :batch-size batch-size - :shuffle true - :flat true - :silent false - :seed 10 - :num-parts num-workers - :part-index 0})) - -(defn eval-data [] - (mx-io/mnist-iter {:image (str data-dir "t10k-images-idx3-ubyte") - :label (str data-dir "t10k-labels-idx1-ubyte") - :input-shape [784] - :batch-size batch-size - :flat true - :silent false - :num-parts num-workers - :part-index 0})) - -(defn start - ([devs] (start devs num-epoch)) - ([devs _num-epoch] - (when scheduler-host - (println "Initing PS enviornments with " envs) - (kvstore-server/init envs)) - - (if (not= "worker" role) - (do - (println "Start KVStoreServer for scheduler and servers") - (kvstore-server/start)) - (do - (println "Starting Training of MNIST ....") - (println "Running with context devices of" devs) - (resource-scope/with-let [_mod (m/module (get-symbol) {:contexts devs})] - (-> _mod - (m/fit {:train-data (train-data) - :eval-data (eval-data) - :num-epoch _num-epoch - :fit-params (m/fit-params {:kvstore kvstore - :optimizer optimizer - :eval-metric eval-metric})}) - (m/save-checkpoint {:prefix "target/test" :epoch _num-epoch})) - (println "Finish fit")))))) - -(defn -main [& args] - (let [[dev dev-num] args - devs (if (= dev ":gpu") - (mapv #(context/gpu %) (range (Integer/parseInt (or dev-num "1")))) - (mapv #(context/cpu %) (range (Integer/parseInt (or dev-num "1")))))] - (start devs))) - -(comment - (start [(context/cpu)])) diff --git a/contrib/clojure-package/examples/imclassification/test/imclassification/train_mnist_test.clj b/contrib/clojure-package/examples/imclassification/test/imclassification/train_mnist_test.clj deleted file mode 100644 index f185891ab31e..000000000000 --- a/contrib/clojure-package/examples/imclassification/test/imclassification/train_mnist_test.clj +++ /dev/null @@ -1,40 +0,0 @@ -;; -;; Licensed to the Apache Software 
Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns imclassification.train-mnist-test - (:require - [clojure.test :refer :all] - [clojure.java.io :as io] - [clojure.string :as s] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.module :as module] - [imclassification.train-mnist :as mnist])) - -(defn- file-to-filtered-seq [file] - (->> - file - (io/file) - (io/reader) - (line-seq) - (filter #(not (s/includes? 
% "mxnet_version"))))) - -(deftest mnist-two-epochs-test - (do - (mnist/start [(context/cpu)] 2) - (is (= - (file-to-filtered-seq "test/test-symbol.json.ref") - (file-to-filtered-seq "target/test-symbol.json"))))) diff --git a/contrib/clojure-package/examples/imclassification/test/test-symbol.json.ref b/contrib/clojure-package/examples/imclassification/test/test-symbol.json.ref deleted file mode 100644 index ba1d2fad3a8a..000000000000 --- a/contrib/clojure-package/examples/imclassification/test/test-symbol.json.ref +++ /dev/null @@ -1,105 +0,0 @@ -{ - "nodes": [ - { - "op": "null", - "name": "data", - "inputs": [] - }, - { - "op": "null", - "name": "fc1_weight", - "attrs": {"num_hidden": "128"}, - "inputs": [] - }, - { - "op": "null", - "name": "fc1_bias", - "attrs": {"num_hidden": "128"}, - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "fc1", - "attrs": {"num_hidden": "128"}, - "inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]] - }, - { - "op": "Activation", - "name": "relu1", - "attrs": {"act_type": "relu"}, - "inputs": [[3, 0, 0]] - }, - { - "op": "null", - "name": "fc2_weight", - "attrs": {"num_hidden": "64"}, - "inputs": [] - }, - { - "op": "null", - "name": "fc2_bias", - "attrs": {"num_hidden": "64"}, - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "fc2", - "attrs": {"num_hidden": "64"}, - "inputs": [[4, 0, 0], [5, 0, 0], [6, 0, 0]] - }, - { - "op": "Activation", - "name": "relu2", - "attrs": {"act_type": "relu"}, - "inputs": [[7, 0, 0]] - }, - { - "op": "null", - "name": "fc3_weight", - "attrs": {"num_hidden": "10"}, - "inputs": [] - }, - { - "op": "null", - "name": "fc3_bias", - "attrs": {"num_hidden": "10"}, - "inputs": [] - }, - { - "op": "FullyConnected", - "name": "fc3", - "attrs": {"num_hidden": "10"}, - "inputs": [[8, 0, 0], [9, 0, 0], [10, 0, 0]] - }, - { - "op": "null", - "name": "softmax_label", - "inputs": [] - }, - { - "op": "SoftmaxOutput", - "name": "softmax", - "inputs": [[11, 0, 0], [12, 0, 0]] - } - ], - "arg_nodes": [0, 1, 
2, 5, 6, 9, 10, 12], - "node_row_ptr": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "heads": [[13, 0, 0]], - "attrs": {"mxnet_version": ["int", 10400]} -} \ No newline at end of file diff --git a/contrib/clojure-package/examples/infer/imageclassifier/.gitignore b/contrib/clojure-package/examples/infer/imageclassifier/.gitignore deleted file mode 100644 index 35491f1a084a..000000000000 --- a/contrib/clojure-package/examples/infer/imageclassifier/.gitignore +++ /dev/null @@ -1,12 +0,0 @@ -/target -/classes -/checkouts -/images -pom.xml -pom.xml.asc -*.jar -*.class -/.lein-* -/.nrepl-port -.hgignore -.hg/ diff --git a/contrib/clojure-package/examples/infer/imageclassifier/README.md b/contrib/clojure-package/examples/infer/imageclassifier/README.md deleted file mode 100644 index ef82feebd53f..000000000000 --- a/contrib/clojure-package/examples/infer/imageclassifier/README.md +++ /dev/null @@ -1,41 +0,0 @@ - - - - - - - - - - - - - - - - - -# imageclassifier - -Run image classification using clojure infer package. - -## Installation - -Before you run this example, make sure that you have the clojure package installed. -In the main clojure package directory, do `lein install`. Then you can run -`lein install` in this directory. 
- -## Usage - -``` -$ chmod +x scripts/get_resnet_18_data.sh -$ ./scripts/get_resnet_18_data.sh -$ -$ lein run -- --help -$ lein run -- -m models/resnet-18/resnet-18 -i images/kitten.jpg -d images/ -$ -$ lein uberjar -$ java -jar target/imageclassifier-0.1.0-SNAPSHOT-standalone.jar --help -$ java -jar target/imageclassifier-0.1.0-SNAPSHOT-standalone.jar \ - -m models/resnet-18/resnet-18 -i images/kitten.jpg -d images/ -``` diff --git a/contrib/clojure-package/examples/infer/imageclassifier/project.clj b/contrib/clojure-package/examples/infer/imageclassifier/project.clj deleted file mode 100644 index f04d76274fa1..000000000000 --- a/contrib/clojure-package/examples/infer/imageclassifier/project.clj +++ /dev/null @@ -1,25 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(defproject imageclassifier "0.1.0-SNAPSHOT" - :description "Image classification using infer with MXNet" - :plugins [[lein-cljfmt "0.5.7"]] - :dependencies [[org.clojure/clojure "1.9.0"] - [org.clojure/tools.cli "0.4.1"] - [org.apache.mxnet.contrib.clojure/clojure-mxnet "2.0.0-SNAPSHOT"]] - :main ^:skip-aot infer.imageclassifier-example - :profiles {:uberjar {:aot :all}}) diff --git a/contrib/clojure-package/examples/infer/imageclassifier/scripts/get_resnet_18_data.sh b/contrib/clojure-package/examples/infer/imageclassifier/scripts/get_resnet_18_data.sh deleted file mode 100755 index 1a142e8edbfd..000000000000 --- a/contrib/clojure-package/examples/infer/imageclassifier/scripts/get_resnet_18_data.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -evx - -MXNET_ROOT=$(cd "$(dirname $0)/.."; pwd) - -data_path=$MXNET_ROOT/models/resnet-18/ - -image_path=$MXNET_ROOT/images/ - -if [ ! -d "$data_path" ]; then - mkdir -p "$data_path" -fi - -if [ ! -d "$image_path" ]; then - mkdir -p "$image_path" -fi - -if [ ! 
-f "$data_path/resnet-18-0000.params" ]; then - wget https://s3.us-east-2.amazonaws.com/scala-infer-models/resnet-18/resnet-18-symbol.json -P $data_path - wget https://s3.us-east-2.amazonaws.com/scala-infer-models/resnet-18/resnet-18-0000.params -P $data_path - wget https://s3.us-east-2.amazonaws.com/scala-infer-models/resnet-18/synset.txt -P $data_path -fi - -if [ ! -f "$image_path/kitten.jpg" ]; then - wget https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/resnet152/kitten.jpg -P $image_path - wget https://s3.amazonaws.com/model-server/inputs/Pug-Cookie.jpg -P $image_path -fi diff --git a/contrib/clojure-package/examples/infer/imageclassifier/scripts/get_resnet_data.sh b/contrib/clojure-package/examples/infer/imageclassifier/scripts/get_resnet_data.sh deleted file mode 100755 index fcef59bacc6f..000000000000 --- a/contrib/clojure-package/examples/infer/imageclassifier/scripts/get_resnet_data.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e - -MXNET_ROOT=$(cd "$(dirname $0)/.."; pwd) - -data_path=$MXNET_ROOT/models/resnet-152/ - -image_path=$MXNET_ROOT/images/ - -if [ ! -d "$data_path" ]; then - mkdir -p "$data_path" -fi - -if [ ! 
-d "$image_path" ]; then - mkdir -p "$image_path" -fi - -if [ ! -f "$data_path/resnet-152-0000.params" ]; then - wget https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/resnet152/resnet-152-0000.params -P $data_path - wget https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/resnet152/resnet-152-symbol.json -P $data_path - wget https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/resnet152/synset.txt -P $data_path -fi - -if [ ! -f "$image_path/kitten.jpg" ]; then - wget https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/resnet152/kitten.jpg -P $image_path -fi diff --git a/contrib/clojure-package/examples/infer/imageclassifier/src/infer/imageclassifier_example.clj b/contrib/clojure-package/examples/infer/imageclassifier/src/infer/imageclassifier_example.clj deleted file mode 100644 index bc8b82e1ece1..000000000000 --- a/contrib/clojure-package/examples/infer/imageclassifier/src/infer/imageclassifier_example.clj +++ /dev/null @@ -1,113 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns infer.imageclassifier-example - (:require [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.dtype :as dtype] - [org.apache.clojure-mxnet.infer :as infer] - [org.apache.clojure-mxnet.layout :as layout] - [clojure.java.io :as io] - [clojure.string :refer [join]] - [clojure.tools.cli :refer [parse-opts]]) - (:gen-class)) - -(defn check-valid-dir - "Check that the input directory exists" - [input-dir] - (let [dir (io/file input-dir)] - (and - (.exists dir) - (.isDirectory dir)))) - -(defn check-valid-file - "Check that the file exists" - [input-file] - (.exists (io/file input-file))) - -(def cli-options - [["-m" "--model-path-prefix PREFIX" "Model path prefix" - :default "models/resnet-18/resnet-18" - :validate [#(check-valid-file (str % "-symbol.json")) - "Model path prefix is invalid"]] - ["-i" "--input-image IMAGE" "Input image" - :default "images/kitten.jpg" - :validate [check-valid-file "Input file not found"]] - ["-d" "--input-dir IMAGE_DIR" "Input directory" - :default "images/" - :validate [check-valid-dir "Input directory not found"]] - ["-h" "--help"]]) - -(defn print-predictions - "Print image classifier predictions for the given input file" - [predictions] - (println (apply str (repeat 80 "="))) - (doseq [p predictions] - (println p)) - (println (apply str (repeat 80 "=")))) - -(defn classify-single-image - "Classify a single image and print top-5 predictions" - [classifier input-image] - (let [image (infer/load-image-from-file input-image) - topk 5 - predictions (infer/classify-image classifier image topk)] - [predictions])) - -(defn classify-images-in-dir - "Classify all jpg images in the directory" - [classifier input-dir] - (let [batch-size 20 - image-file-batches (->> input-dir - io/file - file-seq - sort - reverse - (filter #(.isFile %)) - (filter #(re-matches #".*\.jpg$" (.getPath %))) - (mapv #(.getPath %)) - (partition-all batch-size))] - (apply concat (for [image-files image-file-batches] - (let [image-batch 
(infer/load-image-paths image-files) - topk 5] - (infer/classify-image-batch classifier image-batch topk)))))) - -(defn run-classifier - "Runs an image classifier based on options provided" - [options] - (let [{:keys [model-path-prefix input-image input-dir]} options - descriptors [{:name "data" - :shape [1 3 224 224] - :layout layout/NCHW - :dtype dtype/FLOAT32}] - factory (infer/model-factory model-path-prefix descriptors) - classifier (infer/create-image-classifier - factory {:contexts [(context/default-context)]})] - (println "Classifying a single image") - (print-predictions (classify-single-image classifier input-image)) - (println "\n") - (println "Classifying images in a directory") - (doseq [predictions (classify-images-in-dir classifier input-dir)] - (print-predictions predictions)))) - -(defn -main - [& args] - (let [{:keys [options summary errors] :as opts} - (parse-opts args cli-options)] - (cond - (:help options) (println summary) - (some? errors) (println (join "\n" errors)) - :else (run-classifier options)))) diff --git a/contrib/clojure-package/examples/infer/imageclassifier/test/infer/imageclassifier_example_test.clj b/contrib/clojure-package/examples/infer/imageclassifier/test/infer/imageclassifier_example_test.clj deleted file mode 100644 index 4b71f845dd5f..000000000000 --- a/contrib/clojure-package/examples/infer/imageclassifier/test/infer/imageclassifier_example_test.clj +++ /dev/null @@ -1,58 +0,0 @@ -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. 
You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns infer.imageclassifier-example-test - (:require [infer.imageclassifier-example :refer [classify-single-image - classify-images-in-dir]] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.dtype :as dtype] - [org.apache.clojure-mxnet.infer :as infer] - [org.apache.clojure-mxnet.layout :as layout] - [clojure.java.io :as io] - [clojure.java.shell :refer [sh]] - [clojure.test :refer :all])) - -(def model-dir "models/") -(def image-dir "images/") -(def model-path-prefix (str model-dir "resnet-18/resnet-18")) -(def image-file (str image-dir "kitten.jpg")) - -(when-not (.exists (io/file (str model-path-prefix "-symbol.json"))) - (sh "./scripts/get_resnet_18_data.sh")) - -(defn create-classifier [] - (let [descriptors [{:name "data" - :shape [1 3 224 224] - :layout layout/NCHW - :dtype dtype/FLOAT32}] - factory (infer/model-factory model-path-prefix descriptors)] - (infer/create-image-classifier factory))) - -(deftest test-single-classification - (let [classifier (create-classifier) - [[predictions]] (classify-single-image classifier image-file)] - (is (some? predictions)) - (is (= 5 (count predictions))) - (is (= "n02123159 tiger cat" (:class (first predictions)))) - (is (= (< 0 (:prob (first predictions)) 1))))) - -(deftest test-batch-classification - (let [classifier (create-classifier) - predictions (first (classify-images-in-dir classifier image-dir))] - (is (some? 
predictions)) - (is (= 5 (count predictions))) - (is (= "n02123159 tiger cat" (:class (first predictions)))) - (is (= (< 0 (:prob (first predictions)) 1))))) diff --git a/contrib/clojure-package/examples/infer/predictor/test/infer/predictor_example_test.clj b/contrib/clojure-package/examples/infer/predictor/test/infer/predictor_example_test.clj deleted file mode 100644 index 02f826fbb77f..000000000000 --- a/contrib/clojure-package/examples/infer/predictor/test/infer/predictor_example_test.clj +++ /dev/null @@ -1,51 +0,0 @@ -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns infer.predictor-example-test - (:require [infer.predictor-example :refer [preprocess - do-inference - postprocess]] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.dtype :as dtype] - [org.apache.clojure-mxnet.infer :as infer] - [org.apache.clojure-mxnet.layout :as layout] - [clojure.java.io :as io] - [clojure.java.shell :refer [sh]] - [clojure.test :refer :all])) - -(def model-dir "models/") -(def image-file "images/kitten.jpg") -(def model-path-prefix (str model-dir "resnet-18/resnet-18")) -(def width 224) -(def height 224) - -(when-not (.exists (io/file (str model-path-prefix "-symbol.json"))) - (sh "./scripts/get_resnet_18_data.sh")) - -(defn create-predictor [] - (let [descriptors [{:name "data" - :shape [1 3 height width] - :layout layout/NCHW - :dtype dtype/FLOAT32}] - factory (infer/model-factory model-path-prefix descriptors)] - (infer/create-predictor factory))) - -(deftest predictor-test - (let [predictor (create-predictor) - image-ndarray (preprocess image-file width height) - predictions (do-inference predictor image-ndarray) - best-prediction (postprocess model-path-prefix predictions)] - (is (= "n02123159 tiger cat" best-prediction)))) diff --git a/contrib/clojure-package/examples/module/README.md b/contrib/clojure-package/examples/module/README.md deleted file mode 100644 index fcd16b74a173..000000000000 --- a/contrib/clojure-package/examples/module/README.md +++ /dev/null @@ -1,38 +0,0 @@ - - - - - - - - - - - - - - - - - -## Instructions - -This shows off how to use the module api. - -There are examples of: - - high level api of training and prediction - - intermediate level api with save and loading from checkpoints - - examples of how to iteratate through the batch and calculate accuracy and predict manually. - -To run the example you must do - -* `lein install` in the root of the main project directory -* cd into this project directory and do `lein run`. This will execute the cpu version. 
- -You can control the devices you run on by doing: - -`lein run :cpu 2` - This will run on 2 cpu devices -`lein run :gpu 1` - This will run on 1 gpu device -`lein run :gpu 2` - This will run on 2 gpu devices - - diff --git a/contrib/clojure-package/examples/module/project.clj b/contrib/clojure-package/examples/module/project.clj deleted file mode 100644 index 04e7fa140a06..000000000000 --- a/contrib/clojure-package/examples/module/project.clj +++ /dev/null @@ -1,25 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(defproject module-examples "0.1.0-SNAPSHOT" - :description "Clojure examples for module" - :plugins [[lein-cljfmt "0.5.7"]] - :dependencies [[org.clojure/clojure "1.9.0"] - [org.apache.mxnet.contrib.clojure/clojure-mxnet "2.0.0-SNAPSHOT"]] - :pedantic? :skip - :main mnist-mlp) - diff --git a/contrib/clojure-package/examples/module/src/mnist_mlp.clj b/contrib/clojure-package/examples/module/src/mnist_mlp.clj deleted file mode 100644 index c5ffbbede852..000000000000 --- a/contrib/clojure-package/examples/module/src/mnist_mlp.clj +++ /dev/null @@ -1,237 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. 
See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns mnist-mlp - (:require [clojure.java.io :as io] - [clojure.java.shell :refer [sh]] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.eval-metric :as eval-metric] - [org.apache.clojure-mxnet.io :as mx-io] - [org.apache.clojure-mxnet.module :as m] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.optimizer :as optimizer] - [org.apache.clojure-mxnet.symbol :as sym] - [org.apache.clojure-mxnet.util :as util] - [org.apache.clojure-mxnet.ndarray :as ndarray]) - (:gen-class)) - -(def data-dir "data/") -(def batch-size 10) -(def num-epoch 5) - -(when-not (.exists (io/file (str data-dir "train-images-idx3-ubyte"))) - (sh "../../scripts/get_mnist_data.sh")) -;; for save checkpoints load checkpoints -(io/make-parents "model/dummy.txt") - -;;; Load the MNIST datasets -(defonce train-data (mx-io/mnist-iter {:image (str data-dir "train-images-idx3-ubyte") - :label (str data-dir "train-labels-idx1-ubyte") - :label-name "softmax_label" - :input-shape [784] - :batch-size batch-size - :shuffle true - :flat true - :silent false - :seed 10})) - -(defonce test-data (mx-io/mnist-iter {:image (str data-dir "t10k-images-idx3-ubyte") - :label (str data-dir "t10k-labels-idx1-ubyte") - :input-shape [784] - :batch-size batch-size - :flat true - 
:silent false})) -(defn get-symbol [] - (as-> (sym/variable "data") data - (sym/fully-connected "fc1" {:data data :num-hidden 128}) - (sym/activation "relu1" {:data data :act-type "relu"}) - (sym/fully-connected "fc2" {:data data :num-hidden 64}) - (sym/activation "relu2" {:data data :act-type "relu"}) - (sym/fully-connected "fc3" {:data data :num-hidden 10}) - (sym/softmax-output "softmax" {:data data}))) - -(defn- print-header [message] - (println "") - (println "=================") - (println (str " " message)) - (println "=================") - (println "")) - -(defn run-intermediate-level-api [& {:keys [devs load-model-epoch]}] - - (let [header "Running Intermediate Level API"] - (print-header (if load-model-epoch (str header " and loading from previous epoch " load-model-epoch) - header))) - - (let [save-prefix "model/mnist-mlp" - mod (if load-model-epoch - (do - (println "Loading from checkpoint of epoch " load-model-epoch) - (m/load-checkpoint {:contexts devs :prefix save-prefix :epoch load-model-epoch})) - (m/module (get-symbol) {:contexts devs})) - metric (eval-metric/accuracy)] - (-> mod - (m/bind {:data-shapes (mx-io/provide-data-desc train-data) :label-shapes (mx-io/provide-label-desc train-data)}) - (m/init-params) - (m/init-optimizer {:optimizer (optimizer/sgd {:learning-rate 0.01 :momentum 0.9})})) - - (doseq [epoch-num (range num-epoch)] - (println "starting epoch " epoch-num) - (mx-io/do-batches - train-data - (fn [batch] - (-> mod - (m/forward batch) - (m/update-metric metric (mx-io/batch-label batch)) - (m/backward) - (m/update)))) - (println "result for epoch " epoch-num " is " (eval-metric/get-and-reset metric)) - (m/save-checkpoint mod {:prefix save-prefix :epoch epoch-num :save-opt-states true})))) - -(defn run-high-level-api [devs] - (print-header "Running High Level API") - - (let [mod (m/module (get-symbol) {:contexts devs})] - ;;; note only one function for training - (m/fit mod {:train-data train-data :eval-data test-data :num-epoch 
num-epoch}) - - ;;high level predict (just a dummy call but it returns a vector of results - (m/predict mod {:eval-data test-data}) - - ;;;high level score (returs the eval values) - (let [score (m/score mod {:eval-data test-data :eval-metric (eval-metric/accuracy)})] - (println "High level predict score is " score)))) - -(defn run-predication-and-calc-accuracy-manually [devs] - ;;; Gathers all the predictions at once with `predict-every-batch` - ;;; then cycles thorugh the batches and manually calculates the accuracy stats - - (print-header "Running Predicting and Calcing the Accuracy Manually") - - (let [mod (m/module (get-symbol) {:contexts devs})] - ;;; note only one function for training - (m/fit mod {:train-data train-data :eval-data test-data :num-epoch num-epoch}) - (let [preds (m/predict-every-batch mod {:eval-data test-data}) - stats (mx-io/reduce-batches test-data - (fn [r b] - (let [pred-label (->> (ndarray/argmax-channel (first (get preds (:index r)))) - (ndarray/->vec) - (mapv int)) - label (->> (mx-io/batch-label b) - (first) - (ndarray/->vec) - (mapv int)) - acc-sum (apply + (mapv (fn [pl l] (if (= pl l) 1 0)) - pred-label label))] - (-> r - (update :index inc) - (update :acc-cnt (fn [v] (+ v (count pred-label)))) - (update :acc-sum (fn [v] (+ v - (apply + (mapv (fn [pl l] (if (= pl l) 1 0)) - pred-label label)))))))) - {:acc-sum 0 :acc-cnt 0 :index 0})] - (println "Stats: " stats) - (println "Accuracy: " (/ (:acc-sum stats) - (* 1.0 (:acc-cnt stats))))))) - -(defn run-prediction-iterator-api [devs] - ;;Cycles through all the batchs and manually predicts and prints out the accuracy - ;;using `predict-batch` - - (print-header "Running the Prediction Iterator API and Calcing the Accuracy Manually") - - (let [mod (m/module (get-symbol) {:contexts devs})] - ;;; note only one function for training - (m/fit mod {:train-data train-data :eval-data test-data :num-epoch num-epoch}) - (mx-io/reduce-batches test-data - (fn [r b] - (let [preds (m/predict-batch 
mod b) - pred-label (->> (ndarray/argmax-channel (first preds)) - (ndarray/->vec) - (mapv int)) - label (->> (mx-io/batch-label b) - (first) - (ndarray/->vec) - (mapv int)) - acc (/ (apply + (mapv (fn [pl l] (if (= pl l) 1 0)) pred-label label)) - (* 1.0 (count pred-label)))] - (println "Batch " r " acc: " acc) - (inc r)))))) - -(defn run-all [devs] - (run-intermediate-level-api :devs devs) - (run-intermediate-level-api :devs devs :load-model-epoch (dec num-epoch)) - (run-high-level-api devs) - (run-prediction-iterator-api devs) - (run-predication-and-calc-accuracy-manually devs)) - -(defn -main - [& args] - (let [[dev dev-num] args - devs (if (= dev ":gpu") - (mapv #(context/gpu %) (range (Integer/parseInt (or dev-num "1")))) - (mapv #(context/cpu %) (range (Integer/parseInt (or dev-num "1")))))] - (println "Running Module MNIST example") - (println "Running with context devices of" devs) - (run-all devs))) - -(comment - - ;;; run all the example functions - (run-all [(context/cpu)]) - - ;;; run for the number of epochs - (run-intermediate-level-api :devs [(context/cpu)]) - ;;=> starting epoch 0 - ;;=> result for epoch 0 is [accuracy 0.8531333] - ;;=> INFO ml.dmlc.mxnet.module.Module: Saved checkpoint to model/mnist-mlp-0000.params - ;;=> INFO ml.dmlc.mxnet.module.Module: Saved optimizer state to model/mnist-mlp-0000.states - ;;=> .... 
- ;;=> starting epoch 4 - ;;=> result for epoch 4 is [accuracy 0.91875] - ;;=> INFO ml.dmlc.mxnet.module.Module: Saved checkpoint to model/mnist-mlp-0004.params - ;;=> INFO ml.dmlc.mxnet.module.Module: Saved optimizer state to model/mnist-mlp-0004.states - - - ;; load from the last saved file and run again - (run-intermediate-level-api :devs [(context/cpu)] :load-model-epoch (dec num-epoch)) - ;;=> Loading from checkpoint of epoch 4 - ;;=> starting epoch 0 - ;;=> result for epoch 0 is [accuracy 0.96258336] - ;;=> INFO ml.dmlc.mxnet.module.Module: Saved checkpoint to model/mnist-mlp-0000.params - ;;=> INFO ml.dmlc.mxnet.module.Module: Saved optimizer state to model/mnist-mlp-0000.states - ;;=> ... - ;;=> starting epoch 4 - ;;=> result for epoch 4 is [accuracy 0.9819833] - ;;=> INFO ml.dmlc.mxnet.module.Module: Saved checkpoint to model/mnist-mlp-0004.params - ;;=> INFO ml.dmlc.mxnet.module.Module: Saved optimizer state to model/mnist-mlp-0004.states - - (run-high-level-api [(context/cpu)]) - ;;=> ["accuracy" 0.9454] - - (run-prediction-iterator-api [(context/cpu)]) - ;;=> Batch 0 acc: 1.0 - ;;=> Batch 1 acc: 0.9 - ;;=> Batch 2 acc: 1.0 - ;;=> ... - ;;=> Batch 999 acc: 1.0 - - (run-predication-and-calc-accuracy-manually [(context/cpu)]) - ;;=> Stats: {:acc-sum 9494, :acc-cnt 10000, :index 1000} - ;;=> Accuracy: 0.9494 -) - diff --git a/contrib/clojure-package/examples/module/test/mnist_mlp_test.clj b/contrib/clojure-package/examples/module/test/mnist_mlp_test.clj deleted file mode 100644 index 5fbcdd3c0b39..000000000000 --- a/contrib/clojure-package/examples/module/test/mnist_mlp_test.clj +++ /dev/null @@ -1,29 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. 
-;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; -(ns mnist-mlp-test - (:require - [mnist-mlp :refer :all] - [org.apache.clojure-mxnet.context :as context] - [clojure.test :refer :all])) - -(deftest run-those-tests - (let [devs [(context/cpu)]] - (run-intermediate-level-api :devs devs) - (run-intermediate-level-api :devs devs :load-model-epoch (dec num-epoch)) - (run-high-level-api devs) - (run-prediction-iterator-api devs) - (run-predication-and-calc-accuracy-manually devs))) \ No newline at end of file diff --git a/contrib/clojure-package/examples/multi-label/.gitignore b/contrib/clojure-package/examples/multi-label/.gitignore deleted file mode 100644 index c53038ec0e3d..000000000000 --- a/contrib/clojure-package/examples/multi-label/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -/target -/classes -/checkouts -pom.xml -pom.xml.asc -*.jar -*.class -/.lein-* -/.nrepl-port -.hgignore -.hg/ diff --git a/contrib/clojure-package/examples/multi-label/README.md b/contrib/clojure-package/examples/multi-label/README.md deleted file mode 100644 index a2ea7e01e686..000000000000 --- a/contrib/clojure-package/examples/multi-label/README.md +++ /dev/null @@ -1,44 +0,0 @@ - - - - - - - - - - - - - - - - - -# multi-label - -This is a quick example of doing multi-label classification. 
-It involves using a proxy to implement the DataIter to make a custom -data iterator for MNIST - - -## Installation - -Before you run this example, make sure that you have the clojure package installed. -In the main clojure package directory, do `lein install`. Then you can run -`lein install` in this directory. - -## Usage - -To run -`lein run`. This will execute the cpu version. - -You can control the devices you run on by doing: - -`lein run :cpu` - This will run on 1 cpu device -`lein run :gpu` - This will run on 1 gpu device - -This example only works on 1 device - - - diff --git a/contrib/clojure-package/examples/multi-label/project.clj b/contrib/clojure-package/examples/multi-label/project.clj deleted file mode 100644 index c8573bfc2e94..000000000000 --- a/contrib/clojure-package/examples/multi-label/project.clj +++ /dev/null @@ -1,23 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(defproject multi-label "0.1.0-SNAPSHOT" - :description "Example of multi-label classification" - :plugins [[lein-cljfmt "0.5.7"]] - :dependencies [[org.clojure/clojure "1.9.0"] - [org.apache.mxnet.contrib.clojure/clojure-mxnet "2.0.0-SNAPSHOT"]] - :main multi-label.core) diff --git a/contrib/clojure-package/examples/multi-label/src/multi_label/core.clj b/contrib/clojure-package/examples/multi-label/src/multi_label/core.clj deleted file mode 100644 index e96783daad98..000000000000 --- a/contrib/clojure-package/examples/multi-label/src/multi_label/core.clj +++ /dev/null @@ -1,167 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns multi-label.core - (:require [clojure.java.io :as io] - [clojure.java.shell :refer [sh]] - [org.apache.clojure-mxnet.eval-metric :as eval-metric] - [org.apache.clojure-mxnet.io :as mx-io] - [org.apache.clojure-mxnet.module :as m] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.symbol :as sym] - [org.apache.clojure-mxnet.shape :as mx-shape] - [org.apache.clojure-mxnet.util :as util] - [org.apache.clojure-mxnet.context :as context]) - (:import (org.apache.mxnet DataIter) - (java.util NoSuchElementException)) - (:gen-class)) - -(def data-dir "data/") -(def batch-size 100) -(def num-epoch 1) - -(when-not (.exists (io/file (str data-dir "train-images-idx3-ubyte"))) - (sh "../../scripts/get_mnist_data.sh")) - -;;; Load the MNIST datasets -(defonce train-data (mx-io/mnist-iter {:image (str data-dir "train-images-idx3-ubyte") - :label (str data-dir "train-labels-idx1-ubyte") - :label-name "softmax_label" - :input-shape [784] - :batch-size batch-size - :shuffle true - :flat true - :silent false - :seed 10})) - -(defonce test-data (mx-io/mnist-iter {:image (str data-dir "t10k-images-idx3-ubyte") - :label (str data-dir "t10k-labels-idx1-ubyte") - :input-shape [784] - :batch-size batch-size - :flat true - :silent false})) -(defn build-network [] - (let [fc3 (as-> (sym/variable "data") data - (sym/fully-connected "fc1" {:data data :num-hidden 128}) - (sym/activation "relu1" {:data data :act-type "relu"}) - (sym/fully-connected "fc2" {:data data :num-hidden 64}) - (sym/activation "relu2" {:data data :act-type "relu"}) - (sym/fully-connected "fc3" {:data data :num-hidden 10})) - sm1 (sym/softmax-output "softmax1" {:data fc3}) - sm2 (sym/softmax-output "softmax2" {:data fc3})] - (sym/group [sm1 sm2]))) - -;;; provide an override proxy to the DataIter Scala class -(def multi-train-data (let [data-iter train-data] - (proxy [DataIter] [] - (hasNext [] - (mx-io/has-next? data-iter)) - (next [] - (if (mx-io/has-next? 
data-iter) - (let [batch (mx-io/next data-iter)] - (mx-io/data-batch {:data (util/scala-vector->vec - (.getData data-iter)) - :label (let [label (first - (util/scala-vector->vec (.getLabel data-iter)))] - [label label]) - :index (util/scala-vector->vec - (.getIndex data-iter)) - :pad (.pad batch)})) - (throw (new NoSuchElementException)))) - (reset [] - (mx-io/reset data-iter)) - (batchSize [] - (.batchSize data-iter)) - (getData [] - (.getData data-iter)) - (getLabel [] - (let [label (first (util/scala-vector->vec (.getLabel data-iter)))] (util/vec->indexed-seq [label label]))) - (getIndex [] - (.getIndex data-iter)) - (getPad [] - (.getPad data-iter)) - (provideLabel [] - (let [shape (->> (mx-io/provide-label data-iter) - (first) - (vals) - last)] - (util/list-map - {"softmax1_label" (mx-shape/->shape shape) - "softmax2_label" (mx-shape/->shape shape)}))) - (provideData [] - (.provideData data-iter))))) - -(defn train [devs] - (let [network (build-network) - data-and-labels (->> (into (mx-io/provide-data multi-train-data) - (mx-io/provide-label multi-train-data)) - (mapcat vals) - (apply hash-map)) - [arg-shapes output-shapes aux-shapes] (sym/infer-shape network data-and-labels) - arg-names (sym/list-arguments network) - aux-names (sym/list-auxiliary-states network) - arg-params (zipmap arg-names (mapv #(ndarray/empty %) arg-shapes)) - aux-params (zipmap aux-names (mapv #(ndarray/empty %) aux-shapes)) - metric (eval-metric/custom-metric - (fn [labels preds] - (println "Carin labels " labels) - (println "Carin preds " preds) - (float 0.5)) - "multi-accuracy") - mod (-> (m/module network {:contexts devs}) - (m/bind {:data-shapes (mx-io/provide-data multi-train-data) - :label-shapes (mx-io/provide-label multi-train-data)}) - (m/init-params {:arg-params arg-params :aux-params aux-params}) - (m/init-optimizer))] - (doseq [i (range 1)] - (println "Doing epoch " i) - (let [acc (mx-io/reduce-batches - multi-train-data - (fn [r b] - (let [labels (mx-io/batch-label b) - 
preds (-> (m/forward mod b) - (m/outputs)) - accs (mapv (fn [p l] - (let [pred-label (->> (ndarray/argmax-channel (first p)) - (ndarray/->vec) - (mapv int)) - label (->> (ndarray/->vec l) - (mapv int))] - (* 1.0 (apply + (mapv (fn [pl l] (if (= pl l) 1 0)) - pred-label label))))) - preds labels)] - (-> mod - (m/backward) - (m/update)) - (-> r - (update :sum #(mapv (fn [o n] (+ o n)) % accs)) - (update :batch-num inc)))) - {:sum [0 0] :batch-num 0})] - (println "Multi-accuracy " acc) - (println "Multi-accuracy " (mapv #(/ % (:batch-num acc)) (:sum acc))))))) - -(defn -main [& args] - (let [[dev dev-num] args - devs (if (= dev ":gpu") - (mapv #(context/gpu %) (range (Integer/parseInt (or dev-num "1")))) - (mapv #(context/cpu %) (range (Integer/parseInt (or dev-num "1")))))] - (println "Training...") - (println "Running with context devices of" devs) - (train devs))) - -(comment - (train [(context/cpu)])) diff --git a/contrib/clojure-package/examples/multi-label/test/multi_label_test.clj b/contrib/clojure-package/examples/multi-label/test/multi_label_test.clj deleted file mode 100644 index 446a84626e72..000000000000 --- a/contrib/clojure-package/examples/multi-label/test/multi_label_test.clj +++ /dev/null @@ -1,26 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns multi_label_test - (:require - [multi-label.core :as label] - [clojure.java.io :as io] - [org.apache.clojure-mxnet.context :as context] - [clojure.test :refer :all])) - -(deftest run-multi-label - (label/train [(context/cpu)])) \ No newline at end of file diff --git a/contrib/clojure-package/examples/pre-trained-models/src/pre_trained_models/fine_tune.clj b/contrib/clojure-package/examples/pre-trained-models/src/pre_trained_models/fine_tune.clj deleted file mode 100644 index 93c121f9fc16..000000000000 --- a/contrib/clojure-package/examples/pre-trained-models/src/pre_trained_models/fine_tune.clj +++ /dev/null @@ -1,127 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns pre-trained-models.fine-tune - (:require [clojure.string :as string] - [org.apache.clojure-mxnet.callback :as callback] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.initializer :as init] - [org.apache.clojure-mxnet.io :as mx-io] - [org.apache.clojure-mxnet.module :as m] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.symbol :as sym]) - (:gen-class)) - -;;; From the finetune example https://mxnet.incubator.apache.org/faq/finetune.html - -;; run download-resnet-50.sh to get the model params and json -;; and download-caltech.sh to get the pregenerated rec files - -(def model-dir "model") -(def batch-size 16) - -;;; image set is http://www.vision.caltech.edu/Image_Datasets/Caltech101/ -;; Pictures of objects belonging to 101 categories. About 40 to 800 images per category. Most categories have about 50 images - -(def train-iter (mx-io/image-record-iter - {:path-imgrec "caltech-256/caltech-256-60-train.rec" - :data-name "data" - :label-name "softmax_label" - :batch-size batch-size - :data-shape [3 224 224] - :shuffle true - :rand-crop true - :rand-mirror true})) - -(def val-iter (mx-io/image-record-iter - {:path-imgrec "caltech-256/caltech-256-60-val.rec" - :data-name "data" - :label-name "softmax_label" - :batch-size batch-size - :data-shape [3 224 224] - :rand-crop false - :rand-mirror false})) - -(defn get-model [] - (let [mod (m/load-checkpoint {:prefix (str model-dir "/resnet-50") :epoch 0})] - {:msymbol (m/symbol mod) - :arg-params (m/arg-params mod) - :aux-params (m/aux-params mod)})) - -(defn get-fine-tune-model - "msymbol: the pretrained network symbol - arg-params: the argument parameters of the pretrained model - num-classes: the number of classes for the fine-tune datasets - layer-name: the layer name before the last fully-connected layer" - [{:keys [msymbol arg-params num-classes layer-name] - :or {layer-name "flatten0"}}] - (let [all-layers (sym/get-internals msymbol) - net (sym/get 
all-layers (str layer-name "_output"))] - {:net (as-> net data - (sym/fully-connected "fc1" {:data data :num-hidden num-classes}) - (sym/softmax-output "softmax" {:data data})) - :new-args (->> arg-params - (remove (fn [[k v]] (string/includes? k "fc1"))) - (into {}))})) - -(defn fit [devs msymbol arg-params aux-params] - (let [mod (-> (m/module msymbol {:contexts devs}) - (m/bind {:data-shapes (mx-io/provide-data-desc train-iter) :label-shapes (mx-io/provide-label-desc val-iter)}) - (m/init-params {:arg-params arg-params :aux-params aux-params - :allow-missing true}))] - (m/fit mod - {:train-data train-iter - :eval-data val-iter - :num-epoch 1 - :fit-params (m/fit-params {:intializer (init/xavier {:rand-type "gaussian" - :factor-type "in" - :magnitude 2}) - :batch-end-callback (callback/speedometer batch-size 10)})}))) - -(defn fine-tune! [devs] - (let [{:keys [msymbol arg-params aux-params] :as model} (get-model) - {:keys [net new-args]} (get-fine-tune-model (merge model {:num-classes 256}))] - (fit devs net new-args arg-params))) - -(defn -main [& args] - (let [[dev dev-num] args - devs (if (= dev ":gpu") - (mapv #(context/gpu %) (range (Integer/parseInt (or dev-num "1")))) - (mapv #(context/cpu %) (range (Integer/parseInt (or dev-num "1")))))] - (println "Running with context devices of" devs) - (fine-tune! devs))) - -(comment - - (fine-tune! 
[(context/cpu)]) - -;INFO ml.dmlc.mxnet.Callback$Speedometer: Epoch[0] Batch [10] Speed: 3.61 samples/sec Train-accuracy=0.000000 -;; INFO ml.dmlc.mxnet.Callback$Speedometer: Epoch[0] Batch [20] Speed: 3.49 samples/sec Train-accuracy=0.005952 -;; INFO ml.dmlc.mxnet.Callback$Speedometer: Epoch[0] Batch [30] Speed: 3.58 samples/sec Train-accuracy=0.012097 -;; INFO ml.dmlc.mxnet.Callback$Speedometer: Epoch[0] Batch [40] Speed: 3.49 samples/sec Train-accuracy=0.013720 -;; INFO ml.dmlc.mxnet.Callback$Speedometer: Epoch[0] Batch [50] Speed: 3.51 samples/sec Train-accuracy=0.017157 -;; INFO ml.dmlc.mxnet.Callback$Speedometer: Epoch[0] Batch [60] Speed: 3.56 samples/sec Train-accuracy=0.017418 -;; INFO ml.dmlc.mxnet.Callback$Speedometer: Epoch[0] Batch [70] Speed: 3.56 samples/sec Train-accuracy=0.023768 -;; INFO ml.dmlc.mxnet.Callback$Speedometer: Epoch[0] Batch [80] Speed: 3.10 samples/sec Train-accuracy=0.024691 -;; INFO ml.dmlc.mxnet.Callback$Speedometer: Epoch[0] Batch [90] Speed: 3.27 samples/sec Train-accuracy=0.028846 -;; INFO ml.dmlc.mxnet.Callback$Speedometer: Epoch[0] Batch [100] Speed: 3.42 samples/sec Train-accuracy=0.033416 -;; INFO ml.dmlc.mxnet.Callback$Speedometer: Epoch[0] Batch [110] Speed: 3.46 samples/sec Train-accuracy=0.034910 -;; INFO ml.dmlc.mxnet.Callback$Speedometer: Epoch[0] Batch [120] Speed: 3.44 samples/sec Train-accuracy=0.040806 -;; INFO ml.dmlc.mxnet.Callback$Speedometer: Epoch[0] Batch [130] Speed: 3.41 samples/sec Train-accuracy=0.043893 -;; INFO ml.dmlc.mxnet.Callback$Speedometer: Epoch[0] Batch [140] Speed: 3.42 samples/sec Train-accuracy=0.045213 -) - diff --git a/contrib/clojure-package/examples/rnn/.gitignore b/contrib/clojure-package/examples/rnn/.gitignore deleted file mode 100644 index c53038ec0e3d..000000000000 --- a/contrib/clojure-package/examples/rnn/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -/target -/classes -/checkouts -pom.xml -pom.xml.asc -*.jar -*.class -/.lein-* -/.nrepl-port -.hgignore -.hg/ diff --git 
a/contrib/clojure-package/examples/rnn/README.md b/contrib/clojure-package/examples/rnn/README.md deleted file mode 100644 index 1cedc179612a..000000000000 --- a/contrib/clojure-package/examples/rnn/README.md +++ /dev/null @@ -1,44 +0,0 @@ - - - - - - - - - - - - - - - - - -# rnn - - -Demonstration of LSTM RNN trainined using Obamas text - -## Installation - -Before you run this example, make sure that you have the clojure package installed. -In the main clojure package directory, do `lein install`. Then you can run -`lein install` in this directory. - - -## Usage - - -run `./get_data.sh to download the training corpus as well as pretrained model. - -Run `lein run` to start training the corpus from scratch for 2 epochs and then -show the result of training after 75 epochs (cpu) - -You can control the devices you run on by doing: - -`lein run :cpu 2` - This will run on 2 cpu devices -`lein run :gpu 1` - This will run on 1 gpu device -`lein run :gpu 2` - This will run on 2 gpu devices - - diff --git a/contrib/clojure-package/examples/rnn/get_data.sh b/contrib/clojure-package/examples/rnn/get_data.sh deleted file mode 100755 index 4e4a2dc3e4a1..000000000000 --- a/contrib/clojure-package/examples/rnn/get_data.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -set -evx - -mkdir data -cd data -wget http://data.mxnet.io/mxnet/data/char_lstm.zip -unzip char_lstm.zip -cd .. diff --git a/contrib/clojure-package/examples/rnn/project.clj b/contrib/clojure-package/examples/rnn/project.clj deleted file mode 100644 index 40c61eeedede..000000000000 --- a/contrib/clojure-package/examples/rnn/project.clj +++ /dev/null @@ -1,23 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(defproject rnn "0.1.0-SNAPSHOT" - :description "RNN example" - :plugins [[lein-cljfmt "0.5.7"]] - :dependencies [[org.clojure/clojure "1.9.0"] - [org.apache.mxnet.contrib.clojure/clojure-mxnet "2.0.0-SNAPSHOT"]] - :main rnn.train-char-rnn) diff --git a/contrib/clojure-package/examples/rnn/src/rnn/lstm.clj b/contrib/clojure-package/examples/rnn/src/rnn/lstm.clj deleted file mode 100644 index fb3a8f352dee..000000000000 --- a/contrib/clojure-package/examples/rnn/src/rnn/lstm.clj +++ /dev/null @@ -1,192 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. 
See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns rnn.lstm - (:require [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.executor :as executor] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.symbol :as sym])) - -(defn lstm-param [i2h-weight i2h-bias - h2h-weight h2h-bias] - {:i2h-weight i2h-weight :i2h-bias i2h-bias - :h2h-weight h2h-weight :h2h-bias h2h-bias}) - -(defn lstm-state [c h] - {:c c :h h}) - -(defn lstm [num-hidden in-data prev-state param seq-idx layer-idx dropout] - (let [in-dataa (if (pos? 
dropout) - (sym/dropout {:data in-data :p dropout}) - in-data) - i2h (sym/fully-connected (str "t" seq-idx "_l" layer-idx "_i2h") - {:data in-dataa :weight (:i2h-weight param) - :bias (:i2h-bias param) :num-hidden (* num-hidden 4)}) - h2h (sym/fully-connected (str "t" seq-idx "_l" layer-idx "_h2h") - {:data (:h prev-state) :weight (:h2h-weight param) - :bias (:h2h-bias param) :num-hidden (* num-hidden 4)}) - gates (sym/+ i2h h2h) - slice-gates (sym/slice-channel (str "t" seq-idx "_l" layer-idx "_slice") - {:data gates :num-outputs 4}) - in-gate (sym/activation {:data (sym/get slice-gates 0) :act-type "sigmoid"}) - in-transform (sym/activation {:data (sym/get slice-gates 1) :act-type "tanh"}) - forget-gate (sym/activation {:data (sym/get slice-gates 2) :act-type "sigmoid"}) - out-gate (sym/activation {:data (sym/get slice-gates 3) :act-type "sigmoid"}) - next-c (sym/+ (sym/* forget-gate (:c prev-state)) - (sym/* in-gate in-transform)) - next-h (sym/* out-gate (sym/activation {:data next-c :act-type "tanh"}))] - (lstm-state next-c next-h))) - -(defn lstm-unroll [num-lstm-layer seq-len input-size num-hidden num-embed num-label dropout] - (let [embed-weight (sym/variable "embed_weight") - cls-weight (sym/variable "cls_weight") - cls-bias (sym/variable "cls_bias") - param-cells (mapv (fn [i] - (lstm-param (sym/variable (str "l" i "_i2h_weight")) - (sym/variable (str "l" i "_i2h_bias")) - (sym/variable (str "l" i "_h2h_weight")) - (sym/variable (str "l" i "_h2h_bias")))) - (range 0 num-lstm-layer)) - last-states (mapv (fn [i] - (lstm-state (sym/variable (str "l" i "_init_c_beta")) - (sym/variable (str "l" i "_init_h_beta")))) - (range 0 num-lstm-layer)) - ;; embedding layer - data (sym/variable "data") - label (sym/variable "softmax_label") - embed (sym/embedding "embed" {:data data :input-dim input-size :weight embed-weight - :output-dim num-embed}) - wordvec (sym/slice-channel {:data embed :num-outputs seq-len :squeeze-axis 1}) - dp-ratio 0 - ;; stack lstm - hidden-all 
(doall (for [seq-idx (range seq-len)] - (let [hidden (:h (last (loop [i 0 - hidden (sym/get wordvec seq-idx) - next-states []] - (if (= i num-lstm-layer) - next-states - (let [dp-ratio (if (zero? i) 0 dropout) - next-state (lstm num-hidden - hidden - (get last-states i) - (get param-cells i) - seq-idx - i - dp-ratio)] - (recur (inc i) - (:h next-state) - (conj next-states next-state)))))))] - (if (pos? dropout) - (sym/dropout {:data hidden :p dropout}) - hidden)))) - hidden-concat (sym/concat "concat" nil hidden-all {:dim 0}) - pred (sym/fully-connected "pred" {:data hidden-concat :num-hidden num-label - :weight cls-weight :bias cls-bias}) - label (sym/transpose {:data label}) - label (sym/reshape {:data label :target-shape [0]}) - sm (sym/softmax-output "softmax" {:data pred :label label})] - sm)) - -(defn lstm-inference-symbol [num-lstm-layer input-size num-hidden - num-embed num-label dropout] - (let [seq-idx 0 - embed-weight (sym/variable "embed_weight") - cls-weight (sym/variable "cls_weight") - cls-bias (sym/variable "cls_bias") - param-cells (mapv (fn [i] - (lstm-param (sym/variable (str "l" i "_i2h_weight")) - (sym/variable (str "l" i "_i2h_bias")) - (sym/variable (str "l" i "_h2h_weight")) - (sym/variable (str "l" i "_h2h_bias")))) - (range 0 num-lstm-layer)) - last-states (mapv (fn [i] - (lstm-state (sym/variable (str "l" i "_init_c_beta")) - (sym/variable (str "l" i "_init_h_beta")))) - (range 0 num-lstm-layer)) - data (sym/variable "data") - dp-ratio 0 - ;; stack lstm - next-states (loop [i 0 - hidden (sym/embedding "embed" {:data data :input-dim input-size :weight embed-weight :output-dim num-embed}) - next-states []] - (if (= i num-lstm-layer) - next-states - (let [dp-ratio (if (zero? i) 0 dropout) - next-state (lstm num-hidden - hidden - (get last-states i) - (get param-cells i) - seq-idx - i - dp-ratio)] - (recur (inc i) - (:h next-state) - (conj next-states next-state))))) - ;;; decoder - hidden (:h (last next-states)) - hidden (if (pos? 
dropout) (sym/dropout {:data hidden :p dropout}) hidden) - fc (sym/fully-connected "pred" {:data hidden :num-hidden num-label - :weight cls-weight :bias cls-bias}) - sm (sym/softmax-output "softmax" {:data fc}) - outs (into [sm] (mapcat (fn [next-s] (vals next-s)) next-states))] - (sym/group outs))) - -(defn lstm-inference-model [{:keys [num-lstm-layer input-size num-hidden - num-embed num-label arg-params - ctx dropout] - :or {ctx (context/cpu) - dropout 0.0}}] - - (let [lstm-sym (lstm-inference-symbol num-lstm-layer - input-size - num-hidden - num-embed - num-label - dropout) - batch-size 1 - init-c (into {} (map (fn [l] - {(str "l" l "_init_c_beta") [batch-size num-hidden]}) - (range num-lstm-layer))) - init-h (into {} (map (fn [l] - {(str "l" l "_init_h_beta") [batch-size num-hidden]})) - (range num-lstm-layer)) - data-shape {"data" [batch-size]} - input-shape (merge init-c init-h data-shape) - exec (sym/simple-bind lstm-sym ctx input-shape) - exec-arg-map (executor/arg-map exec) - states-map (zipmap (mapcat (fn [i] [(str "l" i "_init_c_beta") - (str "l" i "_init_h_beta")]) - (range num-lstm-layer)) - (rest (executor/outputs exec)))] - (doseq [[k v] arg-params] - (if-let [target-v (get exec-arg-map k)] - (when (and (not (get input-shape k)) - (not= "softmax_label" k)) - (ndarray/copy-to v target-v)))) - {:exec exec - :states-map states-map})) - -(defn forward [{:keys [exec states-map] :as lstm-model} input-data new-seq] - (when new-seq - (doseq [[k v] states-map] - (ndarray/set (get (executor/arg-map exec) k) 0))) - (do - (ndarray/copy-to input-data (get (executor/arg-map exec) "data")) - (executor/forward exec) - (doseq [[k v] states-map] - (ndarray/copy-to v (get (executor/arg-map exec) k))) - (first (executor/outputs exec)))) diff --git a/contrib/clojure-package/examples/rnn/src/rnn/test_char_rnn.clj b/contrib/clojure-package/examples/rnn/src/rnn/test_char_rnn.clj deleted file mode 100644 index 22a2982f222b..000000000000 --- 
a/contrib/clojure-package/examples/rnn/src/rnn/test_char_rnn.clj +++ /dev/null @@ -1,82 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns rnn.test-char-rnn - (:require [clojure.string :as string] - [clojure.java.shell :refer [sh]] - [rnn.util :as util] - [rnn.lstm :as lstm] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.executor :as executor] - [org.apache.clojure-mxnet.module :as m] - [org.apache.clojure-mxnet.ndarray :as ndarray])) - -(when-not (.exists (clojure.java.io/file "data")) - (do (println "Retrieving data...") (sh "./get_data.sh"))) - -(def data-path "data/obama.txt") -(def model-prefix) -(def start-sentence "The joke ") -(def num-hidden 512) ;; hidden unit in LSTM cell -(def num-embed 256) ;; the embedding dim (a char is mapped to 256 dim) -(def num-lstm-layer 3) ;; number of lstm layers - -(def vocab (util/build-vocab data-path)) - -(defn rnn-test [model-prefix epoch-num seq-length random?] 
- (let [trained-mod (m/load-checkpoint {:prefix model-prefix :epoch epoch-num}) - trained-arg-params (m/arg-params trained-mod) - model (lstm/lstm-inference-model {:num-lstm-layer 3 - :input-size (inc (count vocab)) - :num-label (inc (count vocab)) - :num-hidden num-hidden - :num-embed num-embed - :arg-params trained-arg-params}) - input-ndarray (ndarray/zeros [1]) - revert-vocab (util/make-revert-vocab vocab) - fix-dict (into [""] - (mapv #(str (get revert-vocab %)) - (sort (vals vocab)))) - random-sample random? ;; use this to do random sample or max prob - ignore-length (count start-sentence)] - (println "Starter sentence: " start-sentence) - (println "===") - (loop [i 0 - new-sentence true - output start-sentence] - (if (= seq-length i) - output - (do - (if (<= i (dec ignore-length)) - (util/make-input (get start-sentence i) vocab input-ndarray) - (util/make-input (last output) vocab input-ndarray)) - (let [prob (ndarray/->vec (lstm/forward model input-ndarray new-sentence)) - next-char (util/make-output prob fix-dict random-sample)] - (recur (inc i) - (if (= "" next-char) true false) - (if (< i (dec ignore-length)) - output - (str output next-char))))))))) - -(comment - - (rnn-test "data/obama" 75 200 false) - ;=>"The joke that we can start by the challenges of the American people. 
The American people have been talking about how to compete with the streets of San Antonio who the courage to come together as one " - - (rnn-test "data/obama" 75 200 true) - ;=>"The joke before them prepared for five years ago, we only hear a chance to lose our efforts and they made striggling procedural deficit at the city between a politics in the efforts on the Edmund Pett" -) diff --git a/contrib/clojure-package/examples/rnn/src/rnn/train_char_rnn.clj b/contrib/clojure-package/examples/rnn/src/rnn/train_char_rnn.clj deleted file mode 100644 index 41a764f7af95..000000000000 --- a/contrib/clojure-package/examples/rnn/src/rnn/train_char_rnn.clj +++ /dev/null @@ -1,181 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns rnn.train-char-rnn - (:require [clojure.string :as string] - [clojure.java.shell :refer [sh]] - [rnn.util :as util] - [rnn.lstm :as lstm] - [rnn.test-char-rnn :as test-rnn] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.callback :as callback] - [org.apache.clojure-mxnet.executor :as executor] - [org.apache.clojure-mxnet.eval-metric :as eval-metric] - [org.apache.clojure-mxnet.io :as mx-io] - [org.apache.clojure-mxnet.initializer :as init] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.optimizer :as optimizer] - [org.apache.clojure-mxnet.symbol :as sym] - [org.apache.clojure-mxnet.module :as m]) - (:gen-class)) - -;;https://github.com/apache/incubator-mxnet/blob/master/example/rnn/old/char-rnn.ipynb - -(when-not (.exists (clojure.java.io/file "data")) - (do (println "Retrieving data...") (sh "./get_data.sh"))) - -;; batch size for training -(def batch-size 32) -;; we can support various length input -;; for this problem, we cut each input sentence to length of 129 -;; so we only need a fixed lenght bucket -(def buckets [129]) -;;hidden unit in LSTM cell -(def num-hidden 512) -;; embedding dim which is map a char to a 256 dim vector -(def num-embed 256) -;; number of lstm layer -(def num-lstm-layer 3) -;; we will show a quick demo in 2 epoch and we will see the result -;; by training 75 epoch -(def num-epoch 75) -;; learning rate -(def learning-rate 0.01) -;; we will use pure sgd without momentum -(def momentum 0.0) - -(def ctx (context/cpu)) ;; change to gpu if desired -(def data-path "data/obama.txt") -(def vocab (util/build-vocab data-path)) - -;; generate the symbol for a length -(defn sym-gen [seq-len] - (lstm/lstm-unroll num-lstm-layer seq-len (inc (count vocab)) - num-hidden num-embed (inc (count vocab)) 0.2)) - -;;; in the case of this fixed bucketing that only uses one bucket size - it is the equivalent of padpadding all sentences to a fixed length. 
-;; we are going to use ndarray-iter for this -;; converting the bucketing-iter over to use is todo. We could either push for the example Scala one to be included in the base package and interop with that (which would be nice for other rnn needs too) or hand convert it over ourselves - - -(defn build-training-data [path] - (let [content (slurp path) - sentences (string/split content #"\n") - max-length (first buckets) - padding-int 0] - (doall (for [sentence sentences] - (let [ids (mapv #(get vocab %) sentence)] - (if (>= (count ids) max-length) - (into [] (take max-length ids)) - (into ids (repeat (- max-length (count ids)) 0)))))))) - -(defn build-labels [train-data] - ;; want to learn the next char some rotate by 1 - (doall (mapv (fn [sent-data] (conj (into [] (rest sent-data)) 0)) - train-data))) - -(defn data-desc->map [data-desc] - (->> data-desc - (map vals) - (first) - (apply hash-map))) - -(defn train [devs] - (let [;; initialize the states for the lstm - init-c (into {} (map (fn [l] - {(str "l" l "_init_c_beta") [batch-size num-hidden]}) - (range num-lstm-layer))) - init-h (into {} (map (fn [l] - {(str "l" l "_init_h_beta") [batch-size num-hidden]})) - (range num-lstm-layer)) - init-states (merge init-c init-h) - train-data (build-training-data data-path) - labels (build-labels train-data) - sent-len (first buckets) - train-iter (mx-io/ndarray-iter [(ndarray/array (flatten train-data) - [(count train-data) sent-len])] - {:label [(ndarray/array (flatten labels) - [(count labels) sent-len])] - :label-name "softmax_label" - :data-batch-size batch-size - :last-batch-handle "pad"}) - data-and-labels (merge (data-desc->map (mx-io/provide-data-desc train-iter)) - (data-desc->map (mx-io/provide-label-desc train-iter)) - init-states) - init-states-data (mapv (fn [[k v]] (ndarray/zeros v {:ctx ctx})) init-states) - rnn-sym (sym-gen (first buckets)) - - rnn-mod (-> (m/module rnn-sym {:contexts devs}) - (m/bind {:data-shapes (into (mx-io/provide-data-desc train-iter) 
- (mapv (fn [[k v]] {:name k :shape v}) init-states)) - :label-shapes (mx-io/provide-label-desc train-iter)}) - (m/init-params {:initializer (init/xavier {:factor-type "in" :magnitude 2.34})}) - (m/init-optimizer {:optimizer (optimizer/adam {:learning-rate learning-rate :wd 0.0001})})) - metric (eval-metric/custom-metric - (fn [label pred] - (let [labels (ndarray/->vec (ndarray/transpose label)) - pred-shape (ndarray/shape-vec pred) - size (apply * (ndarray/shape-vec label)) - preds (mapv #(into [] %) (doall - (partition (last pred-shape) (ndarray/->vec pred)))) - results (map-indexed - (fn [i l] - (get-in preds [i (int l)])) - labels) - result (->> results - (mapv #(Math/max (float 1e-10) (float %))) - (mapv #(Math/log %)) - (mapv #(* -1.0 %)) - (apply +))] - (float (Math/exp (/ result (count labels)))))) - - "perplexity")] - - ;; Train for 1 epochs and then show the results of 75 - (doseq [epoch-num (range 1)] - (println "Doing epoch " epoch-num) - (mx-io/reduce-batches - train-iter - (fn [batch-num batch] - (let [batch (mx-io/next train-iter)] - (-> rnn-mod - (m/forward (mx-io/data-batch {:data (into (mx-io/batch-data batch) init-states-data) - :label (mx-io/batch-label batch)})) - (m/update-metric metric (mx-io/batch-label batch)) - (m/backward) - (m/update)) - (when (zero? 
(mod batch-num 10)) - (println "Eval metric for batch-num " batch-num " is " (eval-metric/get metric))) - (inc batch-num)))) - (println "Finished epoch " epoch-num) - #_(println "Eval-metric " (eval-metric/get-and-reset metric)) - (m/save-checkpoint rnn-mod {:prefix "train-obama" :epoch epoch-num}) - (println "Testing with random 200 chars ") - (println "=====") - (println (test-rnn/rnn-test "train-obama" epoch-num 200 true)) - (println "=====")) - - (println "Showing the result after 75 epochs (pre-trained)") - (println (test-rnn/rnn-test "data/obama" 75 200 true)) - (println "====="))) - -(defn -main [& args] - (let [[dev dev-num] args - devs (if (= dev ":gpu") - (mapv #(context/gpu %) (range (Integer/parseInt (or dev-num "1")))) - (mapv #(context/cpu %) (range (Integer/parseInt (or dev-num "1")))))] - (train devs))) diff --git a/contrib/clojure-package/examples/rnn/src/rnn/util.clj b/contrib/clojure-package/examples/rnn/src/rnn/util.clj deleted file mode 100644 index bce5bb710ada..000000000000 --- a/contrib/clojure-package/examples/rnn/src/rnn/util.clj +++ /dev/null @@ -1,74 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns rnn.util - (:require [org.apache.clojure-mxnet.ndarray :as ndarray])) - -(defn build-vocab [path] - (let [content (slurp path) - vocab-map (reduce (fn [{:keys [vocab idx] :as result} c] - (if (get vocab c) - result - (-> result - (update :vocab assoc c (inc idx)) - (update :idx inc)))) - {:vocab {} :idx 0} ;; 0 is used for padding - content)] - (:vocab vocab-map))) - -(defn make-revert-vocab [vmap] - (into {} (map (fn [[k v]] [v k]) vmap))) - -(defn make-input [char vocab arr] - (let [idx (get vocab char) - tmp (ndarray/zeros [1])] - (do - (ndarray/set tmp idx) - (ndarray/set arr tmp)))) - -(defn cdf [weights] - (let [total (* 1.0 (apply + weights)) - csums (reduce (fn [cumsum w] (conj cumsum (+ (or (last cumsum) 0) w))) [] weights)] - (mapv #(/ % total) csums))) - -(defn choice [population weights] - (assert (= (count population) (count weights))) - (let [cdf-vals (cdf weights) - x (rand) - idx (-> (partition-by (fn [v] (>= v x)) cdf-vals) - first - count)] - (get population idx))) - -;; we can use random output of fixed-output by choosing the largest probability -(defn make-output [prob fix-dict sample] - (let [temperature 1.0 - char (if sample - (let [scale-prob (mapv (fn [x] (if (< x 1e-6) - 1e-6 - (if (> x (- 1 1e-6)) - (- 1 1e-6) - x))) prob) - rescale (mapv (fn [x] (Math/exp (/ (Math/log x) temperature))) scale-prob) - sum (apply + rescale) - rescale (map (fn [x] (/ x sum)) rescale)] - (choice fix-dict rescale)) - (->> (zipmap prob fix-dict) - (sort-by max) - (vals) - last))] - char)) diff --git a/contrib/clojure-package/examples/rnn/test/rnn/core_test.clj b/contrib/clojure-package/examples/rnn/test/rnn/core_test.clj deleted file mode 100644 index b198577241c3..000000000000 --- a/contrib/clojure-package/examples/rnn/test/rnn/core_test.clj +++ /dev/null @@ -1,26 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. 
See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns rnn.core_test - (:require - [rnn.test-char-rnn :as rnn] - [clojure.test :refer :all])) - -(deftest check-trained-network - (is (= - "The joke that we can start by the challenges of the American people. The American people have been talking about how to compete with the streets of San Antonio who the courage to come together as one " - (rnn/rnn-test "data/obama" 75 200 false)))) \ No newline at end of file diff --git a/contrib/clojure-package/examples/tutorial/src/tutorial/module.clj b/contrib/clojure-package/examples/tutorial/src/tutorial/module.clj deleted file mode 100644 index 4923cdcc0684..000000000000 --- a/contrib/clojure-package/examples/tutorial/src/tutorial/module.clj +++ /dev/null @@ -1,286 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. 
You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns tutorial.module - "A REPL tutorial of the MXNet Clojure API for Module, based on - https://mxnet.apache.org/api/clojure/module.html" - (:require [clojure.java.io :as io] - [clojure.java.shell :refer [sh]] - [org.apache.clojure-mxnet.eval-metric :as eval-metric] - [org.apache.clojure-mxnet.io :as mx-io] - [org.apache.clojure-mxnet.module :as m] - [org.apache.clojure-mxnet.symbol :as sym] - [org.apache.clojure-mxnet.ndarray :as ndarray])) - - -;; The Module API provides an intermediate and high-level interface -;; for performing computation with neural networks in MXNet. Module -;; wraps a Symbol and one or more Executors. It has both a high level -;; and intermediate level API. - - -;;;; Prepare the Data - -;; In this example, we are going to use the MNIST data set. If you -;; start, we can run some helper scripts to download the data for us. - -(def data-dir "data/") - -(when-not (.exists (io/file (str data-dir "train-images-idx3-ubyte"))) - (sh "../../scripts/get_mnist_data.sh")) - -;; MXNet provides function in the `io` namespace to load the MNIST -;; datasets into training and test data iterators that we can use with -;; our module. 
-(def train-data (mx-io/mnist-iter {:image (str data-dir "train-images-idx3-ubyte") - :label (str data-dir "train-labels-idx1-ubyte") - :label-name "softmax_label" - :input-shape [784] - :batch-size 10 - :shuffle true - :flat true - :silent false - :seed 10})) - -(def test-data (mx-io/mnist-iter {:image (str data-dir "t10k-images-idx3-ubyte") - :label (str data-dir "t10k-labels-idx1-ubyte") - :input-shape [784] - :batch-size 10 - :flat true - :silent false})) - - -;;;; Preparing a module for Computation - -;; To construct a module, we need to have a symbol as input. This -;; symbol takes input data in the first layer and then has subsequent -;; layers of fully connected and relu activation layers, ending up in -;; a softmax layer for output. - -(let [data (sym/variable "data") - fc1 (sym/fully-connected "fc1" {:data data :num-hidden 128}) - act1 (sym/activation "relu1" {:data fc1 :act-type "relu"}) - fc2 (sym/fully-connected "fc2" {:data act1 :num-hidden 64}) - act2 (sym/activation "relu2" {:data fc2 :act-type "relu"}) - fc3 (sym/fully-connected "fc3" {:data act2 :num-hidden 10}) - out (sym/softmax-output "softmax" {:data fc3})] - out) ;=>#object[org.apache.mxnet.Symbol 0x1f43a406 "org.apache.mxnet.Symbol@1f43a406"] - -;; You can also write this with the `as->` threading macro. - - -(def out (as-> (sym/variable "data") data - (sym/fully-connected "fc1" {:data data :num-hidden 128}) - (sym/activation "relu1" {:data data :act-type "relu"}) - (sym/fully-connected "fc2" {:data data :num-hidden 64}) - (sym/activation "relu2" {:data data :act-type "relu"}) - (sym/fully-connected "fc3" {:data data :num-hidden 10}) - (sym/softmax-output "softmax" {:data data}))) -;=> #'tutorial.module/out - - -;; By default, context is the CPU. 
If you need data parallelization, -;; you can specify a GPU context or an array of GPU contexts, like -;; this: `(m/module out {:contexts [(context/gpu)]})` - -;; Before you can compute with a module, you need to call `bind` to -;; allocate the device memory and `initParams` or `set-params` to -;; initialize the parameters. If you simply want to fit a module, you -;; don’t need to call `bind` and `init-params` explicitly, because the -;; `fit` function automatically calls them if they are needed. - -(let [mod (m/module out)] - (-> mod - (m/bind {:data-shapes (mx-io/provide-data train-data) - :label-shapes (mx-io/provide-label train-data)}) - (m/init-params))) - -;; Now you can compute with the module using functions like `forward`, -;; `backward`, etc. - - -;;;; Training and Predicting - -;; Modules provide high-level APIs for training, predicting, and -;; evaluating. To fit a module, call the `fit` function with some data -;; iterators: - -(def mod - (m/fit (m/module out) {:train-data train-data - :eval-data test-data - :num-epoch 1})) -;; => -;; Epoch 0 Train- [accuracy 0.12521666] -;; Epoch 0 Time cost- 8392 -;; Epoch 0 Validation- [accuracy 0.2227] - - -;; You can pass in batch-end callbacks using batch-end-callback and -;; epoch-end callbacks using epoch-end-callback in the -;; `fit-params`. You can also set parameters using functions like in -;; the fit-params like optimizer and eval-metric. To learn more about -;; the fit-params, see the fit-param function options. To predict with -;; a module, call `predict` with a DataIter: - -(def results - (m/predict mod {:eval-data test-data})) - -(first results) ;=>#object[org.apache.mxnet.NDArray 0x3540b6d3 "org.apache.mxnet.NDArray@a48686ec"] - -(first (ndarray/->vec (first results))) ;=>0.08261358 - -;; The module collects and returns all of the prediction results. For -;; more details about the format of the return values, see the -;; documentation for the `predict` function. 
- -;; When prediction results might be too large to fit in memory, use -;; the `predict-every-batch` API. - -(let [preds (m/predict-every-batch mod {:eval-data test-data})] - (mx-io/reduce-batches test-data - (fn [i batch] - (println (str "pred is " (first (get preds i)))) - (println (str "label is " (mx-io/batch-label batch))) - ;;; do something - (inc i)))) - -;; If you need to evaluate on a test set and don’t need the prediction -;; output, call the `score` function with a data iterator and an eval -;; metric: - -(m/score mod {:eval-data test-data - :eval-metric (eval-metric/accuracy)}) ;=>["accuracy" 0.2227] - -;; This runs predictions on each batch in the provided DataIter and -;; computes the evaluation score using the provided EvalMetric. The -;; evaluation results are stored in metric so that you can query -;; later. - - - -;;;; Saving and Loading - -;; To save the module parameters in each training epoch, use the -;; `save-checkpoint` function: - -(let [save-prefix "my-model"] - (doseq [epoch-num (range 3)] - (mx-io/do-batches train-data (fn [batch - ;; do something - ])) - (m/save-checkpoint mod {:prefix save-prefix - :epoch epoch-num - :save-opt-states true}))) - -;; INFO org.apache.mxnet.module.Module: Saved checkpoint to my-model-0000.params -;; INFO org.apache.mxnet.module.Module: Saved optimizer state to my-model-0000.states -;; INFO org.apache.mxnet.module.Module: Saved checkpoint to my-model-0001.params -;; INFO org.apache.mxnet.module.Module: Saved optimizer state to my-model-0001.states -;; INFO org.apache.mxnet.module.Module: Saved checkpoint to my-model-0002.params -;; INFO org.apache.mxnet.module.Module: Saved optimizer state to my-model-0002.states - - -;; To load the saved module parameters, call the `load-checkpoint` -;; function: - -(def new-mod (m/load-checkpoint {:prefix "my-model" :epoch 1 :load-optimizer-states true})) - -new-mod ;=> #object[org.apache.mxnet.module.Module 0x5304d0f4 "org.apache.mxnet.module.Module@5304d0f4"] - -;; To 
initialize parameters, bind the symbols to construct executors -;; first with the `bind` function. Then, initialize the parameters and -;; auxiliary states by calling the `init-params` function.\ -(-> new-mod - (m/bind {:data-shapes (mx-io/provide-data train-data) - :label-shapes (mx-io/provide-label train-data)}) - (m/init-params)) - -;; To get current parameters, use `params` -(let [[arg-params aux-params] (m/params new-mod)] - {:arg-params arg-params - :aux-params aux-params}) - -;; {:arg-params -;; {"fc3_bias" -;; #object[org.apache.mxnet.NDArray 0x39adc3b0 "org.apache.mxnet.NDArray@49caf426"], -;; "fc2_weight" -;; #object[org.apache.mxnet.NDArray 0x25baf623 "org.apache.mxnet.NDArray@a6c8f9ac"], -;; "fc1_bias" -;; #object[org.apache.mxnet.NDArray 0x6e089973 "org.apache.mxnet.NDArray@9f91d6eb"], -;; "fc3_weight" -;; #object[org.apache.mxnet.NDArray 0x756fd109 "org.apache.mxnet.NDArray@2dd0fe3c"], -;; "fc2_bias" -;; #object[org.apache.mxnet.NDArray 0x1dc69c8b "org.apache.mxnet.NDArray@d128f73d"], -;; "fc1_weight" -;; #object[org.apache.mxnet.NDArray 0x20abc769 "org.apache.mxnet.NDArray@b8e1c5e8"]}, -;; :aux-params {}} - - -;; To assign parameter and aux state values, use the `set-params` -;; function: -(m/set-params new-mod {:arg-params (m/arg-params new-mod) - :aux-params (m/aux-params new-mod)}) - - -;; To resume training from a saved checkpoint, pass the loaded -;; parameters to the `fit` function. This will prevent `fit` from -;; initializing randomly. - -;; (First, reset the training data before calling `fit` or you will -;; get an error) -(mx-io/reset train-data) -(mx-io/reset test-data) - -;; Create `fit-params` and then use it to set `begin-epoch` so that -;; `fit` knows to resume from a saved epoch. 
- - -(comment -;; FIXME -; Caused by: java.io.EOFException -; at java.io.DataInputStream.readInt(DataInputStream.java:392) -; at java.io.ObjectInputStream$BlockDataInputStream.readInt(ObjectInputStream.java:3182) -; at java.io.ObjectInputStream.readInt(ObjectInputStream.java:1032) -; at org.apache.mxnet.Optimizer$$anon$1$$anonfun$deserializeState$1.apply$mcVI$sp(Optimizer.scala:84) -; at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) -; at org.apache.mxnet.Optimizer$$anon$1.deserializeState(Optimizer.scala:83) -; at org.apache.mxnet.module.Module$$anonfun$loadOptimizerStates$3.apply(Module.scala:594) -; at org.apache.mxnet.module.Module$$anonfun$loadOptimizerStates$3.apply(Module.scala:589) -; at scala.Option.foreach(Option.scala:257) -; at org.apache.mxnet.module.Module.loadOptimizerStates(Module.scala:589) -; at org.apache.mxnet.module.Module$$anonfun$initOptimizer$4.apply(Module.scala:407) -; at org.apache.mxnet.module.Module$$anonfun$initOptimizer$4.apply(Module.scala:406) -; at scala.Option.foreach(Option.scala:257) -; at org.apache.mxnet.module.Module.initOptimizer(Module.scala:406) -; at org.apache.mxnet.module.BaseModule.fit(BaseModule.scala:407) -; at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) -; at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) -; at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) -; at java.lang.reflect.Method.invoke(Method.java:498) -; at clojure.lang.Reflector.invokeMatchingMethod(Reflector.java:93) -; at clojure.lang.Reflector.invokeInstanceMethod(Reflector.java:28) -; at org.apache.clojure_mxnet.module$fit.invokeStatic(module.clj:551) -; at org.apache.clojure_mxnet.module$fit.invoke(module.clj:538) -; at tutorial.module$eval1787.invokeStatic(module.clj:250) -; at tutorial.module$eval1787.invoke(module.clj:250) - -(m/fit new-mod {:train-data train-data - :eval-data test-data - :num-epoch 2 - :fit-params (m/fit-params {:begin-epoch 
1})}) - -) \ No newline at end of file diff --git a/contrib/clojure-package/examples/tutorial/src/tutorial/symbol.clj b/contrib/clojure-package/examples/tutorial/src/tutorial/symbol.clj index 73933b4395c5..0dc45dc095ef 100644 --- a/contrib/clojure-package/examples/tutorial/src/tutorial/symbol.clj +++ b/contrib/clojure-package/examples/tutorial/src/tutorial/symbol.clj @@ -30,23 +30,6 @@ ;; graphs. You can configure the graphs either at the level of neural ;; network layer operations or as fine-grained operations. -;; The following example configures a two-layer neural network. -(def data (sym/variable "data")) -(def fc1 (sym/fully-connected "fc1" {:data data :num-hidden 128})) -(def act1 (sym/activation "act1" {:data fc1 :act-type "relu"})) -(def fc2 (sym/fully-connected "fc2" {:data act1 :num-hidden 64})) -(def net (sym/softmax-output "out" {:data fc2})) - -;; This can also be combined more dynamically with the `as->` Clojure -;; threading form. -(as-> (sym/variable "data") data - (sym/fully-connected "fc1" {:data data :num-hidden 128}) - (sym/activation "act1" {:data data :act-type "relu"}) - (sym/fully-connected "fc2" {:data data :num-hidden 64}) - (sym/softmax-output "out" {:data data})) - -net ;=> #object[org.apache.mxnet.Symbol 0x5c78c8c2 "org.apache.mxnet.Symbol@5c78c8c2"] - ;; The basic arithmetic operators (plus, minus, div, multiplication) ;; work as expected. The following example creates a computation graph ;; that adds two inputs together. @@ -74,17 +57,7 @@ net ;=> #object[org.apache.mxnet.Symbol 0x5c78c8c2 "org.apache.mxnet.Symbol@5c78 ;;;; Group Multiple Symbols ;; To construct neural networks with multiple loss layers, we can use -;; `group` to group multiple symbols together. 
The following example -;; groups two outputs: - -(def net (sym/variable "data")) -(def fc1 (sym/fully-connected {:data net :num-hidden 128})) -(def net2 (sym/activation {:data fc1 :act-type "relu"})) -(def out1 (sym/softmax-output {:data net2})) -(def out2 (sym/linear-regression-output {:data net2})) -(def group (sym/group [out1 out2])) -(sym/list-outputs group) ;=> ["softmaxoutput0_output" "linearregressionoutput0_output"] - +;; `group` to group multiple symbols together. ;;;; Serialization diff --git a/contrib/clojure-package/examples/tutorial/test/tutorial/core_test.clj b/contrib/clojure-package/examples/tutorial/test/tutorial/core_test.clj deleted file mode 100644 index 0e5169c5cfaa..000000000000 --- a/contrib/clojure-package/examples/tutorial/test/tutorial/core_test.clj +++ /dev/null @@ -1,27 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns tutorial.core_test - (:require [clojure.test :refer :all]) - (:require - [tutorial.introduction] - [tutorial.kvstore] - [tutorial.module] - [tutorial.ndarray] - [tutorial.symbol])) - -(deftest if-this-goes-here-then-tutorials-have-loaded-properly (is true)) \ No newline at end of file diff --git a/contrib/clojure-package/examples/visualization/.gitignore b/contrib/clojure-package/examples/visualization/.gitignore deleted file mode 100644 index c53038ec0e3d..000000000000 --- a/contrib/clojure-package/examples/visualization/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -/target -/classes -/checkouts -pom.xml -pom.xml.asc -*.jar -*.class -/.lein-* -/.nrepl-port -.hgignore -.hg/ diff --git a/contrib/clojure-package/examples/visualization/README.md b/contrib/clojure-package/examples/visualization/README.md deleted file mode 100644 index c9eb75f6d496..000000000000 --- a/contrib/clojure-package/examples/visualization/README.md +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - - - - - -# visualization - - - -## Installation - -Before you run this example, make sure that you have the clojure package installed. -In the main clojure package directory, do `lein install`. Then you can run -`lein install` in this directory. - -## Usage - -Run `lein run` to have a sample network visualization printed for you -"testviz.pdf" diff --git a/contrib/clojure-package/examples/visualization/project.clj b/contrib/clojure-package/examples/visualization/project.clj deleted file mode 100644 index c19e19d6fd02..000000000000 --- a/contrib/clojure-package/examples/visualization/project.clj +++ /dev/null @@ -1,23 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. 
-;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(defproject visualization "0.1.0-SNAPSHOT" - :description "Visualization example" - :plugins [[lein-cljfmt "0.5.7"]] - :dependencies [[org.clojure/clojure "1.9.0"] - [org.apache.mxnet.contrib.clojure/clojure-mxnet "2.0.0-SNAPSHOT"]] - :main visualization.core) diff --git a/contrib/clojure-package/examples/visualization/src/visualization/core.clj b/contrib/clojure-package/examples/visualization/src/visualization/core.clj deleted file mode 100644 index 31cce9206d81..000000000000 --- a/contrib/clojure-package/examples/visualization/src/visualization/core.clj +++ /dev/null @@ -1,46 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns visualization.core - (:require [org.apache.clojure-mxnet.symbol :as sym] - [org.apache.clojure-mxnet.visualization :as viz])) - -(defn get-symbol [] - (as-> (sym/variable "data") data - - #_(sym/convolution "conv1" {:data data :kernel [3 3] :num-filter 32 :stride [2 2]}) - #_(sym/batch-norm "bn1" {:data data}) - #_(sym/activation "relu1" {:data data :act-type "relu"}) - #_(sym/pooling "mp1" {:data data :kernel [2 2] :pool-type "max" :stride [2 2]}) #_(sym/convolution "conv2" {:data data :kernel [3 3] :num-filter 32 :stride [2 2]}) - #_(sym/batch-norm "bn2" {:data data}) - #_(sym/activation "relu2" {:data data :act-type "relu"}) - #_(sym/pooling "mp2" {:data data :kernel [2 2] :pool-type "max" :stride [2 2]}) - - (sym/flatten "fl" {:data data}) - #_(sym/fully-connected "fc2" {:data data :num-hidden 10}) - (sym/softmax-output "softmax" {:data data}))) - -(defn test-viz [] - (let [dot (viz/plot-network (get-symbol) - {"data" [1 1 28 28]} - {:title "foo" :node-attrs {:shape "oval" :fixedsize "false"}})] - (viz/render dot "testviz" "./"))) - -(defn -main [& args] - (do (test-viz) - (println "Check for the testviz.pdf file in the project directory"))) - diff --git a/contrib/clojure-package/examples/visualization/test/visualization/core_test.clj b/contrib/clojure-package/examples/visualization/test/visualization/core_test.clj deleted file mode 100644 index 1b10695cb34c..000000000000 --- a/contrib/clojure-package/examples/visualization/test/visualization/core_test.clj +++ /dev/null @@ -1,28 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. 
You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns visualization.core_test - (:require - [visualization.core :as visualization] - [clojure.test :refer :all])) - -(deftest check-pdf - (visualization/test-viz) - (let [new-pdf (clojure.java.io/as-file "testviz.pdf")] - (is (.exists new-pdf)) - (is (> 10000 (- (System/currentTimeMillis) (.lastModified new-pdf)))))) - \ No newline at end of file diff --git a/contrib/clojure-package/test/org/apache/clojure_mxnet/conv_test.clj b/contrib/clojure-package/test/org/apache/clojure_mxnet/conv_test.clj deleted file mode 100644 index ca9d4bc93986..000000000000 --- a/contrib/clojure-package/test/org/apache/clojure_mxnet/conv_test.clj +++ /dev/null @@ -1,92 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns org.apache.clojure-mxnet.conv-test - (:require [clojure.java.io :as io] - [clojure.java.shell :refer [sh]] - [clojure.test :refer :all] - [org.apache.clojure-mxnet.eval-metric :as eval-metric] - [org.apache.clojure-mxnet.io :as mx-io] - [org.apache.clojure-mxnet.module :as m] - [org.apache.clojure-mxnet.optimizer :as optimizer] - [org.apache.clojure-mxnet.symbol :as sym] - [org.apache.clojure-mxnet.symbol-api :as sym-api] - [org.apache.clojure-mxnet.util :as util] - [clojure.reflect :as r])) - -(def data-dir "data/") -(def batch-size 100) -(def num-epoch 1) - -(when-not (.exists (io/file (str data-dir "train-images-idx3-ubyte"))) - (sh "./scripts/get_mnist_data.sh")) - -;;; Load the MNIST datasets -(def train-data (mx-io/mnist-iter {:image (str data-dir "train-images-idx3-ubyte") - :label (str data-dir "train-labels-idx1-ubyte") - :label-name "softmax_label" - :data-shape [1 28 28] - :label-shape [1 1 10] - :batch-size batch-size - :shuffle true - :flat false - :silent false - :seed 10})) - -(def test-data (mx-io/mnist-iter {:image (str data-dir "t10k-images-idx3-ubyte") - :label (str data-dir "t10k-labels-idx1-ubyte") - :data-shape [1 28 28] - :batch-size batch-size - :flat false - :silent false})) -(defn get-symbol [] - (as-> (sym/variable "data") data - - (sym-api/convolution {:name "conv1" :data data :kernel [3 3] :num-filter 32 :stride [2 2]}) - (sym-api/batch-norm {:name "bn1" :data data}) - (sym-api/activation {:name "relu1" :data data :act-type "relu"}) - (sym-api/pooling {:name "mp1" :data data :kernel [2 2] :pool-type "max" :stride [2 2]}) - - (sym-api/convolution {:name "conv2" :data data :kernel [3 3] :num-filter 32 :stride [2 2]}) - (sym-api/batch-norm {:name "bn2" :data data}) - (sym-api/activation {:name "relu2" :data data :act-type "relu"}) - (sym-api/pooling {:name "mp2" :data data :kernel [2 2] :pool-type "max" :stride [2 2]}) - - (sym-api/flatten {:name "fl" :data data}) - (sym-api/fully-connected {:name "fc2" :data data :num-hidden 
10}) - (sym-api/softmax-output {:name "softmax" :data data}))) - -(deftest test-conv [] - (let [mod (m/module (get-symbol))] - ;;; note only one function for training - (m/fit mod {:train-data train-data :eval-data test-data :num-epoch num-epoch - :fit-params (m/fit-params {:optimizer (optimizer/sgd {:learning-rate 0.1 - :momentum 0.9 - :wd 0.0001})})}) - - ;;high level predict (just a dummy call but it returns a vector of results - (m/predict mod {:eval-data test-data}) - - ;;;high level score (returs the eval values) - (let [score (m/score mod {:eval-data test-data :eval-metric (eval-metric/accuracy)})] - (println "Score" score) - (is (< 0.92 (last score)))))) - -(comment - - (require '[clojure.reflect :as r]) - (r/reflect train-data)) diff --git a/contrib/clojure-package/test/org/apache/clojure_mxnet/infer/imageclassifier_test.clj b/contrib/clojure-package/test/org/apache/clojure_mxnet/infer/imageclassifier_test.clj deleted file mode 100644 index 5890f754033f..000000000000 --- a/contrib/clojure-package/test/org/apache/clojure_mxnet/infer/imageclassifier_test.clj +++ /dev/null @@ -1,131 +0,0 @@ -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns org.apache.clojure-mxnet.infer.imageclassifier-test - (:require [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.dtype :as dtype] - [org.apache.clojure-mxnet.infer :as infer] - [org.apache.clojure-mxnet.layout :as layout] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [clojure.java.io :as io] - [clojure.java.shell :refer [sh]] - [clojure.test :refer :all] - [test-helper])) - -(test-helper/load-test-images) - -(def model-dir "data/") -(def model-path-prefix (str model-dir "resnet-18/resnet-18")) - -(when-not (.exists (io/file (str model-path-prefix "-symbol.json"))) - (sh "./scripts/infer/get_resnet_18_data.sh")) - -(defn create-classifier [] - (let [descriptors [{:name "data" - :shape [1 3 224 224] - :layout layout/NCHW - :dtype dtype/FLOAT32}] - factory (infer/model-factory model-path-prefix descriptors)] - (infer/create-image-classifier factory))) - -(deftest test-single-classification - (let [classifier (create-classifier) - image (infer/load-image-from-file "test/test-images/kitten.jpg") - [predictions-all] (infer/classify-image classifier image) - [predictions-with-default-dtype] (infer/classify-image classifier image 10) - [predictions] (infer/classify-image classifier image 5 dtype/FLOAT32)] - (is (= 1000 (count predictions-all))) - (is (= 10 (count predictions-with-default-dtype))) - (is (= 5 (count predictions))) - (is (= "n02123159 tiger cat" (:class (first predictions)))) - (is (< 0 (:prob (first predictions)) 1)))) - -(deftest test-batch-classification - (let [classifier (create-classifier) - image-batch (infer/load-image-paths ["test/test-images/kitten.jpg" - "test/test-images/Pug-Cookie.jpg"]) - [batch-predictions-all] (infer/classify-image-batch classifier image-batch) - [batch-predictions-with-default-dtype] (infer/classify-image-batch classifier image-batch 10) - [predictions] (infer/classify-image-batch classifier image-batch 5 dtype/FLOAT32)] - (is (= 1000 (count batch-predictions-all))) - (is (= 10 (count 
batch-predictions-with-default-dtype))) - (is (= 5 (count predictions))) - (is (= "n02123159 tiger cat" (:class (first predictions)))) - (is (< 0 (:prob (first predictions)) 1)))) - -(deftest test-single-classification-with-ndarray - (let [classifier (create-classifier) - image (-> (infer/load-image-from-file "test/test-images/kitten.jpg") - (infer/reshape-image 224 224) - (infer/buffered-image-to-pixels [3 224 224] dtype/FLOAT32) - (ndarray/expand-dims 0)) - [predictions-all] (infer/classify-with-ndarray classifier [image]) - [predictions] (infer/classify-with-ndarray classifier [image] 5)] - (is (= 1000 (count predictions-all))) - (is (= 5 (count predictions))) - (is (= "n02123159 tiger cat" (:class (first predictions)))) - (is (< 0 (:prob (first predictions)) 1)))) - -(deftest test-single-classify - (let [classifier (create-classifier) - image (-> (infer/load-image-from-file "test/test-images/kitten.jpg") - (infer/reshape-image 224 224) - (infer/buffered-image-to-pixels [3 224 224] dtype/FLOAT32) - (ndarray/expand-dims 0)) - predictions-all (infer/classify classifier [(ndarray/->vec image)]) - predictions (infer/classify classifier [(ndarray/->vec image)] 5)] - (is (= 1000 (count predictions-all))) - (is (= 5 (count predictions))) - (is (= "n02123159 tiger cat" (:class (first predictions)))) - (is (< 0 (:prob (first predictions)) 1)))) - -(deftest test-base-classification-with-ndarray - (let [descriptors [{:name "data" - :shape [1 3 224 224] - :layout layout/NCHW - :dtype dtype/FLOAT32}] - factory (infer/model-factory model-path-prefix descriptors) - classifier (infer/create-classifier factory) - image (-> (infer/load-image-from-file "test/test-images/kitten.jpg") - (infer/reshape-image 224 224) - (infer/buffered-image-to-pixels [3 224 224] dtype/FLOAT32) - (ndarray/expand-dims 0)) - [predictions-all] (infer/classify-with-ndarray classifier [image]) - [predictions] (infer/classify-with-ndarray classifier [image] 5)] - (is (= 1000 (count predictions-all))) - (is 
(= 5 (count predictions))) - (is (= "n02123159 tiger cat" (:class (first predictions)))) - (is (< 0 (:prob (first predictions)) 1)))) - -(deftest test-base-single-classify - (let [descriptors [{:name "data" - :shape [1 3 224 224] - :layout layout/NCHW - :dtype dtype/FLOAT32}] - factory (infer/model-factory model-path-prefix descriptors) - classifier (infer/create-classifier factory) - image (-> (infer/load-image-from-file "test/test-images/kitten.jpg") - (infer/reshape-image 224 224) - (infer/buffered-image-to-pixels [3 224 224] dtype/FLOAT32) - (ndarray/expand-dims 0)) - predictions-all (infer/classify classifier [(ndarray/->vec image)]) - predictions (infer/classify classifier [(ndarray/->vec image)] 5)] - (is (= 1000 (count predictions-all))) - (is (= 5 (count predictions))) - (is (= "n02123159 tiger cat" (:class (first predictions)))) - (is (< 0 (:prob (first predictions)) 1)))) - - diff --git a/contrib/clojure-package/test/org/apache/clojure_mxnet/infer/predictor_test.clj b/contrib/clojure-package/test/org/apache/clojure_mxnet/infer/predictor_test.clj deleted file mode 100644 index e1526be61fbf..000000000000 --- a/contrib/clojure-package/test/org/apache/clojure_mxnet/infer/predictor_test.clj +++ /dev/null @@ -1,77 +0,0 @@ -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns org.apache.clojure-mxnet.infer.predictor-test - (:require [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.dtype :as dtype] - [org.apache.clojure-mxnet.infer :as infer] - [org.apache.clojure-mxnet.layout :as layout] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.shape :as shape] - [clojure.java.io :as io] - [clojure.java.shell :refer [sh]] - [clojure.string :refer [split]] - [clojure.test :refer :all] - [org.apache.clojure-mxnet.util :as util])) - -(def model-dir "data/") -(def model-path-prefix (str model-dir "resnet-18/resnet-18")) -(def width 224) -(def height 224) - -(when-not (.exists (io/file (str model-path-prefix "-symbol.json"))) - (sh "./scripts/infer/get_resnet_18_data.sh")) - -(defn create-predictor [] - (let [descriptors [{:name "data" - :shape [1 3 height width] - :layout layout/NCHW - :dtype dtype/FLOAT32}] - factory (infer/model-factory model-path-prefix descriptors)] - (infer/create-predictor factory))) - -(deftest predictor-test-with-ndarray - (let [predictor (create-predictor) - image-ndarray (-> "test/test-images/kitten.jpg" - infer/load-image-from-file - (infer/reshape-image width height) - (infer/buffered-image-to-pixels [3 width height]) - (ndarray/expand-dims 0)) - predictions (infer/predict-with-ndarray predictor [image-ndarray]) - synset-file (-> (io/file model-path-prefix) - (.getParent) - (io/file "synset.txt")) - synset-names (split (slurp synset-file) #"\n") - [best-index] (ndarray/->int-vec (ndarray/argmax (first predictions) 1)) - best-prediction (synset-names best-index)] - (is (= "n02123159 tiger cat" best-prediction)))) - -(deftest predictor-test - (let [predictor (create-predictor) - image-ndarray (-> "test/test-images/kitten.jpg" - infer/load-image-from-file - (infer/reshape-image width height) - (infer/buffered-image-to-pixels [3 width height]) - 
(ndarray/expand-dims 0)) - predictions (infer/predict predictor [(ndarray/->vec image-ndarray)]) - synset-file (-> (io/file model-path-prefix) - (.getParent) - (io/file "synset.txt")) - synset-names (split (slurp synset-file) #"\n") - ndarray-preds (ndarray/array (first predictions) [1 1000]) - [best-index] (ndarray/->int-vec (ndarray/argmax ndarray-preds 1)) - best-prediction (synset-names best-index)] - (is (= "n02123159 tiger cat" best-prediction)))) diff --git a/contrib/clojure-package/test/org/apache/clojure_mxnet/module_test.clj b/contrib/clojure-package/test/org/apache/clojure_mxnet/module_test.clj deleted file mode 100644 index e03c43848332..000000000000 --- a/contrib/clojure-package/test/org/apache/clojure_mxnet/module_test.clj +++ /dev/null @@ -1,355 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -(ns org.apache.clojure-mxnet.module-test - (:require [clojure.java.io :as io] - [org.apache.clojure-mxnet.context :as context] - [org.apache.clojure-mxnet.dtype :as dtype] - [org.apache.clojure-mxnet.io :as mx-io] - [org.apache.clojure-mxnet.layout :as layout] - [org.apache.clojure-mxnet.module :as m] - [org.apache.clojure-mxnet.monitor :as monitor] - [org.apache.clojure-mxnet.ndarray :as ndarray] - [org.apache.clojure-mxnet.optimizer :as optimizer] - [org.apache.clojure-mxnet.shape :as mx-shape] - [org.apache.clojure-mxnet.symbol :as sym] - [org.apache.clojure-mxnet.util :as util] - [clojure.spec.alpha :as s] - [clojure.test :refer :all] - [clojure.reflect :as r] - [clojure.string :as string])) - -(deftest test-model-dtype - (let [dtype dtype/FLOAT32 - dshape [3 8 7] - s (sym/variable "data") - s (sym/activation "act" {"__layout__" "TNC"} {:data s :act_type "relu"}) - - mod (m/module s ["data"] nil [(context/cpu 0) (context/cpu 1)])] - (-> mod - (m/bind {:data-shapes [{:name "data" :shape dshape :dtype dtype :layout "TNC"}]}) - (m/init-params) - (m/forward {:data [(ndarray/ones dshape {:dtype dtype})]}) - (m/backward [(ndarray/ones dshape {:dtype dtype})])) - (let [outputs (-> mod (m/outputs) flatten)] - (is (every? 
#(= dtype/FLOAT32 (ndarray/dtype %)) outputs))))) - -(deftest test-module-input-grads - (let [a (sym/variable "a" {:kwargs {"__layout__" "NC"}}) - b (sym/variable "b" {:kwargs {"__layout__" "NC"}}) - c (sym/variable "c" {:kwargs {"__layout__" "NC"}}) - c (sym/+ a (sym/+ (sym/* b 2) (sym/* c 3))) - mod (m/module c ["b" "c" "a"] nil [(context/cpu 0) (context/cpu 1)])] - (-> mod - (m/bind {:data-shapes [{:name "b" :shape [5 5] :layout layout/NT} - {:name "c" :shape [5 5] :layout layout/NT} - {:name "a" :shape [5 5] :layout layout/NT}] - :inputs-need-grad true}) - (m/init-params) - (m/forward {:data [(ndarray/ones [5 5]) - (ndarray/ones [5 5]) - (ndarray/ones [5 5])] - :label nil - :index nil - :pad 0}) - (m/backward [(ndarray/ones [5 5])])) - (let [[a-grad b-grad c-grad] (m/input-grads-merged mod)] - (is (every? #(= 1.0 %) (ndarray/->vec a-grad))) - (is (every? #(= 2.0 %) (ndarray/->vec b-grad))) - (is (every? #(= 3.0 %) (ndarray/->vec c-grad)))))) - -(deftest test-module-layout - (let [s (sym/variable "data") - s (sym/activation "act " {"__layout__" "TNC"} {:data s :act_type "relu"}) - dshape [3 8 7] - mod (m/module s ["data"] nil [(context/cpu 0) (context/cpu 1)])] - (-> mod - (m/bind {:data-shapes [{:name "data" :shape dshape :dtype dtype/FLOAT32 :layout "TNC"}]}) - (m/init-params) - (m/forward {:data [(ndarray/ones dshape)] - :label nil - :index nil - :pad 0}) - (m/backward [(ndarray/ones dshape)])) - (let [outputs-merged (m/outputs-merged mod) - outputs (m/outputs mod) - hd-shape [3 4 7]] - (is (= dshape (-> outputs-merged first (ndarray/shape) (ndarray/->vec)))) - (is (every? 
#(= hd-shape (-> % ndarray/shape ndarray/->vec)) (flatten outputs)))))) - -(deftest test-module-save-load-single-device - (let [s (sym/variable "data") - s (sym/fully-connected {:data s :num-hidden 100}) - ;; single device - mod (m/module s {:data-names ["data"] :label-names nil})] - (-> mod - (m/bind {:data-shapes [{:name "data" :shape [10 10] :layout "NT"}]}) - (m/init-params) - (m/init-optimizer {:optimizer (optimizer/sgd {:learning-rate 0.1 :momentum 0.9})}) - (m/update) - (m/save-checkpoint {:prefix "test" :epoch 0 :save-opt-states true})) - (let [mod2 (m/load-checkpoint {:prefix "test" :epoch 0 :load-optimizer-states true})] - (-> mod2 - (m/bind {:data-shapes [{:name "data" :shape [10 10] :layout "NT"}]}) - (m/init-optimizer {:optimizer (optimizer/sgd {:learning-rate 0.1 :momentum 0.9})})) - (is (= (-> mod m/symbol sym/to-json) (-> mod2 m/symbol sym/to-json))) - (is (= (-> mod m/params first) (-> mod2 m/params first)))) - ;; arity 2 version of above. `load-optimizer-states` is `false` here by default, - ;; but optimizers states aren't checked here so it's not relevant to the test outcome. 
- (let [mod3 (m/load-checkpoint "test" 0)] - (-> mod3 - (m/bind {:data-shapes [{:name "data" :shape [10 10] :layout "NT"}]}) - (m/init-optimizer {:optimizer (optimizer/sgd {:learning-rate 0.1 :momentum 0.9})})) - (is (= (-> mod m/symbol sym/to-json) (-> mod3 m/symbol sym/to-json))) - (is (= (-> mod m/params first) (-> mod3 m/params first)))))) - -(deftest test-module-save-load-multi-device - (let [s (sym/variable "data") - s (sym/fully-connected {:data s :num-hidden 100}) - ;; multi device - mod (m/module s {:data-names ["data"] :label-names nil - :contexts [(context/cpu 0) (context/cpu 1)]})] - (-> mod - (m/bind {:data-shapes [{:name "data" :shape [10 10] :layout "NT"}]}) - (m/init-params) - (m/init-optimizer {:optimizer (optimizer/sgd {:learning-rate 0.1 :momentum 0.9})}) - (m/update) - (m/save-checkpoint {:prefix "test" :epoch 0 :save-opt-states true})) - - (let [mod2 (m/load-checkpoint {:prefix "test" :epoch 0 :load-optimizer-states true})] - (-> mod2 - (m/bind {:data-shapes [{:name "data" :shape [10 10] :layout "NT"}]}) - (m/init-optimizer {:optimizer (optimizer/sgd {:learning-rate 0.1 :momentum 0.9})})) - (is (= (-> mod m/symbol sym/to-json) (-> mod2 m/symbol sym/to-json))) - (is (= (-> mod m/params first) (-> mod2 m/params first)))))) - -(deftest test-module-reshape - (let [s (sym/variable "data") - s (sym/fully-connected "fc" {:data s :num-hidden 20}) - dshape [7 20] - mod (m/module s ["data"] nil [(context/cpu 0) (context/cpu 1)])] - (-> mod - (m/bind {:data-shapes [{:name "data" :shape dshape :layout "NT"}]}) - (m/init-params) - (m/init-optimizer {:optimizer (optimizer/sgd {:learning-rate 1.0})}) - (m/forward {:data [(ndarray/ones dshape)] :label nil :index nil :pad 0}) - (m/backward [(ndarray/ones dshape)]) - (m/update)) - (is (= dshape (-> (m/outputs-merged mod) first ndarray/shape mx-shape/->vec))) - (is (every? 
#(= -1.0 %) (-> (m/params mod) (first) (get "fc_bias") (ndarray/->vec)))) - - (let [dshape [14 20]] - (-> mod - (m/reshape [{:name "data" :shape dshape :layout "NT"}]) - (m/forward {:data [(ndarray/ones dshape)] :label nil :index nil :pad 0}) - (m/backward [(ndarray/ones dshape)]) - (m/update)) - (is (= dshape (-> (m/outputs-merged mod) first ndarray/shape mx-shape/->vec))) - (is (every? #(< 1e-3 (- 3 %)) (-> mod m/params first (get "fc_bias") (ndarray/->vec))))))) - -(deftest test-set-params - (let [data (ndarray/array [0.05 0.1] [1 1 1 2]) - label (ndarray/array [0.01 0.99] [1 1 1 2]) - train-data (mx-io/ndarray-iter [data] {:label [label] :label-name "softmax_label"}) - x (as-> (sym/variable "data") v - (sym/fully-connected "fc_0" {:data v :num-hidden 2}) - (sym/activation "act_0" {:data v :act-type "sigmoid"}) - (sym/fully-connected "fc_1" {:data v :num-hidden 2}) - (sym/activation "act_1" {:data v :act-type "sigmoid"}) - (sym/linear-regression-output "softmax" {:data v :grad-scale 2})) - - mod (m/module x)] - (m/bind mod {:data-shapes (mx-io/provide-data-desc train-data) :label-shapes (mx-io/provide-label train-data)}) - - (let [arg-params-correct {"fc_0_weight" (ndarray/array [0.15 0.2 0.25 0.3] [2 2]) - "fc_0_bias" (ndarray/array [0.35 0.35] [2]) - "fc_1_weight" (ndarray/array [0.4 0.45 05 0.55] [2 2]) - "fc_1_bias" (ndarray/array [0.6 0.6] [2])} - arg-params-missing {"fc_0_weight" (ndarray/array [0.15 0.2 0.25 0.3] [2 2]) - "fc_0_bias" (ndarray/array [0.35 0.35] [2]) - "fc_1_weight" (ndarray/array [0.4 0.45 05 0.55] [2 2])} - arg-params-extra {"fc_0_weight" (ndarray/array [0.15 0.2 0.25 0.3] [2 2]) - "fc_0_bias" (ndarray/array [0.35 0.35] [2]) - "fc_1_weight" (ndarray/array [0.4 0.45 05 0.55] [2 2]) - "fc_1_bias" (ndarray/array [0.6 0.6] [2]) - "fc_2_weight" (ndarray/array [0.6 0.6] [2])}] - (m/set-params mod {:arg-params arg-params-correct :force-init true}) - (m/set-params mod {:arg-params arg-params-missing :allow-missing true}) - (m/set-params mod 
{:arg-params arg-params-extra :allow-extra true})))) - -(deftest test-monitor - (let [data (ndarray/array [0.05 0.1] [1 1 1 2]) - label (ndarray/array [0.01 0.99] [1 1 1 2]) - train-data (mx-io/ndarray-iter [data] {:label [label] :label-name "softmax_label"}) - x (as-> (sym/variable "data") v - (sym/fully-connected "fc_0" {:data v :num-hidden 2}) - (sym/activation "act_0" {:data v :act-type "sigmoid"}) - (sym/fully-connected "fc_1" {:data v :num-hidden 2}) - (sym/activation "act_1" {:data v :act-type "sigmoid"}) - (sym/linear-regression-output "softmax" {:data v :grad-scale 2})) - ;; create monitor - mon (monitor/monitor 1 (fn [x] - (ndarray/div (ndarray/sum (ndarray/abs x)) - (mx-shape/product (ndarray/shape x))))) - mod (m/module x {:contexts [(context/cpu 0)]}) - arg-params {"fc_0_weight" (ndarray/array [0.15 0.2 0.25 0.3] [2 2]) - "fc_0_bias" (ndarray/array [0.35 0.35] [2]) - "fc_1_weight" (ndarray/array [0.4 0.45 05 0.55] [2 2]) - "fc_1_bias" (ndarray/array [0.6 0.6] [2])} - data-batch (mx-io/next train-data)] - (-> mod - (m/bind {:data-shapes [{:name "data", :shape [1 1 1 2]}] - :label-shapes [{:name "softmax_label", :shape [1 1 1 2]}]}) - (m/install-monitor mon) - (m/init-params {:arg-params arg-params})) - (monitor/tic mon) - (m/forward-backward mod data-batch) - (let [result (monitor/toc mon) - freq (->> result - (map (fn [v] (as-> (second v) ? - (clojure.string/split ? #"_") - (take 2 ?) 
- (clojure.string/join "_" ?)))) - (frequencies)) - expected-freq {"act_0" 2 "act_1" 2 "data" 1 "fc_0" 6 "fc_1" 6}] - (is (= expected-freq (select-keys freq (keys expected-freq))))))) - -(deftest test-forward-reshape - (let [num-class 10 - data1 (sym/variable "data1") - data2 (sym/variable "data2") - conv1 (sym/convolution {:data data1 :kernel [2 2] :num-filter 2 :stride [2 2]}) - conv2 (sym/convolution {:data data2 :kernel [3 3] :num-filter 3 :stride [1 1]}) - pooling1 (sym/pooling {:data conv1 :kernel [2 2] :pool-type "avg" :stride [1 1]}) - pooling2 (sym/pooling {:data conv2 :kernel [2 2] :pool-type "max" :stride [1 1]}) - flatten1 (sym/flatten {:data pooling1}) - flatten2 (sym/flatten {:data pooling2}) - sum (sym/+ (sym/sum {:data flatten1 :axis 1}) - (sym/sum {:data flatten2 :axis 1})) - fc (sym/fully-connected {:data sum :num-hidden num-class}) - my-sym (sym/softmax-output "softmax" {:data fc}) - - d-shape1 [10 3 64 64] - d-shape2 [10 3 32 32] - l-shape [10] - - mod (m/module my-sym {:data-names ["data1" "data2"]}) - data-batch {:data [(ndarray/random-uniform 0 9 (str (mx-shape/->shape d-shape1))) - (ndarray/random-uniform 5 15 (str (mx-shape/->shape d-shape2)))] - :label [(ndarray/ones l-shape)] - :index nil - :pad 0}] - - ;; train with the original shapes - (-> mod - (m/bind {:data-shapes [{:name "data1" :shape d-shape1} - {:name "data2" :shape d-shape2}] - :label-shapes [{:name "softmax_label" :shape l-shape :layout "N"}]}) - (m/init-params) - (m/init-optimizer {:optimizer (optimizer/sgd {:learning-rate 0.1})}) - (m/forward data-batch)) - (is (= [(first l-shape) num-class] - (-> mod - (m/outputs-merged) - (first) - (ndarray/shape) - (mx-shape/->vec)))) - (-> mod - (m/backward) - (m/update)) - - (let [d-shape1 [3 3 64 64] - d-shape2 [3 3 32 32] - l-shape [3] - data-batch-2 {:data [(ndarray/random-uniform 0 9 (str (mx-shape/->shape d-shape1))) - (ndarray/random-uniform 5 15 (str (mx-shape/->shape d-shape2)))] - :label [(ndarray/ones l-shape)] - :index nil - 
:pad 0}] - (-> mod - (m/forward data-batch-2)) - (is (= [(first l-shape) num-class] - (-> mod - (m/outputs-merged) - (first) - (ndarray/shape) - (mx-shape/->vec)))) - (-> mod - (m/backward) - (m/update))) - - (let [d-shape1 [20 3 64 64] - d-shape2 [20 3 32 32] - l-shape [20] - data-batch-2 {:data [(ndarray/random-uniform 3 5 (str (mx-shape/->shape d-shape1))) - (ndarray/random-uniform 10 25 (str (mx-shape/->shape d-shape2)))] - :label [(ndarray/ones l-shape)] - :index nil - :pad 0}] - (-> mod - (m/forward data-batch-2)) - (is (= [(first l-shape) num-class] - (-> mod - (m/outputs-merged) - (first) - (ndarray/shape) - (mx-shape/->vec)))) - (-> mod - (m/backward) - (m/update))) - - ;; train with both different batch sizes and data shapes - (let [d-shape1 [20 3 120 120] - d-shape2 [20 3 32 64] - l-shape [20] - data-batch {:data [(ndarray/random-uniform 0 9 (str (mx-shape/->shape d-shape1))) - (ndarray/random-uniform 15 25 (str (mx-shape/->shape d-shape2)))] - :label [(ndarray/ones l-shape)] - :index nil - :pad 0}] - (-> mod - (m/forward data-batch)) - (is (= [(first l-shape) num-class] - (-> (m/outputs-merged mod) - first - (ndarray/shape) - (mx-shape/->vec)))) - (-> mod - (m/backward) - (m/update))) - (let [d-shape1 [5 3 28 40] - d-shape2 [5 3 24 16] - l-shape [5] - data-batch {:data [(ndarray/random-uniform 0 9 (str (mx-shape/->shape d-shape1))) - (ndarray/random-uniform 15 25 (str (mx-shape/->shape d-shape2)))] - :label [(ndarray/ones l-shape)] - :index nil - :pad 0}] - (-> mod - (m/forward data-batch)) - (is (= [(first l-shape) num-class] - (-> (m/outputs-merged mod) - first - (ndarray/shape) - (mx-shape/->vec)))) - (-> mod - (m/backward) - (m/update))))) - -(comment - - (m/data-shapes x)) diff --git a/contrib/clojure-package/test/org/apache/clojure_mxnet/ndarray_api_test.clj b/contrib/clojure-package/test/org/apache/clojure_mxnet/ndarray_api_test.clj deleted file mode 100644 index 18b8b78f19d1..000000000000 --- 
a/contrib/clojure-package/test/org/apache/clojure_mxnet/ndarray_api_test.clj +++ /dev/null @@ -1,415 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns org.apache.clojure-mxnet.ndarray-api-test - (:require [org.apache.clojure-mxnet.base :as base] - [org.apache.clojure-mxnet.context :as ctx] - [org.apache.clojure-mxnet.dtype :as dtype] - [org.apache.clojure-mxnet.ndarray :as ndarray :refer [->vec zeros ones += -= *= full shape shape-vec]] - [org.apache.clojure-mxnet.ndarray-api :as ndarray-api] - [org.apache.clojure-mxnet.shape :as mx-shape :refer [->shape]] - [org.apache.clojure-mxnet.test-util :as test-util :refer [approx=]] - [org.apache.clojure-mxnet.util :as util :refer [->option]] - [clojure.test :refer :all])) - -(deftest test-activation - (let [data (ndarray/array [2 1 0 -1 -2] [1 5]) - relu (ndarray-api/activation data "relu") - sigmoid (ndarray-api/activation data "sigmoid") - softsign (ndarray-api/activation data "softsign") - out (ndarray/zeros [1 5]) - _ (ndarray-api/activation {:data data :act-type "relu" :out out})] - (is (= [2.0 1.0 0.0 0.0 0.0] (->vec relu))) - (is (approx= 1e-3 [0.881 0.731 0.5 0.269 0.119] (->vec sigmoid))) - (is (approx= 1e-3 [0.666 0.5 0.0 -0.5 -0.666] (->vec 
softsign))) - (is (= [2.0 1.0 0.0 0.0 0.0] (->vec out))))) - -(deftest test-bilinear-sampler - (let [data (ndarray/array [1 4 3 6 - 1 8 8 9 - 0 4 1 5 - 1 0 1 3] - [1 1 4 4]) - affine (ndarray/array [2 0 0 - 0 2 0] - [1 6]) - grid (ndarray-api/grid-generator {:data affine :transform-type "affine" :target-shape [4 4]}) - out (ndarray-api/bilinear-sampler data grid)] - (is (approx= 1e-3 - [0.0 0.0 0.0 0.0 - 0.0 3.5 6.5 0.0 - 0.0 1.25 2.5 0.0 - 0.0 0.0 0.0 0.0] - (->vec out))))) - -(deftest test-cast - (let [nda1 (ndarray/array [0.9 1.3] [2]) - nda2 (ndarray/array [1e20 11.1] [2]) - nda3 (ndarray/array [300 11.1 10.9 -1 -3] [5]) - out (ndarray/zeros [2] {:dtype dtype/INT32}) - _ (ndarray-api/cast {:data nda1 :dtype (str dtype/INT32) :out out})] - (is (= [0.0 1.0] (->vec (ndarray-api/cast nda1 (str dtype/INT32))))) - (is (= [(float 1e20) (float 11.1)] (->vec (ndarray-api/cast nda2 (str dtype/FLOAT32))))) - ;; uint8 gets converted to native types after ->vec - (is (= [44.0 11.0 10.0 -1.0 -3.0] (->vec (ndarray-api/cast nda3 "uint8")))))) - -(deftest test-concat - (let [nda1 (ndarray/zeros [1 2]) - nda2 (ndarray/ones [1 2]) - out (ndarray/zeros [1 4]) - res1 (ndarray-api/concat [nda1 nda2] 2) ;; num_args=2, dim=1 (default) - res2 (ndarray-api/concat {:data [nda1 nda2] :num-args 2 :dim 0}) ;; num_args=2, dim=0 - res3 (ndarray-api/concat {:data [nda1 nda2 nda1] :num-args 3 :dim 1}) ;; num_args=3, dim=1 - _ (ndarray-api/concat {:data [nda1 nda2] :num-args 2 :dim 1 :out out}) ;; store result in out - ] - (is (= [0.0 0.0 1.0 1.0] (->vec res1))) - (is (= [1 4] (shape-vec res1))) - (is (= [0.0 0.0 1.0 1.0] (->vec res2))) - (is (= [2 2] (shape-vec res2))) - (is (= [0.0 0.0 1.0 1.0 0.0 0.0] (->vec res3))) - (is (= [1 6] (shape-vec res3))) - (is (= [0.0 0.0 1.0 1.0] (->vec out))) - (is (= [1 4] (shape-vec out))))) - -(deftest test-embedding - (let [input-dim 4 - output-dim 5 - w (ndarray/array [0. 1. 2. 3. 4. - 5. 6. 7. 8. 9. - 10. 11. 12. 13. 14. - 15. 16. 17. 18. 19.] 
- [4 5]) - x (ndarray/array [1. 3. - 0. 2.] - [2 2]) - out (ndarray-api/embedding x w input-dim output-dim)] - (is (= [5. 6. 7. 8. 9. - 15. 16. 17. 18. 19. - 0. 1. 2. 3. 4. - 10. 11. 12. 13. 14.] - (->vec out))) - (is (= [2 2 5] (shape-vec out))))) - -(deftest test-flatten - (let [nda (ndarray/array [1 2 3 - 4 5 6 - 7 8 9 - 1 2 3 - 4 5 6 - 7 8 9] - [2 3 3]) - out (ndarray/zeros [2 9]) - res (ndarray-api/flatten {:data nda}) - _ (ndarray-api/flatten {:data nda :out out})] - (is (= [1. 2. 3. 4. 5. 6. 7. 8. 9. - 1. 2. 3. 4. 5. 6. 7. 8. 9.] (->vec res))) - (is (= [2 9] (shape-vec res))) - (is (= [1. 2. 3. 4. 5. 6. 7. 8. 9. - 1. 2. 3. 4. 5. 6. 7. 8. 9.] (->vec out))) - (is (= [2 9] (shape-vec out))))) - -(deftest test-instance-norm - (let [x (ndarray/array [1.1 2.2 3.3 4.4] [2 1 2]) - gamma (ndarray/array [1.5] [1]) - beta (ndarray/array [0.5] [1]) - res (ndarray-api/instance-norm x gamma beta)] - (is (approx= 1e-4 [-0.9975 1.9975 - -0.9975 1.9975] (->vec res))) - (is (= [2 1 2] (shape-vec res))))) - -(deftest test-l2-normalization - (let [x (ndarray/array [1 2 3 4 2 2 5 6] [2 2 2]) - res1 (ndarray-api/l2-normalization {:data x}) ;; instance-wise - res2 (ndarray-api/l2-normalization {:data x :mode "instance"}) - res3 (ndarray-api/l2-normalization {:data x :mode "channel"}) - res4 (ndarray-api/l2-normalization {:data x :mode "spatial"})] - (is (approx= 1e-4 [0.1825 0.3651 - 0.5477 0.7303 - 0.2407 0.2407 - 0.6019 0.7223] (->vec res1))) - (is (approx= 1e-4 [0.1825 0.3651 - 0.5477 0.7303 - 0.2407 0.2407 - 0.6019 0.7223] (->vec res2))) - (is (approx= 1e-4 [0.3162 0.4472 - 0.9486 0.8944 - 0.3714 0.3162 - 0.9284 0.9486] (->vec res3))) - (is (approx= 1e-4 [0.4472 0.8944 - 0.6 0.8 - 0.7071 0.7071 - 0.6402 0.7682] (->vec res4))))) - -(deftest test-pad - (let [x (ndarray/array [1 2 3 - 4 5 6 - 7 8 9 - 10 11 12 - 11 12 13 - 14 15 16 - 17 18 19 - 20 21 22] - [2 2 2 3]) - res1 (ndarray-api/pad x "edge" [0,0,0,0,1,1,1,1]) - res2 (ndarray-api/pad {:data x :mode "constant" :pad-width 
[0,0,0,0,1,1,1,1] :constant-value 0})] - (is (= [1. 1. 2. 3. 3. - 1. 1. 2. 3. 3. - 4. 4. 5. 6. 6. - 4. 4. 5. 6. 6. - 7. 7. 8. 9. 9. - 7. 7. 8. 9. 9. - 10. 10. 11. 12. 12. - 10. 10. 11. 12. 12. - 11. 11. 12. 13. 13. - 11. 11. 12. 13. 13. - 14. 14. 15. 16. 16. - 14. 14. 15. 16. 16. - 17. 17. 18. 19. 19. - 17. 17. 18. 19. 19. - 20. 20. 21. 22. 22. - 20. 20. 21. 22. 22.] (->vec res1))) - (is (= [2 2 4 5] (shape-vec res1))) - (is (= [0. 0. 0. 0. 0. - 0. 1. 2. 3. 0. - 0. 4. 5. 6. 0. - 0. 0. 0. 0. 0. - - 0. 0. 0. 0. 0. - 0. 7. 8. 9. 0. - 0. 10. 11. 12. 0. - 0. 0. 0. 0. 0. - - 0. 0. 0. 0. 0. - 0. 11. 12. 13. 0. - 0. 14. 15. 16. 0. - 0. 0. 0. 0. 0. - - 0. 0. 0. 0. 0. - 0. 17. 18. 19. 0. - 0. 20. 21. 22. 0. - 0. 0. 0. 0. 0.] (->vec res2))) - (is (= [2 2 4 5] (shape-vec res2))))) - -(deftest test-roi-pooling - (let [xi [[[[ 0., 1., 2., 3., 4., 5.], - [ 6., 7., 8., 9., 10., 11.], - [ 12., 13., 14., 15., 16., 17.], - [ 18., 19., 20., 21., 22., 23.], - [ 24., 25., 26., 27., 28., 29.], - [ 30., 31., 32., 33., 34., 35.], - [ 36., 37., 38., 39., 40., 41.], - [ 42., 43., 44., 45., 46., 47.]]]] - x (ndarray/array (-> xi flatten vec) [1 1 8 6]) - y (ndarray/array [0 0 0 4 4] [1 5]) - res1 (ndarray-api/roi-pooling x y [2 2] 1.0) - res2 (ndarray-api/roi-pooling x y [2 2] 0.7)] - (is (= [14. 16. 26. 28.] (->vec res1))) - (is (= [1 1 2 2] (shape-vec res1))) - (is (= [7. 9. 19. 21.] (->vec res2))) - (is (= [1 1 2 2] (shape-vec res2))))) - -(deftest test-reshape - (let [x (ndarray/array (vec (range 4)) [4]) - y (ndarray/array (vec (range 24)) [2 3 4]) - z (ndarray/array (vec (range 120)) [2 3 4 5]) - res1 (ndarray-api/reshape {:data x :shape [2 2]})] - (is (= [0. 1. 2. 3.] 
(->vec res1))) - (is (= [2 2] (shape-vec res1))) - (is (= (map float (range 24)) (->vec (ndarray-api/reshape {:data y :shape [4 0 2]})))) - (is (= [4 3 2] (shape-vec (ndarray-api/reshape {:data y :shape [4 0 2]})))) - (is (= [2 3 4] (shape-vec (ndarray-api/reshape {:data y :shape [2 0 0]})))) - (is (= [6 1 4] (shape-vec (ndarray-api/reshape {:data y :shape [6 1 -1]})))) - (is (= [3 1 8] (shape-vec (ndarray-api/reshape {:data y :shape [3 -1 8]})))) - (is (= [24] (shape-vec (ndarray-api/reshape {:data y :shape [-1]})))) - (is (= [2 3 4] (shape-vec (ndarray-api/reshape {:data y :shape [-2]})))) - (is (= [2 3 4] (shape-vec (ndarray-api/reshape {:data y :shape [2 -2]})))) - (is (= [2 3 4 1 1] (shape-vec (ndarray-api/reshape {:data y :shape [-2 1 1]})))) - (is (= [6 4] (shape-vec (ndarray-api/reshape {:data y :shape [-3 4]})))) - (is (= [6 20] (shape-vec (ndarray-api/reshape {:data z :shape [-3 -3]})))) - (is (= [2 12] (shape-vec (ndarray-api/reshape {:data y :shape [0 -3]})))) - (is (= [6 4] (shape-vec (ndarray-api/reshape {:data y :shape [-3 -2]})))) - (is (= [1 2 3 4] (shape-vec (ndarray-api/reshape {:data y :shape [-4 1 2 -2]})))) - (is (= [2 1 3 4] (shape-vec (ndarray-api/reshape {:data y :shape [2 -4 -1 3 -2]})))))) - -(deftest test-sequence-last - (let [xi [[[ 1., 2., 3.], - [ 4., 5., 6.], - [ 7., 8., 9.]], - - [[ 10., 11., 12.], - [ 13., 14., 15.], - [ 16., 17., 18.]], - - [[ 19., 20., 21.], - [ 22., 23., 24.], - [ 25., 26., 27.]]] - x (ndarray/array (-> xi flatten vec) [3 3 3]) - seq-len1 (ndarray/array [1 1 1] [3]) - seq-len2 (ndarray/array [1 2 3] [3]) - ;; This test is failing with an exception - ;; (most likely a scala generation issue) - ;; res1 (ndarray-api/sequence-last x nil) - ] - ;; (is (= [] (->vec res1))) -)) - -(deftest test-sequence-mask - (let [xi [[[ 1., 2., 3.], - [ 4., 5., 6.]], - - [[ 7., 8., 9.], - [ 10., 11., 12.]], - - [[ 13., 14., 15.], - [ 16., 17., 18.]]] - x (ndarray/array (-> xi flatten vec) [3 2 3]) - seq-len1 (ndarray/array [1 1] 
[2]) - seq-len2 (ndarray/array [2 3] [2]) - ;; Same issue as previous test - ;; res1 (ndarray-api/sequence-mask x seq-len1) - ] - ;; (is (= [] (->vec res1))) -)) - -(deftest test-slice-channel - (let [xi [[[ 1.] [ 2.]] - [[ 3.] [ 4.]] - [[ 5.] [ 6.]]] - x (ndarray/array (-> xi flatten vec) [3 2 1]) - res1 (ndarray-api/slice-channel {:data x :num-outputs 2 :axis 1}) - res2 (ndarray-api/slice-channel {:data x :num-outputs 3 :axis 0}) - res3 (ndarray-api/slice-channel {:data x :num-outputs 3 :axis 0 :squeeze-axis 1})] - (is (= [1. 3. 5.] (->vec res1))) - (is (= [3 1 1] (shape-vec res1))) - (is (= [1. 2.] (->vec res2))) - (is (= [1 2 1] (shape-vec res2))) - (is (= [1. 2.] (->vec res3))) - (is (= [2 1] (shape-vec res3))))) - -(deftest test-softmax-activation - (let [x (ndarray/array [1 1 1 1 1 1] [2 3]) - res1 (ndarray-api/softmax-activation {:data x :mode "instance"})] - (is (approx= 1e-3 [0.333 0.333 0.333 - 0.333 0.333 0.333] (->vec res1))) - (is (= [2 3] (shape-vec res1))))) - -(deftest test-softmax-output - (let [datai [[1,2,3,4],[2,2,2,2],[3,3,3,3],[4,4,4,4]] - data (ndarray/array (-> datai flatten vec) [4 4]) - label (ndarray/array [1,0,2,3] [4]) - res1 (ndarray-api/softmax-output data label)] - (is (approx= 1e-4 [0.0321 0.0871 0.2369 0.6439 - 0.25 0.25 0.25 0.25 - 0.25 0.25 0.25 0.25 - 0.25 0.25 0.25 0.25] (->vec res1))) - (is (= [4 4] (shape-vec res1))))) - -(deftest test-swap-axis - (let [x (ndarray/array (range 3) [1 3]) - y (ndarray/array (range 8) [2 2 2]) - res1 (ndarray-api/swap-axis {:data x :dim1 0 :dim2 1}) - res2 (ndarray-api/swap-axis {:data y :dim1 0 :dim2 2})] - (is (= [0. 1. 2.] (->vec res1))) - (is (= [3 1] (shape-vec res1))) - (is (= [0. 4. 2. 6. 1. 5. 3. 7.] (->vec res2))) - (is (= [2 2 2] (shape-vec res2))))) - -(deftest test-abs - (let [x (ndarray/array [-2 0 3] [3]) - res1 (ndarray-api/abs {:data x})] - (is (= [2. 0. 3.] 
(->vec res1))) - (is (= [3] (shape-vec res1))))) - -(deftest test-arccos - (let [x (ndarray/array [-1 -0.707 0 0.707 1] [5]) - pi Math/PI - res1 (ndarray-api/arccos {:data x})] - (is (approx= 1e-3 [pi (* 0.75 pi) (* 0.5 pi) (* 0.25 pi) 0.] (->vec res1))))) - -(deftest test-arcsin - (let [x (ndarray/array [-1 -0.707 0 0.707 1] [5]) - pi Math/PI - res1 (ndarray-api/arcsin {:data x})] - (is (approx= 1e-3 [(- (* 0.5 pi)) (- (* 0.25 pi)) 0 (* 0.25 pi) (* 0.5 pi)] (->vec res1))))) - -(deftest test-argmax - (let [x (ndarray/array (range 6) [2 3]) - res1 (ndarray-api/argmax {:data x :axis 0}) - res2 (ndarray-api/argmax {:data x :axis 1}) - res3 (ndarray-api/argmax {:data x :axis 0 :keepdims true}) - res4 (ndarray-api/argmax {:data x :axis 1 :keepdims true})] - (is (= [1. 1. 1.] (->vec res1))) - (is (= [3] (shape-vec res1))) - (is (= [2. 2.] (->vec res2))) - (is (= [2] (shape-vec res2))) - (is (= [1. 1. 1.] (->vec res3))) - (is (= [1 3] (shape-vec res3))) - (is (= [2. 2.] (->vec res4))) - (is (= [2 1] (shape-vec res4))))) - -(deftest test-argmax-channel - (let [x (ndarray/array (range 6) [2 3]) - res1 (ndarray-api/argmax-channel {:data x})] - (is (= [2. 2.] (->vec res1))) - (is (= [2] (shape-vec res1))))) - -(deftest test-argmin - (let [x (ndarray/array (reverse (range 6)) [2 3]) - res1 (ndarray-api/argmin {:data x :axis 0}) - res2 (ndarray-api/argmin {:data x :axis 1}) - res3 (ndarray-api/argmin {:data x :axis 0 :keepdims true}) - res4 (ndarray-api/argmin {:data x :axis 1 :keepdims true})] - (is (= [1. 1. 1.] (->vec res1))) - (is (= [3] (shape-vec res1))) - (is (= [2. 2.] (->vec res2))) - (is (= [2] (shape-vec res2))) - (is (= [1. 1. 1.] (->vec res3))) - (is (= [1 3] (shape-vec res3))) - (is (= [2. 2.] 
(->vec res4))) - (is (= [2 1] (shape-vec res4))))) - -(deftest test-argsort - (let [x (ndarray/array [0.3 0.2 0.4 - 0.1 0.3 0.2] - [2 3]) - y (ndarray/array [0.3 0.2 0.4 0.1 0.3 0.2] [6]) - res1 (ndarray-api/argsort {:data x}) - res2 (ndarray-api/argsort {:data x :axis 0}) - res3 (ndarray-api/argsort {:data y})] - (is (= [1. 0. 2. - 0. 2. 1.] - (->vec res1))) - (is (= [2 3] (shape-vec res1))) - (is (= [1. 0. 1. - 0. 1. 0.] - (->vec res2))) - (is (= [2 3] (shape-vec res1))) - (is (= [3. 1. 5. 0. 4. 2.] (->vec res3))) - (is (= [6] (shape-vec res3))))) - -(deftest test-batch-take - (let [x (ndarray/array (range 6) [3 2]) - i (ndarray/as-type (ndarray/array [0 1 0] [3]) dtype/INT32) - res1 (ndarray-api/batch-take x i) ] - (is (= [0. 3. 4.] (->vec res1))))) - -(deftest test-broadcast-add - (let [x (ndarray/ones [2 3]) - y (ndarray/array (range 2) [2 1]) - res1 (ndarray-api/broadcast-add x y)] - (is (= [1. 1. 1. 2. 2. 2.] (->vec res1))) - (is (= [2 3] (shape-vec res1))))) diff --git a/contrib/clojure-package/test/org/apache/clojure_mxnet/operator_test.clj b/contrib/clojure-package/test/org/apache/clojure_mxnet/operator_test.clj index c4edb19d7e3d..2498e3027bcf 100644 --- a/contrib/clojure-package/test/org/apache/clojure_mxnet/operator_test.clj +++ b/contrib/clojure-package/test/org/apache/clojure_mxnet/operator_test.clj @@ -80,14 +80,6 @@ np-out (ndarray/->vec arr-label))] (is (approx= 1e-6 npout-back arr-grad))))) -(deftest test-regression - (check-regression (sym/logistic-regression-output {:data (sym/variable "data") :label (sym/variable "label")}) - (fn [x] (/ 1.0 (+ 1.0 (Math/exp (* -1.0 x))))) - (fn [x y] (- x y))) - (check-regression (sym/linear-regression-output {:data (sym/variable "data") :label (sym/variable "label")}) - (fn [x] x) - (fn [x y] (- x y)))) - (deftest swap-axes (let [data (sym/variable "data") shape-vec [2 3 4] diff --git a/contrib/clojure-package/test/org/apache/clojure_mxnet/symbol_api_test.clj 
b/contrib/clojure-package/test/org/apache/clojure_mxnet/symbol_api_test.clj index b642ad75d1d0..03bf7c31b30c 100644 --- a/contrib/clojure-package/test/org/apache/clojure_mxnet/symbol_api_test.clj +++ b/contrib/clojure-package/test/org/apache/clojure_mxnet/symbol_api_test.clj @@ -49,13 +49,3 @@ (= (sym/list-arguments oldfc) (-> (sym/get-internals net1) (sym/get "fc1_output") (sym/list-arguments))))) - -(deftest test-infer-type - (let [data (sym/variable "data") - f32data (sym-api/cast {:data data :dtype "float32"}) - fc1 (sym-api/fully-connected {:data f32data :num-hidden 128 :name"fc1"}) - mlp (sym-api/softmax-output {:data fc1 :name"softmax"}) - [arg out aux] (sym/infer-type mlp {:data dtype/FLOAT64})] - (is (= [dtype/FLOAT64 dtype/FLOAT32 dtype/FLOAT32 dtype/FLOAT32] (util/buffer->vec arg))) - (is (= [dtype/FLOAT32] (util/buffer->vec out))) - (is (= [] (util/buffer->vec aux))))) diff --git a/contrib/clojure-package/test/org/apache/clojure_mxnet/symbol_test.clj b/contrib/clojure-package/test/org/apache/clojure_mxnet/symbol_test.clj index 4d1b493ab2b6..5308b883aa3c 100644 --- a/contrib/clojure-package/test/org/apache/clojure_mxnet/symbol_test.clj +++ b/contrib/clojure-package/test/org/apache/clojure_mxnet/symbol_test.clj @@ -50,16 +50,6 @@ (sym/get "fc1_output") (sym/list-arguments))))) -(deftest test-infer-type - (let [data (sym/variable "data") - f32data (sym/cast {:data data :dtype "float32"}) - fc1 (sym/fully-connected "fc1" {:data f32data :num-hidden 128}) - mlp (sym/softmax-output "softmax" {:data fc1}) - [arg out aux] (sym/infer-type mlp {:data dtype/FLOAT64})] - (is (= [dtype/FLOAT64 dtype/FLOAT32 dtype/FLOAT32 dtype/FLOAT32] (util/buffer->vec arg))) - (is (= [dtype/FLOAT32] (util/buffer->vec out))) - (is (= [] (util/buffer->vec aux))))) - (deftest test-copy (let [data (sym/variable "data") data2 (sym/clone data)] diff --git a/contrib/clojure-package/test/org/apache/clojure_mxnet/visualization_test.clj 
b/contrib/clojure-package/test/org/apache/clojure_mxnet/visualization_test.clj deleted file mode 100644 index a2bea9478390..000000000000 --- a/contrib/clojure-package/test/org/apache/clojure_mxnet/visualization_test.clj +++ /dev/null @@ -1,32 +0,0 @@ -;; -;; Licensed to the Apache Software Foundation (ASF) under one or more -;; contributor license agreements. See the NOTICE file distributed with -;; this work for additional information regarding copyright ownership. -;; The ASF licenses this file to You under the Apache License, Version 2.0 -;; (the "License"); you may not use this file except in compliance with -;; the License. You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. -;; - -(ns org.apache.clojure-mxnet.visualization-test - (:require [org.apache.clojure-mxnet.symbol :as sym] - [org.apache.clojure-mxnet.visualization :as viz] - [clojure.test :refer :all]) - (:import (org.apache.mxnet Visualization$Dot))) - -(deftest test-plot-network - (let [to-plot-sym (as-> (sym/variable "data") data - (sym/flatten "fl" {:data data}) - (sym/softmax-output "softmax" {:data data})) - dot (viz/plot-network to-plot-sym - {"data" [1 1 28 28]} - {:title "foo" - :node-attrs {:shape "oval" :fixedsize "false"}})] - (is (instance? 
Visualization$Dot dot)))) diff --git a/cpp-package/example/CMakeLists.txt b/cpp-package/example/CMakeLists.txt index bf9427af03ee..d54843f319b4 100644 --- a/cpp-package/example/CMakeLists.txt +++ b/cpp-package/example/CMakeLists.txt @@ -18,40 +18,6 @@ # Explicitly set GENERATED property https://gitlab.kitware.com/cmake/cmake/issues/18399 set_property(SOURCE ${CMAKE_CURRENT_LIST_DIR}/../include/mxnet-cpp/op.h PROPERTY GENERATED 1) -add_executable(test_regress_label test_regress_label.cpp) -target_link_libraries(test_regress_label mxnet_cpp) - -add_executable(lenet lenet.cpp) -target_link_libraries(lenet mxnet_cpp) - -add_executable(lenet_with_mxdataiter lenet_with_mxdataiter.cpp) -target_link_libraries(lenet_with_mxdataiter mxnet_cpp) - -add_executable(alexnet alexnet.cpp) -target_link_libraries(alexnet mxnet_cpp) - -add_executable(charRNN charRNN.cpp) -target_link_libraries(charRNN mxnet_cpp) - -add_executable(googlenet googlenet.cpp) -target_link_libraries(googlenet mxnet_cpp) - -add_executable(inception_bn inception_bn.cpp) -target_link_libraries(inception_bn mxnet_cpp) - -add_executable(mlp mlp.cpp) -target_link_libraries(mlp mxnet_cpp) - -add_executable(mlp_cpu mlp_cpu.cpp) -target_link_libraries(mlp_cpu mxnet_cpp) - -add_executable(mlp_gpu mlp_gpu.cpp) -target_link_libraries(mlp_gpu mxnet_cpp) - -add_executable(resnet resnet.cpp) -target_link_libraries(resnet mxnet_cpp) - - if(MSVC) add_custom_target(cpp_package_deploy_library ALL DEPENDS mxnet diff --git a/cpp-package/example/README.md b/cpp-package/example/README.md index 555316dd1ac3..208532e0066e 100644 --- a/cpp-package/example/README.md +++ b/cpp-package/example/README.md @@ -35,97 +35,3 @@ By default, the examples are built to be run on GPU. To build examples to run on The examples that are built to be run on GPU may not work on the non-GPU machines. The makefile will also download the necessary data files and store in a data folder. 
(The download will take couple of minutes, but will be done only once on a fresh installation.) - - -## Examples demonstrating training workflow - -This directory contains following examples. In order to run the examples, ensure that the path to the MXNet shared library is added to the OS specific environment variable viz. **LD\_LIBRARY\_PATH** for Linux, Mac and Ubuntu OS and **PATH** for Windows OS. For example `export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/home/ubuntu/incubator-mxnet/lib` on ubuntu using gpu. - -### [alexnet.cpp]() - -The example implements the C++ version of AlexNet. The networks trains on MNIST data. The number of epochs can be specified as a command line argument. For example to train with 10 epochs use the following: - -``` -build/alexnet 10 -``` - -### [googlenet.cpp]() - -The code implements a GoogLeNet/Inception network using the C++ API. The example uses MNIST data to train the network. By default, the example trains the model for 100 epochs. The number of epochs can also be specified in the command line. For example, to train the model for 10 epochs use the following: - -``` -build/googlenet 10 -``` - -### [mlp.cpp]() - -The code implements a multilayer perceptron from scratch. The example creates its own dummy data to train the model. The example does not require command line parameters. It trains the model for 20,000 epochs. -To run the example use the following command: - -``` -build/mlp -``` - -### [mlp_cpu.cpp]() - -The code implements a multilayer perceptron to train the MNIST data. The code demonstrates the use of "SimpleBind" C++ API and MNISTIter. The example is designed to work on CPU. The example does not require command line parameters. -To run the example use the following command: - -``` -build/mlp_cpu -``` - -### [mlp_gpu.cpp]() - -The code implements a multilayer perceptron to train the MNIST data. The code demonstrates the use of the "SimpleBind" C++ API and MNISTIter. The example is designed to work on GPU. 
The example does not require command line arguments. To run the example execute following command: - -``` -build/mlp_gpu -``` - -### [mlp_csv.cpp]() - -The code implements a multilayer perceptron to train the MNIST data. The code demonstrates the use of the "SimpleBind" C++ API and CSVIter. The CSVIter can iterate data that is in CSV format. The example can be run on CPU or GPU. The example usage is as follows: - -``` -build/mlp_csv --train data/mnist_data/mnist_train.csv --test data/mnist_data/mnist_test.csv --epochs 10 --batch_size 100 --hidden_units "128 64 64" --gpu -``` -* To get the `mnist_training_set.csv` and `mnist_test_set.csv` please run the following command: -```python -# in incubator-mxnet/cpp-package/example directory -python mnist_to_csv.py ./data/mnist_data/train-images-idx3-ubyte ./data/mnist_data/train-labels-idx1-ubyte ./data/mnist_data/mnist_train.csv 60000 -python mnist_to_csv.py ./data/mnist_data/t10k-images-idx3-ubyte ./data/mnist_data/t10k-labels-idx1-ubyte ./data/mnist_data/mnist_test.csv 10000 -``` - -### [resnet.cpp]() - -The code implements a resnet model using the C++ API. The model is used to train MNIST data. The number of epochs for training the model can be specified on the command line. By default, model is trained for 100 epochs. For example, to train with 10 epochs use the following command: - -``` -build/resnet 10 -``` - -### [lenet.cpp]() - -The code implements a lenet model using the C++ API. It uses MNIST training data in CSV format to train the network. The example does not use built-in CSVIter to read the data from CSV file. The number of epochs can be specified on the command line. By default, the mode is trained for 100,000 epochs. For example, to train with 10 epochs use the following command: - -``` -build/lenet 10 -``` -### [lenet\_with\_mxdataiter.cpp]() - -The code implements a lenet model using the C++ API. It uses MNIST training data to train the network. The example uses built-in MNISTIter to read the data. 
The number of epochs can be specified on the command line. By default, the mode is trained for 100 epochs. For example, to train with 10 epochs use the following command: - -``` -build/lenet_with_mxdataiter 10 -``` - -In addition, there is `run_lenet_with_mxdataiter.sh` that downloads the mnist data and run `lenet_with_mxdataiter` example. - -### [inception_bn.cpp]() - -The code implements an Inception network using the C++ API with batch normalization. The example uses MNIST data to train the network. The model trains for 100 epochs. The example can be run by executing the following command: - -``` -build/inception_bn -``` diff --git a/cpp-package/example/alexnet.cpp b/cpp-package/example/alexnet.cpp deleted file mode 100644 index 1c182182c1a5..000000000000 --- a/cpp-package/example/alexnet.cpp +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- */ -#include -#include -#include -#include -#include -#include "utils.h" -#include "mxnet-cpp/MxNetCpp.h" - -using namespace mxnet::cpp; - -Symbol AlexnetSymbol(int num_classes) { - auto input_data = Symbol::Variable("data"); - auto target_label = Symbol::Variable("label"); - /*stage 1*/ - auto conv1 = Operator("Convolution") - .SetParam("kernel", Shape(11, 11)) - .SetParam("num_filter", 96) - .SetParam("stride", Shape(4, 4)) - .SetParam("dilate", Shape(1, 1)) - .SetParam("pad", Shape(0, 0)) - .SetParam("num_group", 1) - .SetParam("workspace", 512) - .SetParam("no_bias", false) - .SetInput("data", input_data) - .CreateSymbol("conv1"); - auto relu1 = Operator("Activation") - .SetParam("act_type", "relu") /*relu,sigmoid,softrelu,tanh */ - .SetInput("data", conv1) - .CreateSymbol("relu1"); - auto pool1 = Operator("Pooling") - .SetParam("kernel", Shape(3, 3)) - .SetParam("pool_type", "max") /*avg,max,sum */ - .SetParam("global_pool", false) - .SetParam("stride", Shape(2, 2)) - .SetParam("pad", Shape(0, 0)) - .SetInput("data", relu1) - .CreateSymbol("pool1"); - auto lrn1 = Operator("LRN") - .SetParam("nsize", 5) - .SetParam("alpha", 0.0001) - .SetParam("beta", 0.75) - .SetParam("knorm", 1) - .SetInput("data", pool1) - .CreateSymbol("lrn1"); - /*stage 2*/ - auto conv2 = Operator("Convolution") - .SetParam("kernel", Shape(5, 5)) - .SetParam("num_filter", 256) - .SetParam("stride", Shape(1, 1)) - .SetParam("dilate", Shape(1, 1)) - .SetParam("pad", Shape(2, 2)) - .SetParam("num_group", 1) - .SetParam("workspace", 512) - .SetParam("no_bias", false) - .SetInput("data", lrn1) - .CreateSymbol("conv2"); - auto relu2 = Operator("Activation") - .SetParam("act_type", "relu") /*relu,sigmoid,softrelu,tanh */ - .SetInput("data", conv2) - .CreateSymbol("relu2"); - auto pool2 = Operator("Pooling") - .SetParam("kernel", Shape(3, 3)) - .SetParam("pool_type", "max") /*avg,max,sum */ - .SetParam("global_pool", false) - .SetParam("stride", Shape(2, 2)) - .SetParam("pad", Shape(0, 0)) - 
.SetInput("data", relu2) - .CreateSymbol("pool2"); - auto lrn2 = Operator("LRN") - .SetParam("nsize", 5) - .SetParam("alpha", 0.0001) - .SetParam("beta", 0.75) - .SetParam("knorm", 1) - .SetInput("data", pool2) - .CreateSymbol("lrn2"); - /*stage 3*/ - auto conv3 = Operator("Convolution") - .SetParam("kernel", Shape(3, 3)) - .SetParam("num_filter", 384) - .SetParam("stride", Shape(1, 1)) - .SetParam("dilate", Shape(1, 1)) - .SetParam("pad", Shape(1, 1)) - .SetParam("num_group", 1) - .SetParam("workspace", 512) - .SetParam("no_bias", false) - .SetInput("data", lrn2) - .CreateSymbol("conv3"); - auto relu3 = Operator("Activation") - .SetParam("act_type", "relu") /*relu,sigmoid,softrelu,tanh */ - .SetInput("data", conv3) - .CreateSymbol("relu3"); - auto conv4 = Operator("Convolution") - .SetParam("kernel", Shape(3, 3)) - .SetParam("num_filter", 384) - .SetParam("stride", Shape(1, 1)) - .SetParam("dilate", Shape(1, 1)) - .SetParam("pad", Shape(1, 1)) - .SetParam("num_group", 1) - .SetParam("workspace", 512) - .SetParam("no_bias", false) - .SetInput("data", relu3) - .CreateSymbol("conv4"); - auto relu4 = Operator("Activation") - .SetParam("act_type", "relu") /*relu,sigmoid,softrelu,tanh */ - .SetInput("data", conv4) - .CreateSymbol("relu4"); - auto conv5 = Operator("Convolution") - .SetParam("kernel", Shape(3, 3)) - .SetParam("num_filter", 256) - .SetParam("stride", Shape(1, 1)) - .SetParam("dilate", Shape(1, 1)) - .SetParam("pad", Shape(1, 1)) - .SetParam("num_group", 1) - .SetParam("workspace", 512) - .SetParam("no_bias", false) - .SetInput("data", relu4) - .CreateSymbol("conv5"); - auto relu5 = Operator("Activation") - .SetParam("act_type", "relu") - .SetInput("data", conv5) - .CreateSymbol("relu5"); - auto pool3 = Operator("Pooling") - .SetParam("kernel", Shape(3, 3)) - .SetParam("pool_type", "max") - .SetParam("global_pool", false) - .SetParam("stride", Shape(2, 2)) - .SetParam("pad", Shape(0, 0)) - .SetInput("data", relu5) - .CreateSymbol("pool3"); - /*stage4*/ - 
auto flatten = - Operator("Flatten").SetInput("data", pool3).CreateSymbol("flatten"); - auto fc1 = Operator("FullyConnected") - .SetParam("num_hidden", 4096) - .SetParam("no_bias", false) - .SetInput("data", flatten) - .CreateSymbol("fc1"); - auto relu6 = Operator("Activation") - .SetParam("act_type", "relu") - .SetInput("data", fc1) - .CreateSymbol("relu6"); - auto dropout1 = Operator("Dropout") - .SetParam("p", 0.5) - .SetInput("data", relu6) - .CreateSymbol("dropout1"); - /*stage5*/ - auto fc2 = Operator("FullyConnected") - .SetParam("num_hidden", 4096) - .SetParam("no_bias", false) - .SetInput("data", dropout1) - .CreateSymbol("fc2"); - auto relu7 = Operator("Activation") - .SetParam("act_type", "relu") - .SetInput("data", fc2) - .CreateSymbol("relu7"); - auto dropout2 = Operator("Dropout") - .SetParam("p", 0.5) - .SetInput("data", relu7) - .CreateSymbol("dropout2"); - /*stage6*/ - auto fc3 = Operator("FullyConnected") - .SetParam("num_hidden", num_classes) - .SetParam("no_bias", false) - .SetInput("data", dropout2) - .CreateSymbol("fc3"); - auto softmax = Operator("SoftmaxOutput") - .SetParam("grad_scale", 1) - .SetParam("ignore_label", -1) - .SetParam("multi_output", false) - .SetParam("use_ignore", false) - .SetParam("normalization", "null") /*batch,null,valid */ - .SetInput("data", fc3) - .SetInput("label", target_label) - .CreateSymbol("softmax"); - return softmax; -} - -NDArray ResizeInput(NDArray data, const Shape new_shape) { - NDArray pic = data.Reshape(Shape(0, 1, 28, 28)); - NDArray pic_1channel; - Operator("_contrib_BilinearResize2D") - .SetParam("height", new_shape[2]) - .SetParam("width", new_shape[3]) - (pic).Invoke(pic_1channel); - NDArray output; - Operator("tile") - .SetParam("reps", Shape(1, 3, 1, 1)) - (pic_1channel).Invoke(output); - return output; -} - -int main(int argc, char const *argv[]) { - /*basic config*/ - int max_epo = argc > 1 ? 
strtol(argv[1], nullptr, 10) : 100; - float learning_rate = 1e-4; - float weight_decay = 1e-4; - - /*context*/ - auto ctx = Context::cpu(); - int num_gpu; - MXGetGPUCount(&num_gpu); - int batch_size = 32; -#if !MXNET_USE_CPU - if (num_gpu > 0) { - ctx = Context::gpu(); - batch_size = 256; - } -#endif - - TRY - /*net symbol*/ - auto Net = AlexnetSymbol(10); - - /*args_map and aux_map is used for parameters' saving*/ - std::map args_map; - std::map aux_map; - - /*we should tell mxnet the shape of data and label*/ - const Shape data_shape = Shape(batch_size, 3, 256, 256), - label_shape = Shape(batch_size); - args_map["data"] = NDArray(data_shape, ctx); - args_map["label"] = NDArray(label_shape, ctx); - - /*with data and label, executor can be generated automatically*/ - auto *exec = Net.SimpleBind(ctx, args_map); - auto arg_names = Net.ListArguments(); - aux_map = exec->aux_dict(); - args_map = exec->arg_dict(); - - /*if fine tune from some pre-trained model, we should load the parameters*/ - // NDArray::Load("./model/alex_params_3", nullptr, &args_map); - /*else, we should use initializer Xavier to init the params*/ - Xavier xavier = Xavier(Xavier::gaussian, Xavier::in, 2.34); - for (auto &arg : args_map) { - /*be careful here, the arg's name must has some specific ends or starts for - * initializer to call*/ - xavier(arg.first, &arg.second); - } - - /*these binary files should be generated using im2rc tools, which can be found - * in mxnet/bin*/ - std::vector data_files = { "./data/mnist_data/train-images-idx3-ubyte", - "./data/mnist_data/train-labels-idx1-ubyte", - "./data/mnist_data/t10k-images-idx3-ubyte", - "./data/mnist_data/t10k-labels-idx1-ubyte" - }; - - auto train_iter = MXDataIter("MNISTIter"); - if (!setDataIter(&train_iter, "Train", data_files, batch_size)) { - return 1; - } - - auto val_iter = MXDataIter("MNISTIter"); - if (!setDataIter(&val_iter, "Label", data_files, batch_size)) { - return 1; - } - - Optimizer* opt = OptimizerRegistry::Find("sgd"); - 
opt->SetParam("momentum", 0.9) - ->SetParam("rescale_grad", 1.0 / batch_size) - ->SetParam("clip_gradient", 10) - ->SetParam("lr", learning_rate) - ->SetParam("wd", weight_decay); - - Accuracy acu_train, acu_val; - LogLoss logloss_train, logloss_val; - for (int epoch = 0; epoch < max_epo; ++epoch) { - LG << "Train Epoch: " << epoch; - /*reset the metric every epoch*/ - acu_train.Reset(); - /*reset the data iter every epoch*/ - train_iter.Reset(); - int iter = 0; - while (train_iter.Next()) { - auto batch = train_iter.GetDataBatch(); - /*use copyto to feed new data and label to the executor*/ - ResizeInput(batch.data, data_shape).CopyTo(&args_map["data"]); - batch.label.CopyTo(&args_map["label"]); - exec->Forward(true); - exec->Backward(); - for (size_t i = 0; i < arg_names.size(); ++i) { - if (arg_names[i] == "data" || arg_names[i] == "label") continue; - opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); - } - - NDArray::WaitAll(); - acu_train.Update(batch.label, exec->outputs[0]); - logloss_train.Reset(); - logloss_train.Update(batch.label, exec->outputs[0]); - ++iter; - LG << "EPOCH: " << epoch << " ITER: " << iter - << " Train Accuracy: " << acu_train.Get() - << " Train Loss: " << logloss_train.Get(); - } - LG << "EPOCH: " << epoch << " Train Accuracy: " << acu_train.Get(); - - LG << "Val Epoch: " << epoch; - acu_val.Reset(); - val_iter.Reset(); - logloss_val.Reset(); - iter = 0; - while (val_iter.Next()) { - auto batch = val_iter.GetDataBatch(); - ResizeInput(batch.data, data_shape).CopyTo(&args_map["data"]); - batch.label.CopyTo(&args_map["label"]); - exec->Forward(false); - NDArray::WaitAll(); - acu_val.Update(batch.label, exec->outputs[0]); - logloss_val.Update(batch.label, exec->outputs[0]); - LG << "EPOCH: " << epoch << " ITER: " << iter << " Val Accuracy: " << acu_val.Get(); - ++iter; - } - LG << "EPOCH: " << epoch << " Val Accuracy: " << acu_val.Get(); - LG << "EPOCH: " << epoch << " Val LogLoss: " << logloss_val.Get(); - - /*save the 
parameters*/ - std::stringstream ss; - ss << epoch; - std::string epoch_str; - ss >> epoch_str; - std::string save_path_param = "alex_param_" + epoch_str; - auto save_args = args_map; - /*we do not want to save the data and label*/ - save_args.erase(save_args.find("data")); - save_args.erase(save_args.find("label")); - /*the alexnet does not get any aux array, so we do not need to save - * aux_map*/ - LG << "EPOCH: " << epoch << " Saving to..." << save_path_param; - NDArray::Save(save_path_param, save_args); - } - /*don't foget to release the executor*/ - delete exec; - delete opt; - MXNotifyShutdown(); - CATCH - return 0; -} diff --git a/cpp-package/example/charRNN.cpp b/cpp-package/example/charRNN.cpp deleted file mode 100644 index 3d90e2e9ed9f..000000000000 --- a/cpp-package/example/charRNN.cpp +++ /dev/null @@ -1,756 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * Hua Zhang mz24cn@hotmail.com - * The code implements C++ version charRNN for mxnet\example\rnn\char-rnn.ipynb with MXNet.cpp API. - * The generated params file is compatiable with python version. - * train() and predict() has been verified with original data samples. 
- * 2017/1/23: - * Add faster version charRNN based on built-in cuDNN RNN operator, 10 times faster. - * Add time major computation graph, although no substantial performance difference. - * Support continuing training from last params file. - * Rename params file epoch number starts from zero. - */ - -#if _MSC_VER -#pragma warning(disable: 4996) // VS2015 complains on 'std::copy' ... -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "mxnet-cpp/MxNetCpp.h" -#include "utils.h" - -using namespace mxnet::cpp; - -struct LSTMState { - Symbol C; - Symbol h; -}; - -struct LSTMParam { - Symbol i2h_weight; - Symbol i2h_bias; - Symbol h2h_weight; - Symbol h2h_bias; -}; - -bool TIME_MAJOR = true; - -// LSTM Cell symbol -LSTMState LSTM(int num_hidden, const Symbol& indata, const LSTMState& prev_state, - const LSTMParam& param, int seqidx, int layeridx, mx_float dropout = 0) { - auto input = dropout > 0? Dropout(indata, dropout) : indata; - auto prefix = std::string("t") + std::to_string(seqidx) + "_l" + std::to_string(layeridx); - auto i2h = FullyConnected(prefix + "_i2h", input, param.i2h_weight, param.i2h_bias, - num_hidden * 4); - auto h2h = FullyConnected(prefix + "_h2h", prev_state.h, param.h2h_weight, param.h2h_bias, - num_hidden * 4); - auto gates = i2h + h2h; - auto slice_gates = SliceChannel(prefix + "_slice", gates, 4); - auto in_gate = Activation(slice_gates[0], ActivationActType::kSigmoid); - auto in_transform = Activation(slice_gates[1], ActivationActType::kTanh); - auto forget_gate = Activation(slice_gates[2], ActivationActType::kSigmoid); - auto out_gate = Activation(slice_gates[3], ActivationActType::kSigmoid); - - LSTMState state; - state.C = (forget_gate * prev_state.C) + (in_gate * in_transform); - state.h = out_gate * Activation(state.C, ActivationActType::kTanh); - return state; -} - -Symbol LSTMUnroll(int num_lstm_layer, int sequence_length, int input_dim, - int num_hidden, int 
num_embed, mx_float dropout = 0) { - auto isTrain = sequence_length > 1; - auto data = Symbol::Variable("data"); - if (TIME_MAJOR && isTrain) - data = transpose(data); - auto embed_weight = Symbol::Variable("embed_weight"); - auto embed = Embedding("embed", data, embed_weight, input_dim, num_embed); - auto wordvec = isTrain? SliceChannel(embed, sequence_length, TIME_MAJOR? 0 : 1, true) : embed; - - std::vector last_states; - std::vector param_cells; - for (int l = 0; l < num_lstm_layer; l++) { - std::string layer = "l" + std::to_string(l); - LSTMParam param; - param.i2h_weight = Symbol::Variable(layer + "_i2h_weight"); - param.i2h_bias = Symbol::Variable(layer + "_i2h_bias"); - param.h2h_weight = Symbol::Variable(layer + "_h2h_weight"); - param.h2h_bias = Symbol::Variable(layer + "_h2h_bias"); - param_cells.push_back(param); - LSTMState state; - state.C = Symbol::Variable(layer + "_init_c"); - state.h = Symbol::Variable(layer + "_init_h"); - last_states.push_back(state); - } - - std::vector hidden_all; - for (int i = 0; i < sequence_length; i++) { - auto hidden = wordvec[i]; - for (int layer = 0; layer < num_lstm_layer; layer++) { - double dp_ratio = layer == 0? 0 : dropout; - auto next_state = LSTM(num_hidden, hidden, last_states[layer], param_cells[layer], - i, layer, dp_ratio); - hidden = next_state.h; - last_states[layer] = next_state; - } - if (dropout > 0) - hidden = Dropout(hidden, dropout); - hidden_all.push_back(hidden); - } - - auto hidden_concat = isTrain? 
Concat(hidden_all, hidden_all.size(), 0) : hidden_all[0]; - auto cls_weight = Symbol::Variable("cls_weight"); - auto cls_bias = Symbol::Variable("cls_bias"); - auto pred = FullyConnected("pred", hidden_concat, cls_weight, cls_bias, input_dim); - - auto label = Symbol::Variable("softmax_label"); - label = transpose(label); - label = Reshape(label, Shape(), false, Shape(0), false); // -1: infer from graph - auto sm = SoftmaxOutput("softmax", pred, label); - if (isTrain) - return sm; - - std::vector outputs = { sm }; - for (auto& state : last_states) { - outputs.push_back(state.C); - outputs.push_back(state.h); - } - return Symbol::Group(outputs); -} - -// Currently mxnet GPU version RNN operator is implemented via *fast* NVIDIA cuDNN. -Symbol LSTMWithBuiltInRNNOp(int num_lstm_layer, int sequence_length, int input_dim, - int num_hidden, int num_embed, mx_float dropout = 0) { - auto isTrain = sequence_length > 1; - auto data = Symbol::Variable("data"); - if (TIME_MAJOR && isTrain) - data = transpose(data); - - auto embed_weight = Symbol::Variable("embed_weight"); - auto embed = Embedding("embed", data, embed_weight, input_dim, num_embed); - auto label = Symbol::Variable("softmax_label"); - label = transpose(label); - label = Reshape(label, Shape(), false, - Shape(0), false); // FullyConnected requires one dimension - if (!TIME_MAJOR && isTrain) - embed = SwapAxis(embed, 0, 1); // Change to time-major as cuDNN requires - - // We need not do the SwapAxis op as python version does. Direct and better performance in C++! 
- auto rnn_h_init = Symbol::Variable("LSTM_init_h"); - auto rnn_c_init = Symbol::Variable("LSTM_init_c"); - auto rnn_params = Symbol::Variable("LSTM_parameters"); // See explanations near RNNXavier class - auto variable_sequence_length = Symbol::Variable("sequence_length"); - auto rnn = RNN(embed, rnn_params, rnn_h_init, rnn_c_init, variable_sequence_length, num_hidden, - num_lstm_layer, RNNMode::kLstm, false, dropout, !isTrain); - auto hidden = Reshape(rnn[0], Shape(), false, Shape(0, num_hidden), false); - - auto cls_weight = Symbol::Variable("cls_weight"); - auto cls_bias = Symbol::Variable("cls_bias"); - auto pred = FullyConnected("pred", hidden, cls_weight, cls_bias, input_dim); - /*In rnn-time-major/rnn_cell_demo.py, the author claimed time-major version speeds up - * 1.5~2 times versus batch version. I doubts on the conclusion. In my test, the performance - * of both codes are almost same. In fact, there are no substantially differences between - * two codes. They are both based on time major cuDNN, the computation graph only differs - * slightly on the choices of where to put Reshape/SwapAxis/transpose operation. Here I don't - * use Reshape on pred and keep label shape on SoftmaxOutput like time major version code, - * but Reshape on label for simplification. It doesn't make influence on performacne. 
*/ - - auto sm = SoftmaxOutput("softmax", pred, label); - if (isTrain) - return sm; - else - return Symbol::Group({ sm, rnn[1/*RNNOpOutputs::kStateOut=1*/], - rnn[2/*RNNOpOutputs::kStateCellOut=2*/] }); -} - -class Shuffler { - std::vector sequence; - public: - explicit Shuffler(int size) : sequence(size) { - int* p = sequence.data(); - for (int i = 0; i < size; i++) - *p++ = i; - } - void shuffle(std::function lambda = nullptr) { - random_shuffle(sequence.begin(), sequence.end()); - int n = 0; - if (lambda != nullptr) - for (int i : sequence) - lambda(n++, i); - } - const int* data() { - return sequence.data(); - } -}; - -class BucketSentenceIter : public DataIter { - Shuffler* random; - int batch, current, end; - unsigned int sequence_length; - Context device; - std::vector> sequences; - std::vector index2chars; - std::unordered_map charIndices; - - public: - BucketSentenceIter(std::string filename, int minibatch, Context context) : batch(minibatch), - current(-1), device(context) { - auto content = readContent(filename); - buildCharIndex(content); - sequences = convertTextToSequences(content, '\n'); - - int N = sequences.size() / batch * batch; // total used samples - sequences.resize(N); - sort(sequences.begin(), sequences.end(), [](const std::vector& a, - const std::vector& b) { return a.size() < b.size(); }); - - sequence_length = sequences.back().size(); - random = new Shuffler(N); - // We still can get random results if call Reset() firstly -// std::vector>* target = &sequences; -// random->shuffle([target](int n, int i) { (*target)[n].swap((*target)[i]); }); - end = N / batch; - } - virtual ~BucketSentenceIter() { - delete random; - } - - unsigned int maxSequenceLength() { - return sequence_length; - } - - size_t characterSize() { - return charIndices.size(); - } - - virtual bool Next(void) { - return ++current < end; - } - virtual NDArray GetData(void) { - const int* indices = random->data(); - mx_float *data = new mx_float[sequence_length * batch], 
*pdata = data; - - for (int i = current * batch, end = i + batch; i < end; i++) { - memcpy(pdata, sequences[indices[i]].data(), sequences[indices[i]].size() * sizeof(mx_float)); - if (sequences[indices[i]].size() < sequence_length) - memset(pdata + sequences[indices[i]].size(), 0, - (sequence_length - sequences[indices[i]].size()) * sizeof(mx_float)); - pdata += sequence_length; - } - NDArray array(Shape(batch, sequence_length), device, false); - array.SyncCopyFromCPU(data, batch * sequence_length); - return array; - } - virtual NDArray GetLabel(void) { - const int* indices = random->data(); - mx_float *label = new mx_float[sequence_length * batch], *plabel = label; - - for (int i = current * batch, end = i + batch; i < end; i++) { - memcpy(plabel, sequences[indices[i]].data() + 1, - (sequences[indices[i]].size() - 1) * sizeof(mx_float)); - memset(plabel + sequences[indices[i]].size() - 1, 0, - (sequence_length - sequences[indices[i]].size() + 1) * sizeof(mx_float)); - plabel += sequence_length; - } - NDArray array(Shape(batch, sequence_length), device, false); - array.SyncCopyFromCPU(label, batch * sequence_length); - return array; - } - virtual int GetPadNum(void) { - return sequence_length - sequences[random->data()[current * batch]].size(); - } - virtual std::vector GetIndex(void) { - const int* indices = random->data(); - std::vector list(indices + current * batch, indices + current * batch + batch); - return list; - } - virtual void BeforeFirst(void) { - current = -1; - random->shuffle(nullptr); - } - - std::wstring readContent(const std::string file) { - std::wifstream ifs(file, std::ios::binary); - if (ifs) { - std::wostringstream os; - os << ifs.rdbuf(); - return os.str(); - } - return L""; - } - - void buildCharIndex(const std::wstring& content) { - // This version buildCharIndex() Compatiable with python version char_rnn dictionary - int n = 1; - charIndices['\0'] = 0; // padding character - index2chars.push_back(0); // padding character index - for 
(auto c : content) - if (charIndices.find(c) == charIndices.end()) { - charIndices[c] = n++; - index2chars.push_back(c); - } - } -// void buildCharIndex(wstring& content) { -// for (auto c : content) -// charIndices[c]++; // char-frequency map; then char-index map -// std::vector> characters; -// for (auto& iter : charIndices) -// characters.push_back(make_tuple(iter.first, iter.second)); -// sort(characters.begin(), characters.end(), [](const tuple& a, -// const tuple& b) { return get<1>(a) > get<1>(b); }); -// mx_float index = 1; //0 is left for zero-padding -// index2chars.clear(); -// index2chars.push_back(0); //zero-padding -// for (auto& t : characters) { -// charIndices[get<0>(t)] = index++; -// index2chars.push_back(get<0>(t)); -// }s -// } - - inline wchar_t character(int i) { - return index2chars[i]; - } - - inline mx_float index(wchar_t c) { - return charIndices[c]; - } - - void saveCharIndices(const std::string file) { - std::wofstream ofs(file, std::ios::binary); - if (ofs) { - ofs.write(index2chars.data() + 1, index2chars.size() - 1); - ofs.close(); - } - } - - static std::tuple, std::vector> loadCharIndices( - const std::string file) { - std::wifstream ifs(file, std::ios::binary); - std::unordered_map map; - std::vector chars; - if (ifs) { - std::wostringstream os; - os << ifs.rdbuf(); - int n = 1; - map[L'\0'] = 0; - chars.push_back(L'\0'); - for (auto c : os.str()) { - map[c] = (mx_float) n++; - chars.push_back(c); - } - } - // Note: Can't use {} because this would hit the explicit constructor - return std::tuple, std::vector>(map, chars); - } - - std::vector> - convertTextToSequences(const std::wstring& content, wchar_t spliter) { - std::vector> sequences; - sequences.push_back(std::vector()); - for (auto c : content) - if (c == spliter && !sequences.back().empty()) - sequences.push_back(std::vector()); - else - sequences.back().push_back(charIndices[c]); - return sequences; - } -}; - -void OutputPerplexity(NDArray* labels, NDArray* output) { - 
std::vector charIndices, a; - labels->SyncCopyToCPU(&charIndices, 0L); // 0L indicates all - output->SyncCopyToCPU(&a, 0L)/*4128*84*/; - mx_float loss = 0; - int batchSize = labels->GetShape()[0]/*32*/, sequenceLength = labels->GetShape()[1]/*129*/, - nSamples = output->GetShape()[0]/*4128*/, vocabSize = output->GetShape()[1]/*84*/; - for (int n = 0; n < nSamples; n++) { - int row = n % batchSize, column = n / batchSize, labelOffset = column + - row * sequenceLength; // Search based on column storage: labels.T - mx_float safe_value = std::max(1e-10f, a[vocabSize * n + - static_cast(charIndices[labelOffset])]); - loss += -log(safe_value); // Calculate negative log-likelihood - } - loss = exp(loss / nSamples); - std::cout << "Train-Perplexity=" << loss << std::endl; -} - -void SaveCheckpoint(const std::string filepath, Symbol net, Executor* exe) { - std::map params; - for (auto iter : exe->arg_dict()) - if (iter.first.find("_init_") == std::string::npos - && iter.first.rfind("data") != iter.first.length() - 4 - && iter.first.rfind("label") != iter.first.length() - 5) - params.insert({"arg:" + iter.first, iter.second}); - for (auto iter : exe->aux_dict()) - params.insert({"aux:" + iter.first, iter.second}); - NDArray::Save(filepath, params); -} - -void LoadCheckpoint(const std::string filepath, Executor* exe) { - std::map params = NDArray::LoadToMap(filepath); - for (auto iter : params) { - std::string type = iter.first.substr(0, 4); - std::string name = iter.first.substr(4); - NDArray target; - if (type == "arg:") - target = exe->arg_dict()[name]; - else if (type == "aux:") - target = exe->aux_dict()[name]; - else - continue; - iter.second.CopyTo(&target); - } -} - -int input_dim = 0;/*84*/ -int sequence_length_max = 0;/*129*/ -int num_embed = 256; -int num_lstm_layer = 3; -int num_hidden = 512; -mx_float dropout = 0.2; -void train(const std::string file, int batch_size, int max_epoch, int start_epoch) { - Context device(DeviceType::kGPU, 0); - BucketSentenceIter 
dataIter(file, batch_size, device); - std::string prefix = file.substr(0, file.rfind(".")); - dataIter.saveCharIndices(prefix + ".dictionary"); - - input_dim = static_cast(dataIter.characterSize()); - sequence_length_max = dataIter.maxSequenceLength(); - - auto RNN = LSTMUnroll(num_lstm_layer, sequence_length_max, input_dim, num_hidden, - num_embed, dropout); - std::map args_map; - args_map["data"] = NDArray(Shape(batch_size, sequence_length_max), device, false); - args_map["softmax_label"] = NDArray(Shape(batch_size, sequence_length_max), device, false); - for (int i = 0; i < num_lstm_layer; i++) { - std::string key = "l" + std::to_string(i) + "_init_"; - args_map[key + "c"] = NDArray(Shape(batch_size, num_hidden), device, false); - args_map[key + "h"] = NDArray(Shape(batch_size, num_hidden), device, false); - } - std::vector zeros(batch_size * num_hidden, 0); - // RNN.SimpleBind(device, args_map, {}, {{"data", kNullOp}}); - Executor* exe = RNN.SimpleBind(device, args_map); - - if (start_epoch == -1) { - Xavier xavier = Xavier(Xavier::gaussian, Xavier::in, 2.34); - for (auto &arg : exe->arg_dict()) - xavier(arg.first, &arg.second); - } else { - LoadCheckpoint(prefix + "-" + std::to_string(start_epoch) + ".params", exe); - } - start_epoch++; - - mx_float learning_rate = 0.0002; - mx_float weight_decay = 0.000002; - Optimizer* opt = OptimizerRegistry::Find("sgd"); - opt->SetParam("lr", learning_rate) - ->SetParam("wd", weight_decay); -// opt->SetParam("momentum", 0.9)->SetParam("rescale_grad", 1.0 / batch_size) -// ->SetParam("clip_gradient", 10); - - for (int epoch = start_epoch; epoch < max_epoch; ++epoch) { - dataIter.Reset(); - auto tic = std::chrono::system_clock::now(); - while (dataIter.Next()) { - auto data_batch = dataIter.GetDataBatch(); - data_batch.data.CopyTo(&exe->arg_dict()["data"]); - data_batch.label.CopyTo(&exe->arg_dict()["softmax_label"]); - for (int l = 0; l < num_lstm_layer; l++) { - std::string key = "l" + std::to_string(l) + "_init_"; - 
exe->arg_dict()[key + "c"].SyncCopyFromCPU(zeros); - exe->arg_dict()[key + "h"].SyncCopyFromCPU(zeros); - } - NDArray::WaitAll(); - - exe->Forward(true); - exe->Backward(); - for (size_t i = 0; i < exe->arg_arrays.size(); ++i) { - opt->Update(i, exe->arg_arrays[i], exe->grad_arrays[i]); - } - - NDArray::WaitAll(); - } - auto toc = std::chrono::system_clock::now(); - std::cout << "Epoch[" << epoch << "] Time Cost:" << - std::chrono::duration_cast< std::chrono::seconds>(toc - tic).count() << " seconds "; - OutputPerplexity(&exe->arg_dict()["softmax_label"], &exe->outputs[0]); - std::string filepath = prefix + "-" + std::to_string(epoch) + ".params"; - SaveCheckpoint(filepath, RNN, exe); - } - - delete exe; - delete opt; -} - -/*The original example, rnn_cell_demo.py, uses default Xavier as initalizer, which relies on - * variable name, cannot initialize LSTM_parameters. Thus it was renamed to LSTM_bias, - * which can be initialized as zero. But it cannot converge after 100 epochs in this corpus - * example. Using RNNXavier, after 15 oscillating epochs, it rapidly converges like old - * LSTMUnroll version. 
*/ -class RNNXavier : public Xavier { - public: - RNNXavier(RandType rand_type = gaussian, FactorType factor_type = avg, - float magnitude = 3) : Xavier(rand_type, factor_type, magnitude) { - } - virtual ~RNNXavier() {} - protected: - virtual void InitDefault(NDArray* arr) { - Xavier::InitWeight(arr); - } -}; - -void trainWithBuiltInRNNOp(const std::string file, int batch_size, int max_epoch, int start_epoch) { - Context device(DeviceType::kGPU, 0); - BucketSentenceIter dataIter(file, batch_size, device); - std::string prefix = file.substr(0, file.rfind(".")); - dataIter.saveCharIndices(prefix + ".dictionary"); - - input_dim = static_cast(dataIter.characterSize()); - sequence_length_max = dataIter.maxSequenceLength(); - - auto RNN = LSTMWithBuiltInRNNOp(num_lstm_layer, sequence_length_max, input_dim, num_hidden, - num_embed, dropout); - std::map args_map; - args_map["data"] = NDArray(Shape(batch_size, sequence_length_max), device, false); - // Avoiding SwapAxis, batch_size is of second dimension. 
- args_map["LSTM_init_c"] = NDArray(Shape(num_lstm_layer, batch_size, num_hidden), device, false); - args_map["LSTM_init_h"] = NDArray(Shape(num_lstm_layer, batch_size, num_hidden), device, false); - args_map["softmax_label"] = NDArray(Shape(batch_size, sequence_length_max), device, false); - std::vector zeros(batch_size * num_lstm_layer * num_hidden, 0); - Executor* exe = RNN.SimpleBind(device, args_map); - - if (start_epoch == -1) { - RNNXavier xavier = RNNXavier(Xavier::gaussian, Xavier::in, 2.34); - for (auto &arg : exe->arg_dict()) - xavier(arg.first, &arg.second); - } else { - LoadCheckpoint(prefix + "-" + std::to_string(start_epoch) + ".params", exe); - } - start_epoch++; - - Optimizer* opt = OptimizerRegistry::Find("sgd"); -// opt->SetParam("momentum", 0.9)->SetParam("rescale_grad", 1.0 / batch_size) -// ->SetParam("clip_gradient", 10); - - for (int epoch = start_epoch; epoch < max_epoch; ++epoch) { - dataIter.Reset(); - auto tic = std::chrono::system_clock::now(); - while (dataIter.Next()) { - auto data_batch = dataIter.GetDataBatch(); - data_batch.data.CopyTo(&exe->arg_dict()["data"]); - data_batch.label.CopyTo(&exe->arg_dict()["softmax_label"]); - exe->arg_dict()["LSTM_init_c"].SyncCopyFromCPU(zeros); - exe->arg_dict()["LSTM_init_h"].SyncCopyFromCPU(zeros); - NDArray::WaitAll(); - - exe->Forward(true); - exe->Backward(); - for (size_t i = 0; i < exe->arg_arrays.size(); ++i) { - opt->Update(i, exe->arg_arrays[i], exe->grad_arrays[i]); - } - NDArray::WaitAll(); - } - auto toc = std::chrono::system_clock::now(); - std::cout << "Epoch[" << epoch << "] Time Cost:" << - std::chrono::duration_cast< std::chrono::seconds>(toc - tic).count() << " seconds "; - OutputPerplexity(&exe->arg_dict()["softmax_label"], &exe->outputs[0]); - std::string filepath = prefix + "-" + std::to_string(epoch) + ".params"; - SaveCheckpoint(filepath, RNN, exe); - } - - delete exe; - delete opt; -} - -void predict(std::wstring* ptext, int sequence_length, const std::string param_file, - 
const std::string dictionary_file) { - Context device(DeviceType::kGPU, 0); - auto results = BucketSentenceIter::loadCharIndices(dictionary_file); - auto dictionary = std::get<0>(results); - auto charIndices = std::get<1>(results); - input_dim = static_cast(charIndices.size()); - auto RNN = LSTMUnroll(num_lstm_layer, 1, input_dim, num_hidden, num_embed, 0); - - std::map args_map; - args_map["data"] = NDArray(Shape(1, 1), device, false); - args_map["softmax_label"] = NDArray(Shape(1, 1), device, false); - std::vector zeros(1 * num_hidden, 0); - for (int l = 0; l < num_lstm_layer; l++) { - std::string key = "l" + std::to_string(l) + "_init_"; - args_map[key + "c"] = NDArray(Shape(1, num_hidden), device, false); - args_map[key + "h"] = NDArray(Shape(1, num_hidden), device, false); - args_map[key + "c"].SyncCopyFromCPU(zeros); - args_map[key + "h"].SyncCopyFromCPU(zeros); - } - Executor* exe = RNN.SimpleBind(device, args_map); - LoadCheckpoint(param_file, exe); - - mx_float index; - wchar_t next = 0; - std::vector softmax; - softmax.resize(input_dim); - for (auto c : *ptext) { - exe->arg_dict()["data"].SyncCopyFromCPU(&dictionary[c], 1); - exe->Forward(false); - - exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim); - for (int l = 0; l < num_lstm_layer; l++) { - std::string key = "l" + std::to_string(l) + "_init_"; - exe->outputs[l * 2 + 1].CopyTo(&args_map[key + "c"]); - exe->outputs[l * 2 + 2].CopyTo(&args_map[key + "h"]); - } - - size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin(); - index = (mx_float) n; - next = charIndices[n]; - } - ptext->push_back(next); - - for (int i = 0; i < sequence_length; i++) { - exe->arg_dict()["data"].SyncCopyFromCPU(&index, 1); - exe->Forward(false); - - exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim); - for (int l = 0; l < num_lstm_layer; l++) { - std::string key = "l" + std::to_string(l) + "_init_"; - exe->outputs[l * 2 + 1].CopyTo(&args_map[key + "c"]); - exe->outputs[l * 2 + 
2].CopyTo(&args_map[key + "h"]); - } - - size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin(); - index = (mx_float) n; - next = charIndices[n]; - ptext->push_back(next); - } - - delete exe; -} - -void predictWithBuiltInRNNOp(std::wstring* ptext, int sequence_length, const std::string param_file, - const std::string dictionary_file) { - Context device(DeviceType::kGPU, 0); - auto results = BucketSentenceIter::loadCharIndices(dictionary_file); - auto dictionary = std::get<0>(results); - auto charIndices = std::get<1>(results); - input_dim = static_cast(charIndices.size()); - auto RNN = LSTMWithBuiltInRNNOp(num_lstm_layer, 1, input_dim, num_hidden, num_embed, 0); - - std::map args_map; - args_map["data"] = NDArray(Shape(1, 1), device, false); - args_map["softmax_label"] = NDArray(Shape(1, 1), device, false); - std::vector zeros(1 * num_lstm_layer * num_hidden, 0); - // Avoiding SwapAxis, batch_size=1 is of second dimension. - args_map["LSTM_init_c"] = NDArray(Shape(num_lstm_layer, 1, num_hidden), device, false); - args_map["LSTM_init_h"] = NDArray(Shape(num_lstm_layer, 1, num_hidden), device, false); - args_map["LSTM_init_c"].SyncCopyFromCPU(zeros); - args_map["LSTM_init_h"].SyncCopyFromCPU(zeros); - Executor* exe = RNN.SimpleBind(device, args_map); - LoadCheckpoint(param_file, exe); - - mx_float index; - wchar_t next = 0; - std::vector softmax; - softmax.resize(input_dim); - for (auto c : *ptext) { - exe->arg_dict()["data"].SyncCopyFromCPU(&dictionary[c], 1); - exe->Forward(false); - - exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim); - exe->outputs[1].CopyTo(&args_map["LSTM_init_h"]); - exe->outputs[2].CopyTo(&args_map["LSTM_init_c"]); - - size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin(); - index = (mx_float) n; - next = charIndices[n]; - } - ptext->push_back(next); - - for (int i = 0; i < sequence_length; i++) { - exe->arg_dict()["data"].SyncCopyFromCPU(&index, 1); - exe->Forward(false); - - 
exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim); - exe->outputs[1].CopyTo(&args_map["LSTM_init_h"]); - exe->outputs[2].CopyTo(&args_map["LSTM_init_c"]); - - size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin(); - index = (mx_float) n; - next = charIndices[n]; - ptext->push_back(next); - } - - delete exe; -} - -int main(int argc, char** argv) { - if (argc < 5) { - std::cout << "Usage for training: charRNN train[BuiltIn][TimeMajor] {corpus file}" - " {batch size} {max epoch} [{starting epoch}]" << std::endl; - std::cout <<"Usage for prediction: charRNN predict[BuiltIn][TimeMajor] {params file}" - " {dictionary file} {beginning of text}" << std::endl; - std::cout <<"Note: The {params file} of train/trainBuiltIn/trainTimeMajor/trainBuiltInTimeMajor" - " are not compatible with each other." << std::endl; - return 0; - } - - std::string task = argv[1]; - bool builtIn = task.find("BuiltIn") != std::string::npos; - TIME_MAJOR = task.find("TimeMajor") != std::string::npos; - std::cout << "use BuiltIn cuDNN RNN: " << builtIn << std::endl - << "use data as TimeMajor: " << TIME_MAJOR << std::endl; - TRY - if (task.find("train") == 0) { - std::cout << "train batch size: " << argv[3] << std::endl - << "train max epoch: " << argv[4] << std::endl; - int start_epoch = argc > 5? atoi(argv[5]) : -1; - // this function will generate dictionary file and params file. - if (builtIn) - trainWithBuiltInRNNOp(argv[2], atoi(argv[3]), atoi(argv[4]), start_epoch); - else - train(argv[2], atoi(argv[3]), atoi(argv[4]), start_epoch); // ditto - } else if (task.find("predict") == 0) { - std::wstring text; // = L"If there is anyone out there who still doubts "; - // Considering of extending to Chinese samples in future, use wchar_t instead of char - for (char c : std::string(argv[4])) - text.push_back((wchar_t) c); - /*Python version predicts text default to random selecltions. Here I didn't write the random - code, always choose the 'best' character. 
So the text length reduced to 600. Longer size often - leads to repeated sentances, since training sequence length is only 129 for obama corpus.*/ - if (builtIn) - predictWithBuiltInRNNOp(&text, 600, argv[2], argv[3]); - else - predict(&text, 600, argv[2], argv[3]); - std::wcout << text << std::endl; - } - - MXNotifyShutdown(); - CATCH - return 0; -} diff --git a/cpp-package/example/googlenet.cpp b/cpp-package/example/googlenet.cpp deleted file mode 100644 index 7b51f4fde3a7..000000000000 --- a/cpp-package/example/googlenet.cpp +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- */ -#include -#include -#include -#include -#include "utils.h" -#include "mxnet-cpp/MxNetCpp.h" - -using namespace mxnet::cpp; - -Symbol ConvFactory(Symbol data, int num_filter, - Shape kernel, - Shape stride = Shape(1, 1), - Shape pad = Shape(0, 0), - const std::string & name = "", - const std::string & suffix = "") { - Symbol conv_w("conv_" + name + suffix + "_w"), conv_b("conv_" + name + suffix + "_b"); - - Symbol conv = Convolution("conv_" + name + suffix, data, - conv_w, conv_b, kernel, - num_filter, stride, Shape(1, 1), pad); - return Activation("relu_" + name + suffix, conv, "relu"); -} - -Symbol InceptionFactory(Symbol data, int num_1x1, int num_3x3red, - int num_3x3, int num_d5x5red, int num_d5x5, - PoolingPoolType pool, int proj, const std::string & name) { - Symbol c1x1 = ConvFactory(data, num_1x1, Shape(1, 1), - Shape(1, 1), Shape(0, 0), name + "_1x1"); - - Symbol c3x3r = ConvFactory(data, num_3x3red, Shape(1, 1), - Shape(1, 1), Shape(0, 0), name + "_3x3", "_reduce"); - - Symbol c3x3 = ConvFactory(c3x3r, num_3x3, Shape(3, 3), - Shape(1, 1), Shape(1, 1), name + "_3x3"); - - Symbol cd5x5r = ConvFactory(data, num_d5x5red, Shape(1, 1), - Shape(1, 1), Shape(0, 0), name + "_5x5", "_reduce"); - - Symbol cd5x5 = ConvFactory(cd5x5r, num_d5x5, Shape(5, 5), - Shape(1, 1), Shape(2, 2), name + "_5x5"); - - Symbol pooling = Pooling(name + "_pool", data, Shape(3, 3), pool, - false, false, PoolingPoolingConvention::kValid, - Shape(1, 1), Shape(1, 1)); - - Symbol cproj = ConvFactory(pooling, proj, Shape(1, 1), - Shape(1, 1), Shape(0, 0), name + "_proj"); - - std::vector lst; - lst.push_back(c1x1); - lst.push_back(c3x3); - lst.push_back(cd5x5); - lst.push_back(cproj); - return Concat("ch_concat_" + name + "_chconcat", lst, lst.size()); -} - -Symbol GoogleNetSymbol(int num_classes) { - // data and label - Symbol data = Symbol::Variable("data"); - Symbol data_label = Symbol::Variable("data_label"); - - Symbol conv1 = ConvFactory(data, 64, Shape(7, 7), Shape(2, 2), 
Shape(3, 3), "conv1"); - Symbol pool1 = Pooling("pool1", conv1, Shape(3, 3), PoolingPoolType::kMax, - false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); - Symbol conv2 = ConvFactory(pool1, 64, Shape(1, 1), Shape(1, 1), - Shape(0, 0), "conv2"); - Symbol conv3 = ConvFactory(conv2, 192, Shape(3, 3), Shape(1, 1), Shape(1, 1), "conv3"); - Symbol pool3 = Pooling("pool3", conv3, Shape(3, 3), PoolingPoolType::kMax, - false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); - - Symbol in3a = InceptionFactory(pool3, 64, 96, 128, 16, 32, PoolingPoolType::kMax, 32, "in3a"); - Symbol in3b = InceptionFactory(in3a, 128, 128, 192, 32, 96, PoolingPoolType::kMax, 64, "in3b"); - Symbol pool4 = Pooling("pool4", in3b, Shape(3, 3), PoolingPoolType::kMax, - false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); - Symbol in4a = InceptionFactory(pool4, 192, 96, 208, 16, 48, PoolingPoolType::kMax, 64, "in4a"); - Symbol in4b = InceptionFactory(in4a, 160, 112, 224, 24, 64, PoolingPoolType::kMax, 64, "in4b"); - Symbol in4c = InceptionFactory(in4b, 128, 128, 256, 24, 64, PoolingPoolType::kMax, 64, "in4c"); - Symbol in4d = InceptionFactory(in4c, 112, 144, 288, 32, 64, PoolingPoolType::kMax, 64, "in4d"); - Symbol in4e = InceptionFactory(in4d, 256, 160, 320, 32, 128, PoolingPoolType::kMax, 128, "in4e"); - Symbol pool5 = Pooling("pool5", in4e, Shape(3, 3), PoolingPoolType::kMax, - false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); - Symbol in5a = InceptionFactory(pool5, 256, 160, 320, 32, 128, PoolingPoolType::kMax, 128, "in5a"); - Symbol in5b = InceptionFactory(in5a, 384, 192, 384, 48, 128, PoolingPoolType::kMax, 128, "in5b"); - Symbol pool6 = Pooling("pool6", in5b, Shape(7, 7), PoolingPoolType::kAvg, - false, false, PoolingPoolingConvention::kValid, Shape(1, 1)); - - Symbol flatten = Flatten("flatten", pool6); - - Symbol fc1_w("fc1_w"), fc1_b("fc1_b"); - Symbol fc1 = FullyConnected("fc1", flatten, fc1_w, fc1_b, num_classes); - - return SoftmaxOutput("softmax", 
fc1, data_label); -} - -int main(int argc, char const *argv[]) { - int batch_size = 50; - int max_epoch = argc > 1 ? strtol(argv[1], nullptr, 10) : 100; - float learning_rate = 1e-4; - float weight_decay = 1e-4; - - auto ctx = Context::gpu(); -#if MXNET_USE_CPU - ctx = Context::cpu();; -#endif - - TRY - auto googlenet = GoogleNetSymbol(10); - std::map args_map; - std::map aux_map; - - args_map["data"] = NDArray(Shape(batch_size, 3, 256, 256), ctx); - args_map["data_label"] = NDArray(Shape(batch_size), ctx); - googlenet.InferArgsMap(ctx, &args_map, args_map); - - std::vector data_files = { "./data/mnist_data/train-images-idx3-ubyte", - "./data/mnist_data/train-labels-idx1-ubyte", - "./data/mnist_data/t10k-images-idx3-ubyte", - "./data/mnist_data/t10k-labels-idx1-ubyte" - }; - - auto train_iter = MXDataIter("MNISTIter"); - if (!setDataIter(&train_iter, "Train", data_files, batch_size)) { - return 1; - } - - auto val_iter = MXDataIter("MNISTIter"); - if (!setDataIter(&val_iter, "Label", data_files, batch_size)) { - return 1; - } - - Optimizer* opt = OptimizerRegistry::Find("sgd"); - opt->SetParam("momentum", 0.9) - ->SetParam("rescale_grad", 1.0 / batch_size) - ->SetParam("clip_gradient", 10) - ->SetParam("lr", learning_rate) - ->SetParam("wd", weight_decay); - - - auto *exec = googlenet.SimpleBind(ctx, args_map); - auto arg_names = googlenet.ListArguments(); - - for (int iter = 0; iter < max_epoch; ++iter) { - LG << "Epoch: " << iter; - train_iter.Reset(); - while (train_iter.Next()) { - auto data_batch = train_iter.GetDataBatch(); - data_batch.data.CopyTo(&args_map["data"]); - data_batch.label.CopyTo(&args_map["data_label"]); - NDArray::WaitAll(); - exec->Forward(true); - exec->Backward(); - for (size_t i = 0; i < arg_names.size(); ++i) { - if (arg_names[i] == "data" || arg_names[i] == "data_label") continue; - opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); - } - } - - Accuracy acu; - val_iter.Reset(); - while (val_iter.Next()) { - auto data_batch = 
val_iter.GetDataBatch(); - data_batch.data.CopyTo(&args_map["data"]); - data_batch.label.CopyTo(&args_map["data_label"]); - NDArray::WaitAll(); - exec->Forward(false); - NDArray::WaitAll(); - acu.Update(data_batch.label, exec->outputs[0]); - } - LG << "Accuracy: " << acu.Get(); - } - - delete exec; - delete opt; - MXNotifyShutdown(); - CATCH - return 0; -} diff --git a/cpp-package/example/inception_bn.cpp b/cpp-package/example/inception_bn.cpp deleted file mode 100644 index 8fe6b070497c..000000000000 --- a/cpp-package/example/inception_bn.cpp +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- */ -#include -#include -#include -#include -#include "utils.h" -#include "mxnet-cpp/MxNetCpp.h" - -using namespace mxnet::cpp; - -Symbol ConvFactoryBN(Symbol data, int num_filter, - Shape kernel, Shape stride, Shape pad, - const std::string & name, - const std::string & suffix = "") { - Symbol conv_w("conv_" + name + suffix + "_w"), conv_b("conv_" + name + suffix + "_b"); - - Symbol conv = Convolution("conv_" + name + suffix, data, - conv_w, conv_b, kernel, - num_filter, stride, Shape(1, 1), pad); - std::string name_suffix = name + suffix; - Symbol gamma(name_suffix + "_gamma"); - Symbol beta(name_suffix + "_beta"); - Symbol mmean(name_suffix + "_mmean"); - Symbol mvar(name_suffix + "_mvar"); - Symbol bn = BatchNorm("bn_" + name + suffix, conv, gamma, beta, mmean, mvar); - return Activation("relu_" + name + suffix, bn, "relu"); -} - -Symbol InceptionFactoryA(Symbol data, int num_1x1, int num_3x3red, - int num_3x3, int num_d3x3red, int num_d3x3, - PoolingPoolType pool, int proj, - const std::string & name) { - Symbol c1x1 = ConvFactoryBN(data, num_1x1, Shape(1, 1), Shape(1, 1), - Shape(0, 0), name + "1x1"); - Symbol c3x3r = ConvFactoryBN(data, num_3x3red, Shape(1, 1), Shape(1, 1), - Shape(0, 0), name + "_3x3r"); - Symbol c3x3 = ConvFactoryBN(c3x3r, num_3x3, Shape(3, 3), Shape(1, 1), - Shape(1, 1), name + "_3x3"); - Symbol cd3x3r = ConvFactoryBN(data, num_d3x3red, Shape(1, 1), Shape(1, 1), - Shape(0, 0), name + "_double_3x3", "_reduce"); - Symbol cd3x3 = ConvFactoryBN(cd3x3r, num_d3x3, Shape(3, 3), Shape(1, 1), - Shape(1, 1), name + "_double_3x3_0"); - cd3x3 = ConvFactoryBN(data = cd3x3, num_d3x3, Shape(3, 3), Shape(1, 1), - Shape(1, 1), name + "_double_3x3_1"); - Symbol pooling = Pooling(name + "_pool", data, - Shape(3, 3), pool, false, false, - PoolingPoolingConvention::kValid, - Shape(1, 1), Shape(1, 1)); - Symbol cproj = ConvFactoryBN(pooling, proj, Shape(1, 1), Shape(1, 1), - Shape(0, 0), name + "_proj"); - std::vector lst; - lst.push_back(c1x1); - 
lst.push_back(c3x3); - lst.push_back(cd3x3); - lst.push_back(cproj); - return Concat("ch_concat_" + name + "_chconcat", lst, lst.size()); -} - -Symbol InceptionFactoryB(Symbol data, int num_3x3red, int num_3x3, - int num_d3x3red, int num_d3x3, const std::string & name) { - Symbol c3x3r = ConvFactoryBN(data, num_3x3red, Shape(1, 1), - Shape(1, 1), Shape(0, 0), - name + "_3x3", "_reduce"); - Symbol c3x3 = ConvFactoryBN(c3x3r, num_3x3, Shape(3, 3), Shape(2, 2), - Shape(1, 1), name + "_3x3"); - Symbol cd3x3r = ConvFactoryBN(data, num_d3x3red, Shape(1, 1), Shape(1, 1), - Shape(0, 0), name + "_double_3x3", "_reduce"); - Symbol cd3x3 = ConvFactoryBN(cd3x3r, num_d3x3, Shape(3, 3), Shape(1, 1), - Shape(1, 1), name + "_double_3x3_0"); - cd3x3 = ConvFactoryBN(cd3x3, num_d3x3, Shape(3, 3), Shape(2, 2), - Shape(1, 1), name + "_double_3x3_1"); - Symbol pooling = Pooling("max_pool_" + name + "_pool", data, - Shape(3, 3), PoolingPoolType::kMax, - false, false, PoolingPoolingConvention::kValid, - Shape(2, 2), Shape(1, 1)); - std::vector lst; - lst.push_back(c3x3); - lst.push_back(cd3x3); - lst.push_back(pooling); - return Concat("ch_concat_" + name + "_chconcat", lst, lst.size()); -} - -Symbol InceptionSymbol(int num_classes) { - // data and label - Symbol data = Symbol::Variable("data"); - Symbol data_label = Symbol::Variable("data_label"); - - // stage 1 - Symbol conv1 = ConvFactoryBN(data, 64, Shape(7, 7), Shape(2, 2), Shape(3, 3), "conv1"); - Symbol pool1 = Pooling("pool1", conv1, Shape(3, 3), PoolingPoolType::kMax, - false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); - - // stage 2 - Symbol conv2red = ConvFactoryBN(pool1, 64, Shape(1, 1), Shape(1, 1), Shape(0, 0), "conv2red"); - Symbol conv2 = ConvFactoryBN(conv2red, 192, Shape(3, 3), Shape(1, 1), Shape(1, 1), "conv2"); - Symbol pool2 = Pooling("pool2", conv2, Shape(3, 3), PoolingPoolType::kMax, - false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); - - // stage 3 - Symbol in3a = InceptionFactoryA(pool2, 
64, 64, 64, 64, 96, PoolingPoolType::kAvg, 32, "3a"); - Symbol in3b = InceptionFactoryA(in3a, 64, 64, 96, 64, 96, PoolingPoolType::kAvg, 64, "3b"); - Symbol in3c = InceptionFactoryB(in3b, 128, 160, 64, 96, "3c"); - - // stage 4 - Symbol in4a = InceptionFactoryA(in3c, 224, 64, 96, 96, 128, PoolingPoolType::kAvg, 128, "4a"); - Symbol in4b = InceptionFactoryA(in4a, 192, 96, 128, 96, 128, PoolingPoolType::kAvg, 128, "4b"); - Symbol in4c = InceptionFactoryA(in4b, 160, 128, 160, 128, 160, PoolingPoolType::kAvg, 128, "4c"); - Symbol in4d = InceptionFactoryA(in4c, 96, 128, 192, 160, 192, PoolingPoolType::kAvg, 128, "4d"); - Symbol in4e = InceptionFactoryB(in4d, 128, 192, 192, 256, "4e"); - - // stage 5 - Symbol in5a = InceptionFactoryA(in4e, 352, 192, 320, 160, 224, PoolingPoolType::kAvg, 128, "5a"); - Symbol in5b = InceptionFactoryA(in5a, 352, 192, 320, 192, 224, PoolingPoolType::kMax, 128, "5b"); - - // average pooling - Symbol avg = Pooling("global_pool", in5b, Shape(7, 7), PoolingPoolType::kAvg); - - // classifier - Symbol flatten = Flatten("flatten", avg); - Symbol conv1_w("conv1_w"), conv1_b("conv1_b"); - Symbol fc1 = FullyConnected("fc1", flatten, conv1_w, conv1_b, num_classes); - return SoftmaxOutput("softmax", fc1, data_label); -} - -NDArray ResizeInput(NDArray data, const Shape new_shape) { - NDArray pic = data.Reshape(Shape(0, 1, 28, 28)); - NDArray pic_1channel; - Operator("_contrib_BilinearResize2D") - .SetParam("height", new_shape[2]) - .SetParam("width", new_shape[3]) - (pic).Invoke(pic_1channel); - NDArray output; - Operator("tile") - .SetParam("reps", Shape(1, 3, 1, 1)) - (pic_1channel).Invoke(output); - return output; -} - -int main(int argc, char const *argv[]) { - int batch_size = 40; - int max_epoch = argc > 1 ? 
strtol(argv[1], nullptr, 10) : 100; - float learning_rate = 1e-2; - float weight_decay = 1e-4; - - /*context*/ - auto ctx = Context::cpu(); - int num_gpu; - MXGetGPUCount(&num_gpu); -#if !MXNET_USE_CPU - if (num_gpu > 0) { - ctx = Context::gpu(); - } -#endif - - TRY - auto inception_bn_net = InceptionSymbol(10); - std::map args_map; - std::map aux_map; - - const Shape data_shape = Shape(batch_size, 3, 224, 224), - label_shape = Shape(batch_size); - args_map["data"] = NDArray(data_shape, ctx); - args_map["data_label"] = NDArray(label_shape, ctx); - inception_bn_net.InferArgsMap(ctx, &args_map, args_map); - - std::vector data_files = { "./data/mnist_data/train-images-idx3-ubyte", - "./data/mnist_data/train-labels-idx1-ubyte", - "./data/mnist_data/t10k-images-idx3-ubyte", - "./data/mnist_data/t10k-labels-idx1-ubyte" - }; - - auto train_iter = MXDataIter("MNISTIter"); - if (!setDataIter(&train_iter, "Train", data_files, batch_size)) { - return 1; - } - - auto val_iter = MXDataIter("MNISTIter"); - if (!setDataIter(&val_iter, "Label", data_files, batch_size)) { - return 1; - } - - // initialize parameters - Xavier xavier = Xavier(Xavier::gaussian, Xavier::in, 2); - for (auto &arg : args_map) { - xavier(arg.first, &arg.second); - } - - Optimizer* opt = OptimizerRegistry::Find("sgd"); - opt->SetParam("momentum", 0.9) - ->SetParam("rescale_grad", 1.0 / batch_size) - ->SetParam("clip_gradient", 10) - ->SetParam("lr", learning_rate) - ->SetParam("wd", weight_decay); - - auto *exec = inception_bn_net.SimpleBind(ctx, args_map); - auto arg_names = inception_bn_net.ListArguments(); - - // Create metrics - Accuracy train_acc, val_acc; - for (int iter = 0; iter < max_epoch; ++iter) { - LG << "Epoch: " << iter; - train_iter.Reset(); - train_acc.Reset(); - while (train_iter.Next()) { - auto data_batch = train_iter.GetDataBatch(); - ResizeInput(data_batch.data, data_shape).CopyTo(&args_map["data"]); - data_batch.label.CopyTo(&args_map["data_label"]); - NDArray::WaitAll(); - - 
exec->Forward(true); - exec->Backward(); - // Update parameters - for (size_t i = 0; i < arg_names.size(); ++i) { - if (arg_names[i] == "data" || arg_names[i] == "data_label") continue; - opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); - } - - NDArray::WaitAll(); - train_acc.Update(data_batch.label, exec->outputs[0]); - } - - val_iter.Reset(); - val_acc.Reset(); - while (val_iter.Next()) { - auto data_batch = val_iter.GetDataBatch(); - ResizeInput(data_batch.data, data_shape).CopyTo(&args_map["data"]); - data_batch.label.CopyTo(&args_map["data_label"]); - NDArray::WaitAll(); - exec->Forward(false); - NDArray::WaitAll(); - val_acc.Update(data_batch.label, exec->outputs[0]); - } - LG << "Train Accuracy: " << train_acc.Get(); - LG << "Validation Accuracy: " << val_acc.Get(); - } - delete exec; - delete opt; - MXNotifyShutdown(); - CATCH - return 0; -} diff --git a/cpp-package/example/lenet.cpp b/cpp-package/example/lenet.cpp deleted file mode 100644 index 3e34dbb486ab..000000000000 --- a/cpp-package/example/lenet.cpp +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- */ -#include -#include -#include -#include -#include -#include "mxnet-cpp/MxNetCpp.h" -#include "utils.h" - -using namespace mxnet::cpp; - -class Lenet { - public: - Lenet() - : ctx_cpu(Context(DeviceType::kCPU, 0)), -#if MXNET_USE_CPU - ctx_dev(Context(DeviceType::kCPU, 0)) -#else - ctx_dev(Context(DeviceType::kGPU, 0)) -#endif - {} - - void Run(int max_epoch) { - /* - * LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner. - * "Gradient-based learning applied to document recognition." - * Proceedings of the IEEE (1998) - * */ - - /*define the symbolic net*/ - Symbol data = Symbol::Variable("data"); - Symbol data_label = Symbol::Variable("data_label"); - Symbol conv1_w("conv1_w"), conv1_b("conv1_b"); - Symbol conv2_w("conv2_w"), conv2_b("conv2_b"); - Symbol conv3_w("conv3_w"), conv3_b("conv3_b"); - Symbol fc1_w("fc1_w"), fc1_b("fc1_b"); - Symbol fc2_w("fc2_w"), fc2_b("fc2_b"); - - Symbol conv1 = - Convolution("conv1", data, conv1_w, conv1_b, Shape(5, 5), 20); - Symbol tanh1 = Activation("tanh1", conv1, ActivationActType::kTanh); - Symbol pool1 = Pooling("pool1", tanh1, Shape(2, 2), PoolingPoolType::kMax, - false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); - - Symbol conv2 = Convolution("conv2", pool1, conv2_w, conv2_b, - Shape(5, 5), 50); - Symbol tanh2 = Activation("tanh2", conv2, ActivationActType::kTanh); - Symbol pool2 = Pooling("pool2", tanh2, Shape(2, 2), PoolingPoolType::kMax, - false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); - - Symbol conv3 = Convolution("conv3", pool2, conv3_w, conv3_b, - Shape(2, 2), 500); - Symbol tanh3 = Activation("tanh3", conv3, ActivationActType::kTanh); - Symbol pool3 = Pooling("pool3", tanh3, Shape(2, 2), PoolingPoolType::kMax, - false, false, PoolingPoolingConvention::kValid, Shape(1, 1)); - - Symbol flatten = Flatten("flatten", pool3); - Symbol fc1 = FullyConnected("fc1", flatten, fc1_w, fc1_b, 500); - Symbol tanh4 = Activation("tanh4", fc1, ActivationActType::kTanh); - Symbol fc2 = 
FullyConnected("fc2", tanh4, fc2_w, fc2_b, 10); - - Symbol lenet = SoftmaxOutput("softmax", fc2, data_label); - - for (auto s : lenet.ListArguments()) { - LG << s; - } - - /*setup basic configs*/ - int val_fold = 1; - int W = 28; - int H = 28; - int batch_size = 42; - float learning_rate = 1e-4; - float weight_decay = 1e-4; - - /*prepare the data*/ - std::vector data_vec, label_vec; - size_t data_count = GetData(&data_vec, &label_vec); - const float *dptr = data_vec.data(); - const float *lptr = label_vec.data(); - NDArray data_array = NDArray(Shape(data_count, 1, W, H), ctx_cpu, - false); // store in main memory, and copy to - // device memory while training - NDArray label_array = - NDArray(Shape(data_count), ctx_cpu, - false); // it's also ok if just store them all in device memory - data_array.SyncCopyFromCPU(dptr, data_count * W * H); - label_array.SyncCopyFromCPU(lptr, data_count); - data_array.WaitToRead(); - label_array.WaitToRead(); - - size_t train_num = data_count * (1 - val_fold / 10.0); - train_data = data_array.Slice(0, train_num); - train_label = label_array.Slice(0, train_num); - val_data = data_array.Slice(train_num, data_count); - val_label = label_array.Slice(train_num, data_count); - - LG << "here read fin"; - - /*init some of the args*/ - // map args_map; - args_map["data"] = data_array.Slice(0, batch_size).Copy(ctx_dev); - args_map["data_label"] = label_array.Slice(0, batch_size).Copy(ctx_dev); - NDArray::WaitAll(); - - LG << "here slice fin"; - /* - * we can also feed in some of the args other than the input all by - * ourselves, - * fc2-w , fc1-b for example: - * */ - // args_map["fc2_w"] = - // NDArray(mshadow::Shape2(500, 4 * 4 * 50), ctx_dev, false); - // NDArray::SampleGaussian(0, 1, &args_map["fc2_w"]); - // args_map["fc1_b"] = NDArray(mshadow::Shape1(10), ctx_dev, false); - // args_map["fc1_b"] = 0; - - lenet.InferArgsMap(ctx_dev, &args_map, args_map); - Optimizer* opt = OptimizerRegistry::Find("sgd"); - opt->SetParam("momentum", 0.9) 
- ->SetParam("rescale_grad", 1.0) - ->SetParam("clip_gradient", 10) - ->SetParam("lr", learning_rate) - ->SetParam("wd", weight_decay); - - Executor *exe = lenet.SimpleBind(ctx_dev, args_map); - auto arg_names = lenet.ListArguments(); - - for (int ITER = 0; ITER < max_epoch; ++ITER) { - size_t start_index = 0; - while (start_index < train_num) { - if (start_index + batch_size > train_num) { - start_index = train_num - batch_size; - } - args_map["data"] = - train_data.Slice(start_index, start_index + batch_size) - .Copy(ctx_dev); - args_map["data_label"] = - train_label.Slice(start_index, start_index + batch_size) - .Copy(ctx_dev); - start_index += batch_size; - NDArray::WaitAll(); - - exe->Forward(true); - exe->Backward(); - // Update parameters - for (size_t i = 0; i < arg_names.size(); ++i) { - if (arg_names[i] == "data" || arg_names[i] == "data_label") continue; - opt->Update(i, exe->arg_arrays[i], exe->grad_arrays[i]); - } - } - - LG << "Iter " << ITER - << ", accuracy: " << ValAccuracy(batch_size * 10, lenet); - } - delete exe; - delete opt; - } - - private: - Context ctx_cpu; - Context ctx_dev; - std::map args_map; - NDArray train_data; - NDArray train_label; - NDArray val_data; - NDArray val_label; - - size_t GetData(std::vector *data, std::vector *label) { - const char *train_data_path = "./data/mnist_data/mnist_train.csv"; - std::ifstream inf(train_data_path); - std::string line; - inf >> line; // ignore the header - size_t _N = 0; - while (inf >> line) { - for (auto &c : line) c = (c == ',') ? 
' ' : c; - std::stringstream ss; - ss << line; - float _data; - ss >> _data; - label->push_back(_data); - while (ss >> _data) data->push_back(_data / 256.0); - _N++; - } - inf.close(); - return _N; - } - - float ValAccuracy(int batch_size, Symbol lenet) { - size_t val_num = val_data.GetShape()[0]; - - size_t correct_count = 0; - size_t all_count = 0; - - size_t start_index = 0; - while (start_index < val_num) { - if (start_index + batch_size > val_num) { - start_index = val_num - batch_size; - } - args_map["data"] = - val_data.Slice(start_index, start_index + batch_size).Copy(ctx_dev); - args_map["data_label"] = - val_label.Slice(start_index, start_index + batch_size).Copy(ctx_dev); - start_index += batch_size; - NDArray::WaitAll(); - - Executor *exe = lenet.SimpleBind(ctx_dev, args_map); - exe->Forward(false); - - const auto &out = exe->outputs; - NDArray out_cpu = out[0].Copy(ctx_cpu); - NDArray label_cpu = - val_label.Slice(start_index - batch_size, start_index).Copy(ctx_cpu); - - NDArray::WaitAll(); - - const mx_float *dptr_out = out_cpu.GetData(); - const mx_float *dptr_label = label_cpu.GetData(); - for (int i = 0; i < batch_size; ++i) { - float label = dptr_label[i]; - int cat_num = out_cpu.GetShape()[1]; - float p_label = 0, max_p = dptr_out[i * cat_num]; - for (int j = 0; j < cat_num; ++j) { - float p = dptr_out[i * cat_num + j]; - if (max_p < p) { - p_label = j; - max_p = p; - } - } - if (label == p_label) correct_count++; - } - all_count += batch_size; - - delete exe; - } - return correct_count * 1.0 / all_count; - } -}; - -int main(int argc, char const *argv[]) { - TRY - Lenet lenet; - lenet.Run(argc > 1 ? 
strtol(argv[1], nullptr, 10) : 100000); - MXNotifyShutdown(); - CATCH - return 0; -} diff --git a/cpp-package/example/lenet_with_mxdataiter.cpp b/cpp-package/example/lenet_with_mxdataiter.cpp deleted file mode 100644 index 6b37693cda59..000000000000 --- a/cpp-package/example/lenet_with_mxdataiter.cpp +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - */ -#include -#include -#include -#include -#include -#include -#include "utils.h" -#include "mxnet-cpp/MxNetCpp.h" - -using namespace mxnet::cpp; - -Symbol LenetSymbol() { - /* - * LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner. - * "Gradient-based learning applied to document recognition." 
- * Proceedings of the IEEE (1998) - * */ - - /*define the symbolic net*/ - Symbol data = Symbol::Variable("data"); - Symbol data_label = Symbol::Variable("data_label"); - Symbol conv1_w("conv1_w"), conv1_b("conv1_b"); - Symbol conv2_w("conv2_w"), conv2_b("conv2_b"); - Symbol conv3_w("conv3_w"), conv3_b("conv3_b"); - Symbol fc1_w("fc1_w"), fc1_b("fc1_b"); - Symbol fc2_w("fc2_w"), fc2_b("fc2_b"); - - Symbol conv1 = Convolution("conv1", data, conv1_w, conv1_b, Shape(5, 5), 20); - Symbol tanh1 = Activation("tanh1", conv1, ActivationActType::kTanh); - Symbol pool1 = Pooling("pool1", tanh1, Shape(2, 2), PoolingPoolType::kMax, - false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); - - Symbol conv2 = Convolution("conv2", pool1, conv2_w, conv2_b, Shape(5, 5), 50); - Symbol tanh2 = Activation("tanh2", conv2, ActivationActType::kTanh); - Symbol pool2 = Pooling("pool2", tanh2, Shape(2, 2), PoolingPoolType::kMax, - false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); - - Symbol flatten = Flatten("flatten", pool2); - Symbol fc1 = FullyConnected("fc1", flatten, fc1_w, fc1_b, 500); - Symbol tanh3 = Activation("tanh3", fc1, ActivationActType::kTanh); - Symbol fc2 = FullyConnected("fc2", tanh3, fc2_w, fc2_b, 10); - - Symbol lenet = SoftmaxOutput("softmax", fc2, data_label); - - return lenet; -} - -NDArray ResizeInput(NDArray data, const Shape new_shape) { - NDArray pic = data.Reshape(Shape(0, 1, 28, 28)); - NDArray output; - Operator("_contrib_BilinearResize2D") - .SetParam("height", new_shape[2]) - .SetParam("width", new_shape[3]) - (pic).Invoke(output); - return output; -} - -int main(int argc, char const *argv[]) { - /*setup basic configs*/ - int W = 28; - int H = 28; - int batch_size = 128; - int max_epoch = argc > 1 ? 
strtol(argv[1], nullptr, 10) : 100; - float learning_rate = 1e-4; - float weight_decay = 1e-4; - - auto dev_ctx = Context::cpu(); - int num_gpu; - MXGetGPUCount(&num_gpu); -#if !MXNET_USE_CPU - if (num_gpu > 0) { - dev_ctx = Context::gpu(); - } -#endif - - TRY - auto lenet = LenetSymbol(); - std::map args_map; - - const Shape data_shape = Shape(batch_size, 1, H, W), - label_shape = Shape(batch_size); - args_map["data"] = NDArray(data_shape, dev_ctx); - args_map["data_label"] = NDArray(label_shape, dev_ctx); - lenet.InferArgsMap(dev_ctx, &args_map, args_map); - - args_map["fc1_w"] = NDArray(Shape(500, 4 * 4 * 50), dev_ctx); - NDArray::SampleGaussian(0, 1, &args_map["fc1_w"]); - args_map["fc2_b"] = NDArray(Shape(10), dev_ctx); - args_map["fc2_b"] = 0; - - std::vector data_files = { "./data/mnist_data/train-images-idx3-ubyte", - "./data/mnist_data/train-labels-idx1-ubyte", - "./data/mnist_data/t10k-images-idx3-ubyte", - "./data/mnist_data/t10k-labels-idx1-ubyte" - }; - - auto train_iter = MXDataIter("MNISTIter"); - if (!setDataIter(&train_iter, "Train", data_files, batch_size)) { - return 1; - } - - auto val_iter = MXDataIter("MNISTIter"); - if (!setDataIter(&val_iter, "Label", data_files, batch_size)) { - return 1; - } - - Optimizer* opt = OptimizerRegistry::Find("sgd"); - opt->SetParam("momentum", 0.9) - ->SetParam("rescale_grad", 1.0) - ->SetParam("clip_gradient", 10) - ->SetParam("lr", learning_rate) - ->SetParam("wd", weight_decay); - - - auto *exec = lenet.SimpleBind(dev_ctx, args_map); - auto arg_names = lenet.ListArguments(); - - // Create metrics - Accuracy train_acc, val_acc; - - for (int iter = 0; iter < max_epoch; ++iter) { - int samples = 0; - train_iter.Reset(); - train_acc.Reset(); - - auto tic = std::chrono::system_clock::now(); - - while (train_iter.Next()) { - samples += batch_size; - auto data_batch = train_iter.GetDataBatch(); - - ResizeInput(data_batch.data, data_shape).CopyTo(&args_map["data"]); - data_batch.label.CopyTo(&args_map["data_label"]); 
- NDArray::WaitAll(); - - // Compute gradients - exec->Forward(true); - exec->Backward(); - - // Update parameters - for (size_t i = 0; i < arg_names.size(); ++i) { - if (arg_names[i] == "data" || arg_names[i] == "data_label") continue; - opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); - } - - // Update metric - train_acc.Update(data_batch.label, exec->outputs[0]); - } - - // one epoch of training is finished - auto toc = std::chrono::system_clock::now(); - float duration = std::chrono::duration_cast - (toc - tic).count() / 1000.0; - LG << "Epoch[" << iter << "] " << samples / duration \ - << " samples/sec " << "Train-Accuracy=" << train_acc.Get();; - - val_iter.Reset(); - val_acc.Reset(); - - Accuracy acu; - val_iter.Reset(); - while (val_iter.Next()) { - auto data_batch = val_iter.GetDataBatch(); - ResizeInput(data_batch.data, data_shape).CopyTo(&args_map["data"]); - data_batch.label.CopyTo(&args_map["data_label"]); - NDArray::WaitAll(); - - // Only forward pass is enough as no gradient is needed when evaluating - exec->Forward(false); - NDArray::WaitAll(); - acu.Update(data_batch.label, exec->outputs[0]); - val_acc.Update(data_batch.label, exec->outputs[0]); - } - LG << "Epoch[" << iter << "] Val-Accuracy=" << val_acc.Get(); - } - - delete exec; - delete opt; - MXNotifyShutdown(); - CATCH - return 0; -} diff --git a/cpp-package/example/mlp.cpp b/cpp-package/example/mlp.cpp deleted file mode 100644 index 970dad74e727..000000000000 --- a/cpp-package/example/mlp.cpp +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - */ - -#include -#include -#include -#include "mxnet-cpp/MxNetCpp.h" -#include "utils.h" - -using namespace mxnet::cpp; - -/* - * In this example, - * we make by hand some data in 10 classes with some pattern - * and try to use MLP to recognize the pattern. - */ - -void OutputAccuracy(mx_float* pred, mx_float* target) { - int right = 0; - for (int i = 0; i < 128; ++i) { - float mx_p = pred[i * 10 + 0]; - float p_y = 0; - for (int j = 0; j < 10; ++j) { - if (pred[i * 10 + j] > mx_p) { - mx_p = pred[i * 10 + j]; - p_y = j; - } - } - if (p_y == target[i]) right++; - } - std::cout << "Accuracy: " << right / 128.0 << std::endl; -} - -void MLP(int max_epoch) { - auto sym_x = Symbol::Variable("X"); - auto sym_label = Symbol::Variable("label"); - - const int nLayers = 2; - std::vector layerSizes({512, 10}); - std::vector weights(nLayers); - std::vector biases(nLayers); - std::vector outputs(nLayers); - - Symbol null_sym; - for (int i = 0; i < nLayers; i++) { - std::string istr = std::to_string(i); - weights[i] = Symbol::Variable(std::string("w") + istr); - biases[i] = Symbol::Variable(std::string("b") + istr); - Symbol fc = FullyConnected(std::string("fc") + istr, - i == 0? 
sym_x : outputs[i-1], - weights[i], biases[i], layerSizes[i]); - outputs[i] = LeakyReLU(std::string("act") + istr, fc, null_sym, LeakyReLUActType::kLeaky); - } - auto sym_out = SoftmaxOutput("softmax", outputs[nLayers - 1], sym_label); - - Context ctx_dev(DeviceType::kCPU, 0); - - NDArray array_x(Shape(128, 28), ctx_dev, false); - NDArray array_y(Shape(128), ctx_dev, false); - - mx_float* aptr_x = new mx_float[128 * 28]; - mx_float* aptr_y = new mx_float[128]; - - // we make the data by hand, in 10 classes, with some pattern - for (int i = 0; i < 128; i++) { - for (int j = 0; j < 28; j++) { - aptr_x[i * 28 + j] = i % 10 * 1.0f; - } - aptr_y[i] = i % 10; - } - array_x.SyncCopyFromCPU(aptr_x, 128 * 28); - array_x.WaitToRead(); - array_y.SyncCopyFromCPU(aptr_y, 128); - array_y.WaitToRead(); - - // init the parameters - NDArray array_w_1(Shape(512, 28), ctx_dev, false); - NDArray array_b_1(Shape(512), ctx_dev, false); - NDArray array_w_2(Shape(10, 512), ctx_dev, false); - NDArray array_b_2(Shape(10), ctx_dev, false); - - // the parameters should be initialized in some kind of distribution, - // so it learns fast - // but here just give a const value by hand - array_w_1 = 0.5f; - array_b_1 = 0.0f; - array_w_2 = 0.5f; - array_b_2 = 0.0f; - - // the grads - NDArray array_w_1_g(Shape(512, 28), ctx_dev, false); - NDArray array_b_1_g(Shape(512), ctx_dev, false); - NDArray array_w_2_g(Shape(10, 512), ctx_dev, false); - NDArray array_b_2_g(Shape(10), ctx_dev, false); - - // Bind the symolic network with the ndarray - // all the input args - std::vector in_args; - in_args.push_back(array_x); - in_args.push_back(array_w_1); - in_args.push_back(array_b_1); - in_args.push_back(array_w_2); - in_args.push_back(array_b_2); - in_args.push_back(array_y); - // all the grads - std::vector arg_grad_store; - arg_grad_store.push_back(NDArray()); // we don't need the grad of the input - arg_grad_store.push_back(array_w_1_g); - arg_grad_store.push_back(array_b_1_g); - 
arg_grad_store.push_back(array_w_2_g); - arg_grad_store.push_back(array_b_2_g); - arg_grad_store.push_back( - NDArray()); // neither do we need the grad of the loss - // how to handle the grad - std::vector grad_req_type; - grad_req_type.push_back(kNullOp); - grad_req_type.push_back(kWriteTo); - grad_req_type.push_back(kWriteTo); - grad_req_type.push_back(kWriteTo); - grad_req_type.push_back(kWriteTo); - grad_req_type.push_back(kNullOp); - std::vector aux_states; - - std::cout << "make the Executor" << std::endl; - Executor* exe = new Executor(sym_out, ctx_dev, in_args, arg_grad_store, - grad_req_type, aux_states); - - std::cout << "Training" << std::endl; - mx_float learning_rate = 0.0001; - for (int epoch_num = 0; epoch_num < max_epoch; ++epoch_num) { - exe->Forward(true); - // print accuracy every 100 epoch - if (epoch_num % 100 == 0) { - std::cout << "epoch " << epoch_num << std::endl; - std::vector& out = exe->outputs; - float* cptr = new float[128 * 10]; - out[0].SyncCopyToCPU(cptr, 128 * 10); - NDArray::WaitAll(); - OutputAccuracy(cptr, aptr_y); - delete[] cptr; - } - - // update the parameters - exe->Backward(); - for (int i = 1; i < 5; ++i) { - in_args[i] -= arg_grad_store[i] * learning_rate; - } - NDArray::WaitAll(); - } - - delete exe; - delete[] aptr_x; - delete[] aptr_y; -} - -int main(int argc, char** argv) { - int max_epoch = argc > 1 ? strtol(argv[1], nullptr, 10) : 15000; - TRY - MLP(max_epoch); - MXNotifyShutdown(); - CATCH - return 0; -} diff --git a/cpp-package/example/mlp_cpu.cpp b/cpp-package/example/mlp_cpu.cpp deleted file mode 100644 index 7ea6946dd8c2..000000000000 --- a/cpp-package/example/mlp_cpu.cpp +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * Xin Li yakumolx@gmail.com - */ -#include -#include "utils.h" -#include "mxnet-cpp/MxNetCpp.h" - -using namespace mxnet::cpp; - -Symbol mlp(const std::vector &layers) { - auto x = Symbol::Variable("X"); - auto label = Symbol::Variable("label"); - - std::vector weights(layers.size()); - std::vector biases(layers.size()); - std::vector outputs(layers.size()); - - for (size_t i = 0; i < layers.size(); ++i) { - weights[i] = Symbol::Variable("w" + std::to_string(i)); - biases[i] = Symbol::Variable("b" + std::to_string(i)); - Symbol fc = FullyConnected( - i == 0? x : outputs[i-1], // data - weights[i], - biases[i], - layers[i]); - outputs[i] = i == layers.size()-1 ? 
fc : Activation(fc, ActivationActType::kRelu); - } - - return SoftmaxOutput(outputs.back(), label); -} - -int main(int argc, char** argv) { - const int image_size = 28; - const std::vector layers{128, 64, 10}; - const int batch_size = 100; - const int max_epoch = 10; - const float learning_rate = 0.1; - const float weight_decay = 1e-2; - - std::vector data_files = { "./data/mnist_data/train-images-idx3-ubyte", - "./data/mnist_data/train-labels-idx1-ubyte", - "./data/mnist_data/t10k-images-idx3-ubyte", - "./data/mnist_data/t10k-labels-idx1-ubyte" - }; - - auto train_iter = MXDataIter("MNISTIter"); - if (!setDataIter(&train_iter, "Train", data_files, batch_size)) { - return 1; - } - - auto val_iter = MXDataIter("MNISTIter"); - if (!setDataIter(&val_iter, "Label", data_files, batch_size)) { - return 1; - } - - TRY - auto net = mlp(layers); - - Context ctx = Context::cpu(); // Use CPU for training - - std::map args; - args["X"] = NDArray(Shape(batch_size, image_size*image_size), ctx); - args["label"] = NDArray(Shape(batch_size), ctx); - // Let MXNet infer shapes other parameters such as weights - net.InferArgsMap(ctx, &args, args); - - // Initialize all parameters with uniform distribution U(-0.01, 0.01) - auto initializer = Uniform(0.01); - for (auto& arg : args) { - // arg.first is parameter name, and arg.second is the value - initializer(arg.first, &arg.second); - } - - // Create sgd optimizer - Optimizer* opt = OptimizerRegistry::Find("sgd"); - opt->SetParam("rescale_grad", 1.0/batch_size) - ->SetParam("lr", learning_rate) - ->SetParam("wd", weight_decay); - - // Create executor by binding parameters to the model - auto *exec = net.SimpleBind(ctx, args); - auto arg_names = net.ListArguments(); - - // Start training - for (int iter = 0; iter < max_epoch; ++iter) { - int samples = 0; - train_iter.Reset(); - - auto tic = std::chrono::system_clock::now(); - while (train_iter.Next()) { - samples += batch_size; - auto data_batch = train_iter.GetDataBatch(); - // Set data 
and label - data_batch.data.CopyTo(&args["X"]); - data_batch.label.CopyTo(&args["label"]); - - // Compute gradients - exec->Forward(true); - exec->Backward(); - // Update parameters - for (size_t i = 0; i < arg_names.size(); ++i) { - if (arg_names[i] == "X" || arg_names[i] == "label") continue; - opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); - } - } - auto toc = std::chrono::system_clock::now(); - - Accuracy acc; - val_iter.Reset(); - while (val_iter.Next()) { - auto data_batch = val_iter.GetDataBatch(); - data_batch.data.CopyTo(&args["X"]); - data_batch.label.CopyTo(&args["label"]); - // Forward pass is enough as no gradient is needed when evaluating - exec->Forward(false); - acc.Update(data_batch.label, exec->outputs[0]); - } - float duration = std::chrono::duration_cast - (toc - tic).count() / 1000.0; - LG << "Epoch: " << iter << " " << samples/duration << " samples/sec Accuracy: " << acc.Get(); - } - - delete exec; - delete opt; - MXNotifyShutdown(); - CATCH - return 0; -} diff --git a/cpp-package/example/mlp_csv.cpp b/cpp-package/example/mlp_csv.cpp deleted file mode 100644 index 8db6638a90d3..000000000000 --- a/cpp-package/example/mlp_csv.cpp +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Example: mlp_csv - * Description: - * The following example demonstrates how to use CSVIter. This example creates - * mlp (multi-layer perceptron) model and trains the MNIST data which is in - * CSV format. - */ -#include -#include -#include "utils.h" -#include "mxnet-cpp/MxNetCpp.h" - -using namespace mxnet::cpp; - -/* - * Implementing the mlp symbol with given hidden units configuration. - */ -Symbol mlp(const std::vector &hidden_units) { - auto data = Symbol::Variable("data"); - auto label = Symbol::Variable("label"); - - std::vector weights(hidden_units.size()); - std::vector biases(hidden_units.size()); - std::vector outputs(hidden_units.size()); - - for (size_t i = 0; i < hidden_units.size(); ++i) { - weights[i] = Symbol::Variable("w" + std::to_string(i)); - biases[i] = Symbol::Variable("b" + std::to_string(i)); - Symbol fc = FullyConnected( - i == 0? data : outputs[i-1], // data - weights[i], - biases[i], - hidden_units[i]); - outputs[i] = i == hidden_units.size()-1 ? fc : Activation(fc, ActivationActType::kRelu); - } - return SoftmaxOutput(outputs.back(), label); -} - -/* - * Convert the input string of number of hidden units into the vector of integers. - */ -std::vector getLayers(const std::string& hidden_units_string) { - std::vector hidden_units; - char *pNext; - int num_unit = strtol(hidden_units_string.c_str(), &pNext, 10); - hidden_units.push_back(num_unit); - while (*pNext) { - num_unit = strtol(pNext, &pNext, 10); - hidden_units.push_back(num_unit); - } - return hidden_units; -} - -void printUsage() { - std::cout << "Usage:" << std::endl; - std::cout << "mlp_csv --train mnist_training_set.csv --test mnist_test_set.csv --epochs 10 " - << "--batch_size 100 --hidden_units \"128 64 64\" --gpu" << std::endl; - std::cout << "The example uses mnist data in CSV format. 
The MNIST data in CSV format assumes " - << "the column 0 to be label and the rest 784 column to be data." << std::endl; - std::cout << "By default, the example uses 'cpu' context. If '--gpu' is specified, " - << "program uses 'gpu' context." <