Setting up basic structure for CIFAR directory. (#1554)
* [scripts] steps/nnet3/report/generate_plots.py: plot 5,50,95th percentile of value and derivative instead of mean+-stddev (#1472)
* Update travis.yml so PRs to kaldi_52 are built
* Setting up basic structure for CIFAR directory.
* [src] Some code changes/additions to support image recognition applications of nnet3
* Adding results for using batchnorm components instead of renorm
* Some partial work on CIFAR setup
* Removing old results in AMI
* More work on nnet3-egs-augment-image.cc
* [build] Slight change to how tests are reported, to figure out which one is not completing.
* Add data preparation script for CIFAR
* Add cmd.sh and run.sh
* Various fixes to CIFAR setup
* [src] Code fix RE compressed matrices
Showing 19 changed files with 743 additions and 8 deletions.
@@ -0,0 +1,7 @@
This directory contains example scripts for image classification with the
CIFAR-10 and CIFAR-100 datasets, which are available for free from
https://www.cs.toronto.edu/~kriz/cifar.html.

This demonstrates applying the nnet3 framework to image classification for
fixed-size images.
@@ -0,0 +1,29 @@
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine).  queue.pl works with GridEngine (qsub).  slurm.pl works
# with slurm.  Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration.  Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export train_cmd="queue.pl"
export decode_cmd="queue.pl --mem 4G"
export mkgraph_cmd="queue.pl --mem 8G"
export cuda_cmd="queue.pl --gpu 1"


# the rest of this file is present for historical reasons.  it's better to
# create and edit conf/queue.conf for cluster-specific configuration.
if [ "$(hostname -d)" == "fit.vutbr.cz" ]; then
  # BUT cluster:
  queue="all.q@@blade,all.q@@speech"
  storage="matylda5"
  export train_cmd="queue.pl -q $queue -l ram_free=1.5G,mem_free=1.5G,${storage}=0.25"
  export decode_cmd="queue.pl -q $queue -l ram_free=2.5G,mem_free=2.5G,${storage}=0.1"
  export cuda_cmd="queue.pl -q long.q -l gpu=1"
fi
@@ -0,0 +1,2 @@
This directory contains various scripts that relate to image recognition: specifically,
the recognition of fixed-size images.
@@ -0,0 +1,32 @@
#!/usr/bin/env bash

# This script is like steps/nnet3/get_egs.sh, except it is specialized for
# classification of fixed-size images; and you have to provide the
# dev or test data in a separate directory.


# Begin configuration section.
cmd=run.pl
egs_per_archive=25000
test_mode=false
# end configuration section

echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;


if [ $# != 2 ]; then
  echo "Usage: $0 [opts] <train-or-test-data-dir> <egs-dir>"
  echo " e.g.: $0 --egs-per-archive 25000 data/cifar10_train exp/cifar10_train_egs"
  echo "   or: $0 --test-mode true data/cifar10_test exp/cifar10_test_egs"
  echo "Options (with defaults):"
  echo "  --cmd 'run.pl'            How to run jobs (e.g. queue.pl)"
  echo "  --test-mode false         Set this to true if you just want a single archive"
  echo "                            egs.ark to be created (useful for test data)"
  echo "  --egs-per-archive 25000   Number of images to put in each training archive"
  echo "                            (this is a target; the actual number will be chosen"
  echo "                            as some fraction of the total)."
  exit 1;
fi
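The --egs-per-archive option is described as a target rather than an exact count. The truncated script body does not show the real computation, so the rounding rule below is an assumption; this is only a minimal sketch of one plausible way the actual per-archive count could be derived from the target:

```python
import math

def choose_archive_size(num_images, egs_per_archive=25000):
    """Pick an actual per-archive count close to the requested target.

    Assumption: round the number of archives to the nearest integer,
    then spread the images evenly across those archives.
    """
    num_archives = max(1, round(num_images / egs_per_archive))
    actual = math.ceil(num_images / num_archives)
    return num_archives, actual

# CIFAR-10 training set: 50000 images with the default target of 25000
print(choose_archive_size(50000))  # (2, 25000)
```

With this rule the actual archive size can exceed the target by up to about 50% (e.g. 60000 images and a 25000 target gives 2 archives of 30000), which matches the "some fraction of the total" wording.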
@@ -0,0 +1,78 @@
#!/usr/bin/env bash

# This script validates a directory containing training or test images
# for image-classification tasks with fixed-size images.


if [ $# != 1 ]; then
  echo "Usage: $0 <image-dir-to-validate>"
  echo "e.g.: $0 data/cifar10_train"
  exit 1
fi

dir=$1

[ -e ./path.sh ] && . ./path.sh

if [ ! -d $dir ]; then
  echo "$0: directory $dir does not exist."
  exit 1
fi

for f in images.scp labels.txt classes.txt num_colors; do
  if [ ! -s "$dir/$f" ]; then
    echo "$0: expected file $dir/$f to exist and be nonempty"
    exit 1
  fi
done


num_colors=$(cat $dir/num_colors)

if ! [[ $num_colors -gt 0 ]]; then
  echo "$0: expected the file $dir/num_colors to contain a number >0"
  exit 1
fi

paf="--print-args=false"

num_cols=$(head -n 1 $dir/images.scp | feat-to-dim $paf scp:- -)
if ! [[ $[$num_cols%$num_colors] == 0 ]]; then
  echo "$0: expected the number of columns in the image matrices ($num_cols) to "
  echo "  be a multiple of the number of colors ($num_colors)"
  exit 1
fi

num_rows=$(head -n 1 $dir/images.scp | feat-to-len $paf scp:- -)

height=$[$num_cols/$num_colors]

echo "$0: images are width=$num_rows by height=$height, with $num_colors colors."

if ! cmp <(awk '{print $1}' $dir/images.scp) <(awk '{print $1}' $dir/labels.txt); then
  echo "$0: expected the first fields of $dir/images.scp and $dir/labels.txt to match up."
  exit 1;
fi

if ! [[ $num_cols -eq $(tail -n 1 $dir/images.scp | feat-to-dim $paf scp:- -) ]]; then
  echo "$0: the number of columns in the image matrices is not consistent."
  exit 1
fi

if ! [[ $num_rows -eq $(tail -n 1 $dir/images.scp | feat-to-len $paf scp:- -) ]]; then
  echo "$0: the number of rows in the image matrices is not consistent."
  exit 1
fi

# Note: we don't require images.scp and labels.txt to be sorted, but they
# may not contain repeated keys.
if ! awk '{if($1 in a) { print "validate_image_dir.sh: key " $1 " is repeated in labels.txt"; exit 1; } a[$1]=1; }' $dir/labels.txt; then
  exit 1
fi


if ! utils/int2sym.pl -f 2 $dir/classes.txt <$dir/labels.txt >/dev/null; then
  echo "$0: classes.txt may have the wrong format or may not cover all labels in $dir/labels.txt"
  exit 1;
fi


echo "$0: validated image-data directory $dir"
exit 0
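The geometry checks in the script above reduce to a little integer arithmetic: each image is stored as a Kaldi matrix whose rows run along the image width and whose columns pack height times colors. A small Python sketch of the same consistency check (the function and variable names are mine, not the script's):

```python
def check_image_dims(num_rows, num_cols, num_colors):
    """Mirror the shell script's checks: the column count must be a
    multiple of the color count; the image height is then
    num_cols // num_colors, and num_rows is the image width."""
    if num_colors <= 0:
        raise ValueError("num_colors must be > 0")
    if num_cols % num_colors != 0:
        raise ValueError(
            "columns (%d) not a multiple of colors (%d)"
            % (num_cols, num_colors))
    height = num_cols // num_colors
    return {"width": num_rows, "height": height, "colors": num_colors}

# A CIFAR image: 32 rows (width), 96 columns = 32 (height) * 3 (colors)
print(check_image_dims(32, 96, 3))  # {'width': 32, 'height': 32, 'colors': 3}
```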
@@ -0,0 +1,73 @@
#!/bin/bash

# Copyright 2017  Johns Hopkins University (author: Hossein Hadian)
# Apache 2.0

# This script downloads and prepares the training and test data for CIFAR-10 or CIFAR-100.

[ -f ./path.sh ] && . ./path.sh; # source the path.

dl_dir=data/dl
cifar10=$dl_dir/cifar-10-batches-bin
cifar10_url=https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz
cifar100=$dl_dir/cifar-100-binary
cifar100_url=https://www.cs.toronto.edu/~kriz/cifar-100-binary.tar.gz

mkdir -p $dl_dir
if [ -d $cifar10 ]; then
  echo "Not downloading CIFAR-10 as it is already there."
else
  if [ ! -f $dl_dir/cifar-10-binary.tar.gz ]; then
    echo "Downloading CIFAR-10..."
    wget -P $dl_dir $cifar10_url || exit 1;
  fi
  tar -xvzf $dl_dir/cifar-10-binary.tar.gz -C $dl_dir || exit 1;
  echo "Done downloading and extracting CIFAR-10"
fi

mkdir -p data/cifar10_{train,test}/data
seq 0 9 | paste -d' ' data/dl/cifar-10-batches-bin/batches.meta.txt - | grep '\S' >data/cifar10_train/classes.txt
cp data/cifar10_{train,test}/classes.txt
echo 3 > data/cifar10_train/num_colors
echo 3 > data/cifar10_test/num_colors

local/process_data.py --dataset train $cifar10 data/cifar10_train/ | \
  copy-feats --compress=true --compression-method=6 \
    ark:- ark,scp:data/cifar10_train/data/images.ark,data/cifar10_train/images.scp || exit 1

local/process_data.py --dataset test $cifar10 data/cifar10_test/ | \
  copy-feats --compress=true --compression-method=6 \
    ark:- ark,scp:data/cifar10_test/data/images.ark,data/cifar10_test/images.scp || exit 1



### CIFAR-100

if [ -d $cifar100 ]; then
  echo "Not downloading CIFAR-100 as it is already there."
else
  if [ ! -f $dl_dir/cifar-100-binary.tar.gz ]; then
    echo "Downloading CIFAR-100..."
    wget -P $dl_dir $cifar100_url || exit 1;
  fi
  tar -xvzf $dl_dir/cifar-100-binary.tar.gz -C $dl_dir || exit 1;
  echo "Done downloading and extracting CIFAR-100"
fi

mkdir -p data/cifar100_{train,test}/data
seq 0 99 | paste -d' ' $cifar100/fine_label_names.txt - | grep '\S' >data/cifar100_train/fine_classes.txt
seq 0 19 | paste -d' ' $cifar100/coarse_label_names.txt - | grep '\S' >data/cifar100_train/coarse_classes.txt

cp data/cifar100_{train,test}/fine_classes.txt
cp data/cifar100_{train,test}/coarse_classes.txt

echo 3 > data/cifar100_train/num_colors
echo 3 > data/cifar100_test/num_colors

local/process_data.py --dataset train $cifar100 data/cifar100_train/ | \
  copy-feats --compress=true --compression-method=6 \
    ark:- ark,scp:data/cifar100_train/data/images.ark,data/cifar100_train/images.scp || exit 1

local/process_data.py --dataset test $cifar100 data/cifar100_test/ | \
  copy-feats --compress=true --compression-method=6 \
    ark:- ark,scp:data/cifar100_test/data/images.ark,data/cifar100_test/images.scp || exit 1
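The `seq | paste | grep '\S'` pipelines above turn the dataset's list of class names into a classes.txt file mapping each name to its zero-based integer label, one "name index" entry per line. A Python sketch of the same transformation (the handling of blank lines is an assumption on my part, and the class names below are just illustrative):

```python
def make_classes_txt(label_names):
    """Pair each class name with its zero-based index, one
    'name index' entry per line, dropping entries whose name is
    blank (standing in for the grep '\\S' filter)."""
    lines = []
    for idx, name in enumerate(label_names):
        if name.strip():
            lines.append("%s %d" % (name, idx))
    return "\n".join(lines)

# first few CIFAR-10 class names, as found in batches.meta.txt
print(make_classes_txt(["airplane", "automobile", "bird"]))
```

This produces entries like "airplane 0", which is the format utils/int2sym.pl expects when validating labels.txt against classes.txt.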
@@ -0,0 +1,165 @@
#!/usr/bin/env python

# Copyright 2017  Johns Hopkins University (author: Hossein Hadian)
# Apache 2.0


""" This script prepares the training and test data for CIFAR-10 or CIFAR-100.
"""

import argparse
import os
import sys
import re
import errno


sys.path.insert(0, 'steps')
import libs.common as common_lib

parser = argparse.ArgumentParser(description="""Converts train/test data of
                                                CIFAR-10 or CIFAR-100 to
                                                Kaldi feature format""")
parser.add_argument('database', type=str,
                    default='data/dl/cifar-10-batches-bin',
                    help='path to downloaded cifar data (binary version)')
parser.add_argument('dir', type=str, help='output dir')
parser.add_argument('--dataset', type=str, default='train', choices=['train', 'test'])
parser.add_argument('--out-ark', type=str, default='-', help='where to write output feature data')

args = parser.parse_args()

# CIFAR image dimensions:
C = 3   # num_channels
H = 32  # num_rows
W = 32  # num_cols

def load_cifar10_data_batch(datafile):
    num_images_in_batch = 10000
    data = []
    labels = []
    with open(datafile, 'rb') as fh:
        for i in range(num_images_in_batch):
            label = ord(fh.read(1))
            bin_img = fh.read(C * H * W)
            img = [[[ord(byte) / 255.0 for byte in bin_img[channel*H*W+row*W:channel*H*W+(row+1)*W]]
                    for row in range(H)] for channel in range(C)]
            labels += [label]
            data += [img]
    return data, labels

def load_cifar100_data_batch(datafile):
    num_images_in_batch = 10000
    data = []
    fine_labels = []
    coarse_labels = []
    with open(datafile, 'rb') as fh:
        for i in range(num_images_in_batch):
            coarse_label = ord(fh.read(1))
            fine_label = ord(fh.read(1))
            bin_img = fh.read(C * H * W)
            img = [[[ord(byte) / 255.0 for byte in bin_img[channel*H*W+row*W:channel*H*W+(row+1)*W]]
                    for row in range(H)] for channel in range(C)]
            fine_labels += [fine_label]
            coarse_labels += [coarse_label]
            data += [img]
    return data, fine_labels, coarse_labels
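Each CIFAR-10 record in the binary format is one label byte followed by 3072 (= 3x32x32) image bytes stored channel-major, which is exactly what the loaders above walk through (CIFAR-100 records carry a coarse and a fine label byte instead). A self-contained Python 3 sketch of parsing a single record from an in-memory buffer, using synthetic bytes rather than real CIFAR data:

```python
import io

C, H, W = 3, 32, 32  # channels, rows, cols

def read_cifar10_record(fh):
    """Read one record: 1 label byte, then C*H*W pixel bytes,
    scaled to [0, 1] and reshaped to img[channel][row][col]."""
    label = fh.read(1)[0]
    raw = fh.read(C * H * W)
    img = [[[raw[c*H*W + r*W + col] / 255.0 for col in range(W)]
            for r in range(H)] for c in range(C)]
    return label, img

# a synthetic record: label 7, then pixel values cycling 0..255
buf = io.BytesIO(bytes([7]) + bytes(i % 256 for i in range(C * H * W)))
label, img = read_cifar10_record(buf)
print(label)                                  # 7
print(len(img), len(img[0]), len(img[0][0]))  # 3 32 32
```

Note the script itself uses Python 2 idioms (`ord(fh.read(1))`); indexing a bytes object, as here, is the Python 3 equivalent.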

def image_to_feat_matrix(img):
    mat = [0]*H  # final size: 32 x 96
    for row in range(H):
        mat[row] = [0]*C*W
        for ch in range(C):
            for col in range(W):
                mat[row][col*C+ch] = img[ch][row][col]
    return mat
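image_to_feat_matrix converts the channel-major image into the H x (W*C) layout used for the Kaldi feature matrix: each feature row is one image row, with the channels interleaved per pixel at column col*C+ch. A tiny Python 3 demonstration with made-up 2x2 dimensions so the interleaving is easy to see:

```python
C, H, W = 3, 2, 2  # toy dimensions for illustration (real CIFAR uses 3, 32, 32)

def image_to_feat_matrix(img):
    """img[ch][row][col] -> mat[row][col*C + ch] (channels interleaved)."""
    mat = [[0] * (C * W) for _ in range(H)]
    for row in range(H):
        for ch in range(C):
            for col in range(W):
                mat[row][col * C + ch] = img[ch][row][col]
    return mat

# encode channel/row/col into each pixel value: 100*ch + 10*row + col
img = [[[100*ch + 10*row + col for col in range(W)] for row in range(H)]
       for ch in range(C)]
mat = image_to_feat_matrix(img)
print(mat[0])  # [0, 100, 200, 1, 101, 201]: pixel (0,0) all channels, then pixel (0,1)
```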

def write_kaldi_matrix(file_handle, matrix, key):
    # matrix is a list of lists
    file_handle.write(key + " [ ")
    num_rows = len(matrix)
    if num_rows == 0:
        raise Exception("Matrix is empty")
    num_cols = len(matrix[0])

    for row_index in range(len(matrix)):
        if num_cols != len(matrix[row_index]):
            raise Exception("All the rows of a matrix are expected to "
                            "have the same length")
        file_handle.write(" ".join(map(lambda x: str(x), matrix[row_index])))
        if row_index != num_rows - 1:
            file_handle.write("\n")
    file_handle.write(" ]\n")

def zeropad(x, length):
    s = str(x)
    while len(s) < length:
        s = '0' + s
    return s
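The writer above emits Kaldi's text-matrix format: the key, an opening bracket, one whitespace-separated row per line, and a closing bracket. A quick check of that output for a 2x2 matrix, using io.StringIO in place of a real file (Python 3; the function is reproduced so the sketch is self-contained):

```python
import io

def write_kaldi_matrix(file_handle, matrix, key):
    """Write `matrix` (a list of equal-length rows) in Kaldi text format."""
    file_handle.write(key + " [ ")
    num_rows = len(matrix)
    if num_rows == 0:
        raise Exception("Matrix is empty")
    num_cols = len(matrix[0])
    for row_index in range(num_rows):
        if num_cols != len(matrix[row_index]):
            raise Exception("All the rows of a matrix are expected to "
                            "have the same length")
        file_handle.write(" ".join(str(x) for x in matrix[row_index]))
        if row_index != num_rows - 1:
            file_handle.write("\n")
    file_handle.write(" ]\n")

out = io.StringIO()
write_kaldi_matrix(out, [[1, 2], [3, 4]], "00001")
print(out.getvalue())
```

In the pipeline, this text stream is piped into copy-feats, which converts it to a compressed binary archive plus an scp index.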

### main ###
cifar10 = (args.database.find('cifar-100') == -1)
if args.out_ark == '-':
    out_fh = sys.stdout  # output file handle to write the feats to
else:
    out_fh = open(args.out_ark, 'wb')

if cifar10:
    img_id = 1  # similar to utt_id
    labels_file = os.path.join(args.dir, 'labels.txt')
    labels_fh = open(labels_file, 'wb')

    if args.dataset == 'train':
        for batch in range(1, 6):
            fpath = os.path.join(args.database, 'data_batch_' + str(batch) + '.bin')
            data, labels = load_cifar10_data_batch(fpath)
            for i in range(len(data)):
                key = zeropad(img_id, 5)
                labels_fh.write(key + ' ' + str(labels[i]) + '\n')
                feat_mat = image_to_feat_matrix(data[i])
                write_kaldi_matrix(out_fh, feat_mat, key)
                img_id += 1
    else:
        fpath = os.path.join(args.database, 'test_batch.bin')
        data, labels = load_cifar10_data_batch(fpath)
        for i in range(len(data)):
            key = zeropad(img_id, 5)
            labels_fh.write(key + ' ' + str(labels[i]) + '\n')
            feat_mat = image_to_feat_matrix(data[i])
            write_kaldi_matrix(out_fh, feat_mat, key)
            img_id += 1

    labels_fh.close()
else:
    img_id = 1  # similar to utt_id
    fine_labels_file = os.path.join(args.dir, 'fine_labels.txt')
    coarse_labels_file = os.path.join(args.dir, 'coarse_labels.txt')
    fine_labels_fh = open(fine_labels_file, 'wb')
    coarse_labels_fh = open(coarse_labels_file, 'wb')

    if args.dataset == 'train':
        fpath = os.path.join(args.database, 'train.bin')
        data, fine_labels, coarse_labels = load_cifar100_data_batch(fpath)
        for i in range(len(data)):
            key = zeropad(img_id, 5)
            fine_labels_fh.write(key + ' ' + str(fine_labels[i]) + '\n')
            coarse_labels_fh.write(key + ' ' + str(coarse_labels[i]) + '\n')
            feat_mat = image_to_feat_matrix(data[i])
            write_kaldi_matrix(out_fh, feat_mat, key)
            img_id += 1
    else:
        fpath = os.path.join(args.database, 'test.bin')
        data, fine_labels, coarse_labels = load_cifar100_data_batch(fpath)
        for i in range(len(data)):
            key = zeropad(img_id, 5)
            fine_labels_fh.write(key + ' ' + str(fine_labels[i]) + '\n')
            coarse_labels_fh.write(key + ' ' + str(coarse_labels[i]) + '\n')
            feat_mat = image_to_feat_matrix(data[i])
            write_kaldi_matrix(out_fh, feat_mat, key)
            img_id += 1

    fine_labels_fh.close()
    coarse_labels_fh.close()

out_fh.close()
@@ -0,0 +1,6 @@
export KALDI_ROOT=`pwd`/../../..
[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
. $KALDI_ROOT/tools/config/common_path.sh
export LC_ALL=C