From 3354913e2d5eaeb9bb235645d22e0e1ec878e575 Mon Sep 17 00:00:00 2001
From: HassanAmr <hassan.amr89@gmail.com>
Date: Tue, 24 Jan 2017 20:27:54 +0100
Subject: [PATCH 1/4] replaced preprocess.cpp with a Python equivalent that
 includes a High- and Low-Pass filter

---
 preprocess.cpp | 214 -------------------------------------------------
 preprocess.py  |  98 ++++++++++++++++++++++
 2 files changed, 98 insertions(+), 214 deletions(-)
 delete mode 100644 preprocess.cpp
 create mode 100644 preprocess.py
diff --git a/preprocess.cpp b/preprocess.cpp
deleted file mode 100644
index e678c38..0000000
--- a/preprocess.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-#include <GRT.h>
-#include <iostream>
-#include "cmdline.h"
-#include "libgrt_util.h"
-
-using namespace GRT;
-using namespace std;
-
-InfoLog info;
-ErrorLog err;
-
-string list_preprocessors() {
-  stringstream ss;
-
-  for( auto name : PreProcessing::getRegisteredPreprocessors() )
-    ss << name << endl;
-
-  return ss.str();
-}
-
-PreProcessing *apply_cmdline_args(string, cmdline::parser&,int,string&);
-
-int main(int argc, const char *argv[]) {
-  static bool is_running = true;
-  string input_file = "-";
-  cmdline::parser c;
-
-  c.add<int>   ("verbose",    'v', "verbosity level: 0-4", false, 0);
-  c.add        ("help",       'h', "print this message");
-  c.add<string>("type",       't', "force classification, regression or timeseries input", false, "", cmdline::oneof<string>("classification", "regression", "timeseries", "auto"));
-  c.footer     ("<pre-processor> [<filename>] ");
-
-  /* parse common options */
-  bool parse_ok = c.parse(argc,argv,false) && !c.exist("help");
-  set_verbosity(c.get<int>("verbose"));
-
-  /* do we have a predictor? */
-  string preproc_name = c.rest().size() > 0 ? c.rest()[0] : "list";
-  if (preproc_name == "list") {
-    cout << c.usage() << endl;
-    cout << list_preprocessors();
-    exit(0);
-  }
-
-  PreProcessing *pp = apply_cmdline_args(preproc_name,c,1,input_file);
-
-  if (pp==NULL)
-    exit(-1);
-
-  if (!parse_ok) {
-    cerr << c.usage() << endl << c.error() << endl;
-    exit(-1);
-  }
-
-  /* do we read from a file or from stdin-? */
-  ifstream fin; fin.open(input_file);
-  istream &in = input_file=="-" ? cin : fin;
-
-  string line; int linenum=0;
-  while(getline(in,line)) {
-    stringstream ss(line);
-
-    if (line[0] == '#') {
-      cout << line << endl;
-      continue;
-    }
-
-    if (line.size() == 0) {
-      cout << endl;
-      continue;
-    }
-
-    try { string label; ss >> label; cout << label << "\t"; }
-    catch (exception &e) { /* unlabeled data */ }
-
-    VectorFloat vals; double value;
-    while (ss >> value)
-      vals.push_back(value);
-
-    if (linenum == 0) {
-      // weird stuff, pp resets only when initialized, it only initialized once
-      // data has been seen, and only set num outputdimenstion when reset so:
-      pp->setNumInputDimensions(vals.size());
-      pp->process(VectorFloat(vals.size(), 1.));
-      pp->reset();
-    }
-
-    bool ok = pp->process(vals);
-    if (!ok) {
-      cerr << "unable to process line " << linenum << endl;
-      exit(-1);
-    }
-
-    for(auto value : pp->getProcessedData())
-      cout << value << "\t";
-
-    cout << endl;
-    linenum++;
-  }
-}
-
-PreProcessing *apply_cmdline_args(string type, cmdline::parser &c, int num_dimensions, string &input_file) {
-  PreProcessing *pp;
-  cmdline::parser p;
-
-  if (type == "DeadZone") {
-    p.add<double>("lower-limit", 'L', "lower limit for dead-zone", false, -.1);
-    p.add<double>("upper-limit", 'U', "upper limit for dead-zone", false,  .1);
-  } else if (type == "Derivative") {
-    p.add<int>   ("order", 'O', "derivative order of the filter", false, 1, cmdline::oneof<int>(1,2));
-    p.add<double>("delta", 'D', "time between in sampleRate/1000.", false, 1);
-    p.add<int>   ("filter-size", 'F', "size of the filter, set to zero to disable", false, 3);
-  } else if (type == "DoubleMovingAverageFilter" || type == "MovingAverageFilter" || type == "MedianFilter") {
-    p.add<int>   ("filter-size", 'F', "size of the filter", false, 5);
-  } else if (type == "FIRFilter") {
-    p.add<string>("filter-type",  'T', "filter type, one of LPF, HPF, BPF", false, "LPF", cmdline::oneof<string>("LPF","HPF","BPF"));
-    p.add<int>   ("num-taps   ",  'N', "number of filter taps", false, 50);
-    p.add<double>("sample-duration",  'S', "sample rate of your data", true);
-    p.add<double>("cutoff",       'C', "cutoff frequency of the filter", false, 10);
-    p.add<double>("gain",         'G', "filter gain", false, 1);
-  } else if (type == "HighPassFilter") {
-    p.add<double>("factor",        'F', "the smaller this value the more smoothing is done", false, .1);
-    p.add<double>("gain",          'G', "multiplies filtered values by this value", false, 1);
-    p.add<double>("cutoff",        'C', "set the cutoff frequency in Hz", false, 50),
-    p.add<double>("sample-duration",   'R', "set the sample rate of your data, as 1/SR", true);
-  } else if (type == "LeakyIntegrator") {
-    p.add<double>("leak-rate",     'L', "leak rate", false, 0.99, cmdline::range<double>(0,1.));
-  } else if (type == "LowPassFilter") {
-    p.add<double>("factor",        'F', "the smaller this value the more smoothing is done", false, .1);
-    p.add<double>("gain",          'G', "multiplies filtered values by this value", false, 1);
-    p.add<double>("cutoff",        'C', "set the cutoff frequency in Hz", false, 50),
-    p.add<double>("sample-duration",   'R', "set the sample rate of your data, as 1/SR", true);
-  } else if (type == "SavitzkyGolayFilter") {
-    p.add<int>   ("left-hand",       'L', "number of left-hand points for filter design", false, 10);
-    p.add<int>   ("right-hand",      'R', "number of right-hand points for filter design", false, 10);
-    p.add<int>   ("order",           'O', "derivative order of the filter", false, 0);
-    p.add<int>   ("smoothing-order", 'S', "smoothing order, must be one of 2 or 4", false, 2, cmdline::oneof<int>(2,4));
-  } else {
-    cout << c.usage() << endl;
-    cout << list_preprocessors() << endl;
-    cerr << "unable to load preprocessor " << type << endl;
-    return NULL;
-  }
-
-  if (!p.parse(c.rest()) || c.exist("help")) {
-    cerr << c.usage() << endl << "pre processing options:" << endl << p.str_options() << endl << p.error() << endl;
-    exit(-1);
-  }
-
-  if (type == "DeadZone") {
-    pp = new DeadZone(
-        p.get<double>("lower-limit"),
-        p.get<double>("upper-limit"),
-        num_dimensions);
-  } else if (type == "Derivative") {
-    pp = new Derivative(
-        p.get<int>   ("order"),
-        p.get<double>("delta"),
-        num_dimensions,
-        p.get<int>  ("filter-size") != 0,
-        p.get<int>  ("filter-size"));
-  } else if (type == "DoubleMovingAverageFilter") {
-    pp = new DoubleMovingAverageFilter(
-        p.get<int>  ("filter-size"),
-        num_dimensions);
-  } else if (type == "FIRFilter") {
-    vector<string> list = {"LPF","HPF","BPF"};
-    pp = new FIRFilter(
-        find(list.begin(),list.end(),p.get<string>("func")) - list.begin(),
-        p.get<int>("num-taps"),
-        p.get<double>("sample-duration"),
-        p.get<double>("cutoff"),
-        p.get<double>("gain"),
-        num_dimensions);
-  } else if (type == "HighPassFilter") {
-    pp = new HighPassFilter(
-        p.get<double>("factor"),
-        p.get<double>("gain"),
-        num_dimensions,
-        p.get<double>("cutoff"),
-        p.get<double>("sample-duration"));
-  } else if (type == "LeakyIntegrator") {
-    pp = new LeakyIntegrator(
-        p.get<double>("leak-rate"),
-        num_dimensions);
-  } else if (type == "LowPassFilter") {
-    pp = new LowPassFilter(
-        p.get<double>("factor"),
-        p.get<double>("gain"),
-        num_dimensions,
-        p.get<double>("cutoff"),
-        p.get<double>("sample-duration"));
-  } else if (type == "MedianFilter") {
-    pp = new MedianFilter(
-        p.get<int>("filter-size"),
-        num_dimensions);
-  } else if (type == "MovingAverageFilter") {
-    pp = new MovingAverageFilter(
-        p.get<int>("filter-size"),
-        num_dimensions);
-  } else if(type == "SavitzkyGolayFilter") {
-    pp = new SavitzkyGolayFilter(
-        p.get<int>("left-hand"),
-        p.get<int>("right-hand"),
-        p.get<int>("order"),
-        p.get<int>("smoothing-order"),
-        num_dimensions);
-  }
-
-  if (p.rest().size() > 0)
-    input_file = p.rest()[0];
-
-  return pp;
-}
diff --git a/preprocess.py b/preprocess.py
new file mode 100644
index 0000000..3ccacc5
--- /dev/null
+++ b/preprocess.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+import numpy as np
+from scipy.signal import butter, lfilter, freqz, medfilt
+from optparse import OptionParser
+import sys
+import signal
+
+def butter_lowpass(cutoff, fs, order=5):
+  nyq = 0.5 * fs
+  normal_cutoff = cutoff / nyq
+  b, a = butter(order, normal_cutoff, btype='low', analog=False)
+  return b, a
+
+def butter_highpass(cutoff, fs, order=5):
+  nyq = 0.5 * fs
+  normal_cutoff = cutoff / nyq
+  b, a = butter(order, normal_cutoff, btype='high', analog=False)
+  return b, a
+
+def butter_lowpass_filter(data, cutoff, fs, order=5):
+  b, a = butter_lowpass(cutoff, fs, order=order)
+  y = lfilter(b, a, data)
+  return y
+
+def butter_highpass_filter(data, cutoff, fs, order=5):
+  b, a = butter_highpass(cutoff, fs, order=order)
+  y = lfilter(b, a, data)
+  return y
+
+
+parser = OptionParser()
+parser.add_option("-c", "--cutoff", dest="cutoff",
+                  help="cutoff frequency", metavar="CUTOFF")
+parser.add_option("-f", "--frequency", dest="fs",
+                  help="The rate of incoming data", metavar="RATE")
+parser.add_option("-n", "--size", dest="vectorSize",
+                  help="The vector size of the incoming data", metavar="VECTOR_SIZE")
+parser.add_option("-o", "--order", dest="order", default=5,
+                  help="The filter's order", metavar="ORDER")
+parser.add_option("-t", "--type", dest="filterType", default="HPF",
+                  help="The type of filter to use, HPF (High-Pass Filter), or LPF (Low-Pass Filter).", metavar="FILTER_TYPE")
+
+
+(options, args) = parser.parse_args()
+
+try: 
+  order = int(float(options.order))
+except ValueError:
+  print("Wrong value for order entered.")
+  sys.exit()
+
+try: 
+  vectorSize = int(float(options.vectorSize))
+except ValueError:
+  print("Wrong value for VECTOR_SIZE entered.")
+  sys.exit()
+
+try: 
+  cutoff = float(options.cutoff)
+except ValueError:
+  print("Wrong value for cutoff frequency entered.")
+  sys.exit()
+
+try: 
+  fs = float(options.fs)
+except ValueError:
+  print("Wrong value for RATE entered.")
+  sys.exit()
+
+filterType = options.filterType
+filterType = filterType.upper()
+
+#if condition to check on the data entered, exit if soemthing is wrong or missing
+if filterType not in ["LPF","HPF"]:
+  print("Wrong type of filter selected.")
+  sys.exit()
+
+if order < 1:
+  print("Invalid value for filter's order.")
+  sys.exit() 
+
+
+#load data from stdin as string
+data = np.loadtxt(sys.stdin, dtype=str)
+
+for i in range(0,vectorSize):
+  
+  #filter a column while skipping 1 (because of the preceding label) after converting it to float
+  if filterType == "HPF":
+    filteredData = butter_highpass_filter(data[:, i + 1].astype(np.float), cutoff, fs, order)
+  elif filterType == "LPF":
+    filteredData = butter_lowpass_filter(data[:, i + 1].astype(np.float), cutoff, fs, order)
+  
+  #replace the column used as input to the filter with the filtered values
+  data[:, i + 1] = filteredData 
+
+#print to stdout
+print('\n'.join(['\t'.join([item for item in row]) for row in data]))

From 05cfbff76d39a2040783bb0b5d248d67b1368c0e Mon Sep 17 00:00:00 2001
From: HassanAmr <hassan.amr89@gmail.com>
Date: Wed, 25 Jan 2017 19:34:05 +0100
Subject: [PATCH 2/4] added some validation and exception handling before
 using the filter

---
 preprocess.py | 72 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 45 insertions(+), 27 deletions(-)

diff --git a/preprocess.py b/preprocess.py
index 3ccacc5..d842ab6 100644
--- a/preprocess.py
+++ b/preprocess.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
 import numpy as np
 from scipy.signal import butter, lfilter, freqz, medfilt
-from optparse import OptionParser
+#from optparse import OptionParser
+import argparse
 import sys
 import signal
 
@@ -28,60 +29,77 @@ def butter_highpass_filter(data, cutoff, fs, order=5):
   return y
 
 
-parser = OptionParser()
-parser.add_option("-c", "--cutoff", dest="cutoff",
-                  help="cutoff frequency", metavar="CUTOFF")
-parser.add_option("-f", "--frequency", dest="fs",
-                  help="The rate of incoming data", metavar="RATE")
-parser.add_option("-n", "--size", dest="vectorSize",
-                  help="The vector size of the incoming data", metavar="VECTOR_SIZE")
-parser.add_option("-o", "--order", dest="order", default=5,
-                  help="The filter's order", metavar="ORDER")
-parser.add_option("-t", "--type", dest="filterType", default="HPF",
-                  help="The type of filter to use, HPF (High-Pass Filter), or LPF (Low-Pass Filter).", metavar="FILTER_TYPE")
+parser = argparse.ArgumentParser()
+parser.add_argument("-c", "--cutoff", dest="cutoff",
+                  help="cutoff frequency", metavar="CUTOFF", type=float)
+parser.add_argument("-f", "--frequency", dest="fs",
+                  help="The rate of incoming data", metavar="RATE", type=float)
+#comment the next block for now and compute it straight from input
+#parser.add_argument("-n", "--size", dest="vectorSize",
+#                  help="The vector size of the incoming data", metavar="VECTOR_SIZE", type=int)
+parser.add_argument("-o", "--order", dest="order", default=5,
+                  help="The filter's order (optional)", metavar="ORDER", type=int)
+parser.add_argument("-t", "--type", dest="filterType", default="LPF",
+                  help="The type of filter to use, HPF (High-Pass Filter), or LPF (Low-Pass Filter). The default is set to LPF.", metavar="FILTER_TYPE")
 
 
-(options, args) = parser.parse_args()
+args = parser.parse_args()
 
 try: 
-  order = int(float(options.order))
+  order = int(args.order)
 except ValueError:
   print("Wrong value for order entered.")
-  sys.exit()
+  sys.exit(1)
 
-try: 
-  vectorSize = int(float(options.vectorSize))
-except ValueError:
-  print("Wrong value for VECTOR_SIZE entered.")
-  sys.exit()
+#comment the next block for now and compute it straight from input
+#try: 
+#  vectorSize = args.vectorSize
+#except ValueError:
+#  print("Wrong value for VECTOR_SIZE entered.")
+#  sys.exit()
 
 try: 
-  cutoff = float(options.cutoff)
+  cutoff = args.cutoff
 except ValueError:
   print("Wrong value for cutoff frequency entered.")
   sys.exit()
 
 try: 
-  fs = float(options.fs)
+  fs = args.fs
 except ValueError:
   print("Wrong value for RATE entered.")
-  sys.exit()
+  sys.exit(1)
 
-filterType = options.filterType
+filterType = args.filterType
 filterType = filterType.upper()
 
 #if condition to check on the data entered, exit if soemthing is wrong or missing
 if filterType not in ["LPF","HPF"]:
   print("Wrong type of filter selected.")
-  sys.exit()
+  sys.exit(1)
 
 if order < 1:
   print("Invalid value for filter's order.")
-  sys.exit() 
+  sys.exit(1) 
+
+if fs <= 0:
+  print("Invalid value for data frequency.")
+  sys.exit(1)
 
+if cutoff <= 0:
+  print("Invalid value for cutoff frequency.")
+  sys.exit(1)
 
 #load data from stdin as string
-data = np.loadtxt(sys.stdin, dtype=str)
+try:
+  data = np.loadtxt(sys.stdin, dtype=str)
+ #loadtxt will throw an exception if the rows are not consistent.
+except ValueError:
+  print("Invalid input data.")
+  sys.exit(1)
+
+#compute the vector size from the first line
+vectorSize = len(data[0]) - 1 
 
 for i in range(0,vectorSize):
   

From 627c23d98f528a5681a44c0d05aca1cea4bfd79f Mon Sep 17 00:00:00 2001
From: HassanAmr <hassan.amr89@gmail.com>
Date: Mon, 30 Jan 2017 05:22:49 +0100
Subject: [PATCH 3/4] modified tool to work with newer versions of Python

---
 preprocess.py | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/preprocess.py b/preprocess.py
index d842ab6..cf11589 100644
--- a/preprocess.py
+++ b/preprocess.py
@@ -6,24 +6,24 @@
 import sys
 import signal
 
-def butter_lowpass(cutoff, fs, order=5):
+def butter_lowpass(cutoff, fs, order):
   nyq = 0.5 * fs
   normal_cutoff = cutoff / nyq
   b, a = butter(order, normal_cutoff, btype='low', analog=False)
   return b, a
 
-def butter_highpass(cutoff, fs, order=5):
+def butter_highpass(cutoff, fs, order):
   nyq = 0.5 * fs
   normal_cutoff = cutoff / nyq
   b, a = butter(order, normal_cutoff, btype='high', analog=False)
   return b, a
 
-def butter_lowpass_filter(data, cutoff, fs, order=5):
+def butter_lowpass_filter(data, cutoff, fs, order):
   b, a = butter_lowpass(cutoff, fs, order=order)
   y = lfilter(b, a, data)
   return y
 
-def butter_highpass_filter(data, cutoff, fs, order=5):
+def butter_highpass_filter(data, cutoff, fs, order):
   b, a = butter_highpass(cutoff, fs, order=order)
   y = lfilter(b, a, data)
   return y
@@ -45,6 +45,8 @@ def butter_highpass_filter(data, cutoff, fs, order=5):
 
 args = parser.parse_args()
 
+#this is a default value in case the user doesn't provide it.
+order=5
 try: 
   order = int(args.order)
 except ValueError:
@@ -92,25 +94,31 @@ def butter_highpass_filter(data, cutoff, fs, order=5):
 
 #load data from stdin as string
 try:
-  data = np.loadtxt(sys.stdin, dtype=str)
+#  data = np.loadtxt(sys.stdin, dtype=str)
+  data = np.loadtxt(sys.stdin, dtype=bytes).astype(str)
  #loadtxt will throw an exception if the rows are not consistent.
 except ValueError:
   print("Invalid input data.")
   sys.exit(1)
 
+#take the label column and place it in the final array
+finalData = data[:,0]
+#take the remainder in a 2d float array
+floats = np.delete(data, 0, 1).astype(np.float)
+
 #compute the vector size from the first line
-vectorSize = len(data[0]) - 1 
+vectorSize = len(floats[0])
 
 for i in range(0,vectorSize):
   
-  #filter a column while skipping 1 (because of the preceding label) after converting it to float
+  #filter a column
   if filterType == "HPF":
-    filteredData = butter_highpass_filter(data[:, i + 1].astype(np.float), cutoff, fs, order)
+    filteredData = butter_highpass_filter(floats[:, i], cutoff, fs, order)
   elif filterType == "LPF":
-    filteredData = butter_lowpass_filter(data[:, i + 1].astype(np.float), cutoff, fs, order)
-  
-  #replace the column used as input to the filter with the filtered values
-  data[:, i + 1] = filteredData 
+    filteredData = butter_lowpass_filter(floats[:, i], cutoff, fs, order)
+
+  #stack each newly filtered array to the final array
+  finalData = np.column_stack((finalData, filteredData))
 
 #print to stdout
-print('\n'.join(['\t'.join([item for item in row]) for row in data]))
+print('\n'.join(['\t'.join([item for item in row]) for row in finalData]))

From cce484a9cdb9ba82deb0c78f34591ae7ac0dde49 Mon Sep 17 00:00:00 2001
From: HassanAmr <hassan.amr89@gmail.com>
Date: Mon, 30 Jan 2017 18:16:53 +0100
Subject: [PATCH 4/4] modified the code to use slice instead of delete

---
 preprocess.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/preprocess.py b/preprocess.py
index cf11589..a87d708 100644
--- a/preprocess.py
+++ b/preprocess.py
@@ -104,7 +104,7 @@ def butter_highpass_filter(data, cutoff, fs, order):
 #take the label column and place it in the final array
 finalData = data[:,0]
 #take the remainder in a 2d float array
-floats = np.delete(data, 0, 1).astype(np.float)
+floats = data[:,1:].astype(np.float)
 
 #compute the vector size from the first line
 vectorSize = len(floats[0])