From 3354913e2d5eaeb9bb235645d22e0e1ec878e575 Mon Sep 17 00:00:00 2001 From: HassanAmr Date: Tue, 24 Jan 2017 20:27:54 +0100 Subject: [PATCH 1/4] replaced preprocess.cpp by a python equivelant that includes a High- and Low-Pass filter --- preprocess.cpp | 214 ------------------------------------------------- preprocess.py | 98 ++++++++++++++++++++++ 2 files changed, 98 insertions(+), 214 deletions(-) delete mode 100644 preprocess.cpp create mode 100644 preprocess.py diff --git a/preprocess.cpp b/preprocess.cpp deleted file mode 100644 index e678c38..0000000 --- a/preprocess.cpp +++ /dev/null @@ -1,214 +0,0 @@ -#include -#include -#include "cmdline.h" -#include "libgrt_util.h" - -using namespace GRT; -using namespace std; - -InfoLog info; -ErrorLog err; - -string list_preprocessors() { - stringstream ss; - - for( auto name : PreProcessing::getRegisteredPreprocessors() ) - ss << name << endl; - - return ss.str(); -} - -PreProcessing *apply_cmdline_args(string, cmdline::parser&,int,string&); - -int main(int argc, const char *argv[]) { - static bool is_running = true; - string input_file = "-"; - cmdline::parser c; - - c.add ("verbose", 'v', "verbosity level: 0-4", false, 0); - c.add ("help", 'h', "print this message"); - c.add("type", 't', "force classification, regression or timeseries input", false, "", cmdline::oneof("classification", "regression", "timeseries", "auto")); - c.footer (" [] "); - - /* parse common options */ - bool parse_ok = c.parse(argc,argv,false) && !c.exist("help"); - set_verbosity(c.get("verbose")); - - /* do we have a predictor? */ - string preproc_name = c.rest().size() > 0 ? c.rest()[0] : "list"; - if (preproc_name == "list") { - cout << c.usage() << endl; - cout << list_preprocessors(); - exit(0); - } - - PreProcessing *pp = apply_cmdline_args(preproc_name,c,1,input_file); - - if (pp==NULL) - exit(-1); - - if (!parse_ok) { - cerr << c.usage() << endl << c.error() << endl; - exit(-1); - } - - /* do we read from a file or from stdin-? */ - ifstream fin; fin.open(input_file); - istream &in = input_file=="-" ? cin : fin; - - string line; int linenum=0; - while(getline(in,line)) { - stringstream ss(line); - - if (line[0] == '#') { - cout << line << endl; - continue; - } - - if (line.size() == 0) { - cout << endl; - continue; - } - - try { string label; ss >> label; cout << label << "\t"; } - catch (exception &e) { /* unlabeled data */ } - - VectorFloat vals; double value; - while (ss >> value) - vals.push_back(value); - - if (linenum == 0) { - // weird stuff, pp resets only when initialized, it only initialized once - // data has been seen, and only set num outputdimenstion when reset so: - pp->setNumInputDimensions(vals.size()); - pp->process(VectorFloat(vals.size(), 1.)); - pp->reset(); - } - - bool ok = pp->process(vals); - if (!ok) { - cerr << "unable to process line " << linenum << endl; - exit(-1); - } - - for(auto value : pp->getProcessedData()) - cout << value << "\t"; - - cout << endl; - linenum++; - } -} - -PreProcessing *apply_cmdline_args(string type, cmdline::parser &c, int num_dimensions, string &input_file) { - PreProcessing *pp; - cmdline::parser p; - - if (type == "DeadZone") { - p.add("lower-limit", 'L', "lower limit for dead-zone", false, -.1); - p.add("upper-limit", 'U', "upper limit for dead-zone", false, .1); - } else if (type == "Derivative") { - p.add ("order", 'O', "derivative order of the filter", false, 1, cmdline::oneof(1,2)); - p.add("delta", 'D', "time between in sampleRate/1000.", false, 1); - p.add ("filter-size", 'F', "size of the filter, set to zero to disable", false, 3); - } else if (type == "DoubleMovingAverageFilter" || type == "MovingAverageFilter" || type == "MedianFilter") { - p.add ("filter-size", 'F', "size of the filter", false, 5); - } else if (type == "FIRFilter") { - p.add("filter-type", 'T', "filter type, one of LPF, HPF, BPF", false, "LPF", cmdline::oneof("LPF","HPF","BPF")); - p.add ("num-taps ", 'N', "number of filter taps", false, 50); - p.add("sample-duration", 'S', "sample rate of your data", true); - p.add("cutoff", 'C', "cutoff frequency of the filter", false, 10); - p.add("gain", 'G', "filter gain", false, 1); - } else if (type == "HighPassFilter") { - p.add("factor", 'F', "the smaller this value the more smoothing is done", false, .1); - p.add("gain", 'G', "multiplies filtered values by this value", false, 1); - p.add("cutoff", 'C', "set the cutoff frequency in Hz", false, 50), - p.add("sample-duration", 'R', "set the sample rate of your data, as 1/SR", true); - } else if (type == "LeakyIntegrator") { - p.add("leak-rate", 'L', "leak rate", false, 0.99, cmdline::range(0,1.)); - } else if (type == "LowPassFilter") { - p.add("factor", 'F', "the smaller this value the more smoothing is done", false, .1); - p.add("gain", 'G', "multiplies filtered values by this value", false, 1); - p.add("cutoff", 'C', "set the cutoff frequency in Hz", false, 50), - p.add("sample-duration", 'R', "set the sample rate of your data, as 1/SR", true); - } else if (type == "SavitzkyGolayFilter") { - p.add ("left-hand", 'L', "number of left-hand points for filter design", false, 10); - p.add ("right-hand", 'R', "number of right-hand points for filter design", false, 10); - p.add ("order", 'O', "derivative order of the filter", false, 0); - p.add ("smoothing-order", 'S', "smoothing order, must be one of 2 or 4", false, 2, cmdline::oneof(2,4)); - } else { - cout << c.usage() << endl; - cout << list_preprocessors() << endl; - cerr << "unable to load preprocessor " << type << endl; - return NULL; - } - - if (!p.parse(c.rest()) || c.exist("help")) { - cerr << c.usage() << endl << "pre processing options:" << endl << p.str_options() << endl << p.error() << endl; - exit(-1); - } - - if (type == "DeadZone") { - pp = new DeadZone( - p.get("lower-limit"), - p.get("upper-limit"), - num_dimensions); - } else if (type == "Derivative") { - pp = new Derivative( - p.get ("order"), - p.get("delta"), - num_dimensions, - p.get ("filter-size") != 0, - p.get ("filter-size")); - } else if (type == "DoubleMovingAverageFilter") { - pp = new DoubleMovingAverageFilter( - p.get ("filter-size"), - num_dimensions); - } else if (type == "FIRFilter") { - vector list = {"LPF","HPF","BPF"}; - pp = new FIRFilter( - find(list.begin(),list.end(),p.get("func")) - list.begin(), - p.get("num-taps"), - p.get("sample-duration"), - p.get("cutoff"), - p.get("gain"), - num_dimensions); - } else if (type == "HighPassFilter") { - pp = new HighPassFilter( - p.get("factor"), - p.get("gain"), - num_dimensions, - p.get("cutoff"), - p.get("sample-duration")); - } else if (type == "LeakyIntegrator") { - pp = new LeakyIntegrator( - p.get("leak-rate"), - num_dimensions); - } else if (type == "LowPassFilter") { - pp = new LowPassFilter( - p.get("factor"), - p.get("gain"), - num_dimensions, - p.get("cutoff"), - p.get("sample-duration")); - } else if (type == "MedianFilter") { - pp = new MedianFilter( - p.get("filter-size"), - num_dimensions); - } else if (type == "MovingAverageFilter") { - pp = new MovingAverageFilter( - p.get("filter-size"), - num_dimensions); - } else if(type == "SavitzkyGolayFilter") { - pp = new SavitzkyGolayFilter( - p.get("left-hand"), - p.get("right-hand"), - p.get("order"), - p.get("smoothing-order"), - num_dimensions); - } - - if (p.rest().size() > 0) - input_file = p.rest()[0]; - - return pp; -} diff --git a/preprocess.py b/preprocess.py new file mode 100644 index 0000000..3ccacc5 --- /dev/null +++ b/preprocess.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python +import numpy as np +from scipy.signal import butter, lfilter, freqz, medfilt +from optparse import OptionParser +import sys +import signal + +def butter_lowpass(cutoff, fs, order=5): + nyq = 0.5 * fs + normal_cutoff = cutoff / nyq + b, a = butter(order, normal_cutoff, btype='low', analog=False) + return b, a + +def butter_highpass(cutoff, fs, order=5): + nyq = 0.5 * fs + normal_cutoff = cutoff / nyq + b, a = butter(order, normal_cutoff, btype='high', analog=False) + return b, a + +def butter_lowpass_filter(data, cutoff, fs, order=5): + b, a = butter_lowpass(cutoff, fs, order=order) + y = lfilter(b, a, data) + return y + +def butter_highpass_filter(data, cutoff, fs, order=5): + b, a = butter_highpass(cutoff, fs, order=order) + y = lfilter(b, a, data) + return y + + +parser = OptionParser() +parser.add_option("-c", "--cutoff", dest="cutoff", + help="cutoff frequency", metavar="CUTOFF") +parser.add_option("-f", "--frequency", dest="fs", + help="The rate of incoming data", metavar="RATE") +parser.add_option("-n", "--size", dest="vectorSize", + help="The vector size of the incoming data", metavar="VECTOR_SIZE") +parser.add_option("-o", "--order", dest="order", default=5, + help="The filter's order", metavar="ORDER") +parser.add_option("-t", "--type", dest="filterType", default="HPF", + help="The type of filter to use, HPF (High-Pass Filter), or LPF (Low-Pass Filter).", metavar="FILTER_TYPE") + + +(options, args) = parser.parse_args() + +try: + order = int(float(options.order)) +except ValueError: + print("Wrong value for order entered.") + sys.exit() + +try: + vectorSize = int(float(options.vectorSize)) +except ValueError: + print("Wrong value for VECTOR_SIZE entered.") + sys.exit() + +try: + cutoff = float(options.cutoff) +except ValueError: + print("Wrong value for cutoff frequency entered.") + sys.exit() + +try: + fs = float(options.fs) +except ValueError: + print("Wrong value for RATE entered.") + sys.exit() + +filterType = options.filterType +filterType = filterType.upper() + +#if condition to check on the data entered, exit if soemthing is wrong or missing +if filterType not in ["LPF","HPF"]: + print("Wrong type of filter selected.") + sys.exit() + +if order < 1: + print("Invalid value for filter's order.") + sys.exit() + + +#load data from stdin as string +data = np.loadtxt(sys.stdin, dtype=str) + +for i in range(0,vectorSize): + + #filter a column while skipping 1 (because of the preceding label) after converting it to float + if filterType == "HPF": + filteredData = butter_highpass_filter(data[:, i + 1].astype(np.float), cutoff, fs, order) + elif filterType == "LPF": + filteredData = butter_lowpass_filter(data[:, i + 1].astype(np.float), cutoff, fs, order) + + #replace the column used as input to the filter with the filtered values + data[:, i + 1] = filteredData + +#print to stdout +print('\n'.join(['\t'.join([item for item in row]) for row in data])) From 05cfbff76d39a2040783bb0b5d248d67b1368c0e Mon Sep 17 00:00:00 2001 From: HassanAmr Date: Wed, 25 Jan 2017 19:34:05 +0100 Subject: [PATCH 2/4] added some validations and exceptions handling before using the filter --- preprocess.py | 72 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/preprocess.py b/preprocess.py index 3ccacc5..d842ab6 100644 --- a/preprocess.py +++ b/preprocess.py @@ -1,7 +1,8 @@ #!/usr/bin/env python import numpy as np from scipy.signal import butter, lfilter, freqz, medfilt -from optparse import OptionParser +#from optparse import OptionParser +import argparse import sys import signal @@ -28,60 +29,77 @@ def butter_highpass_filter(data, cutoff, fs, order=5): return y -parser = OptionParser() -parser.add_option("-c", "--cutoff", dest="cutoff", - help="cutoff frequency", metavar="CUTOFF") -parser.add_option("-f", "--frequency", dest="fs", - help="The rate of incoming data", metavar="RATE") -parser.add_option("-n", "--size", dest="vectorSize", - help="The vector size of the incoming data", metavar="VECTOR_SIZE") -parser.add_option("-o", "--order", dest="order", default=5, - help="The filter's order", metavar="ORDER") -parser.add_option("-t", "--type", dest="filterType", default="HPF", - help="The type of filter to use, HPF (High-Pass Filter), or LPF (Low-Pass Filter).", metavar="FILTER_TYPE") +parser = argparse.ArgumentParser() +parser.add_argument("-c", "--cutoff", dest="cutoff", + help="cutoff frequency", metavar="CUTOFF", type=float) +parser.add_argument("-f", "--frequency", dest="fs", + help="The rate of incoming data", metavar="RATE", type=float) +#comment the next block for now and compute it straight from input +#parser.add_argument("-n", "--size", dest="vectorSize", +# help="The vector size of the incoming data", metavar="VECTOR_SIZE", type=int) +parser.add_argument("-o", "--order", dest="order", default=5, + help="The filter's order (optional)", metavar="ORDER", type=int) +parser.add_argument("-t", "--type", dest="filterType", default="LPF", + help="The type of filter to use, HPF (High-Pass Filter), or LPF (Low-Pass Filter). The default is set to LPF.", metavar="FILTER_TYPE") -(options, args) = parser.parse_args() +args = parser.parse_args() try: - order = int(float(options.order)) + order = int(args.order) except ValueError: print("Wrong value for order entered.") - sys.exit() + sys.exit(1) -try: - vectorSize = int(float(options.vectorSize)) -except ValueError: - print("Wrong value for VECTOR_SIZE entered.") - sys.exit() +#comment the next block for now and compute it straight from input +#try: +# vectorSize = args.vectorSize +#except ValueError: +# print("Wrong value for VECTOR_SIZE entered.") +# sys.exit() try: - cutoff = float(options.cutoff) + cutoff = args.cutoff except ValueError: print("Wrong value for cutoff frequency entered.") sys.exit() try: - fs = float(options.fs) + fs = args.fs except ValueError: print("Wrong value for RATE entered.") - sys.exit() + sys.exit(1) -filterType = options.filterType +filterType = args.filterType filterType = filterType.upper() #if condition to check on the data entered, exit if soemthing is wrong or missing if filterType not in ["LPF","HPF"]: print("Wrong type of filter selected.") - sys.exit() + sys.exit(1) if order < 1: print("Invalid value for filter's order.") - sys.exit() + sys.exit(1) + +if fs <= 0: + print("Invalid value for data frequency.") + sys.exit(1) +if cutoff <= 0: + print("Invalid value for cutoff frequency.") + sys.exit(1) #load data from stdin as string -data = np.loadtxt(sys.stdin, dtype=str) +try: + data = np.loadtxt(sys.stdin, dtype=str) + #loadtxt will throw an exception if the rows are not consistent. +except ValueError: + print("Invalid input data.") + sys.exit(1) + +#compute the vector size from the first line +vectorSize = len(data[0]) - 1 for i in range(0,vectorSize): From 627c23d98f528a5681a44c0d05aca1cea4bfd79f Mon Sep 17 00:00:00 2001 From: HassanAmr Date: Mon, 30 Jan 2017 05:22:49 +0100 Subject: [PATCH 3/4] modified tool to work with newer versions of python --- preprocess.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/preprocess.py b/preprocess.py index d842ab6..cf11589 100644 --- a/preprocess.py +++ b/preprocess.py @@ -6,24 +6,24 @@ import sys import signal -def butter_lowpass(cutoff, fs, order=5): +def butter_lowpass(cutoff, fs, order): nyq = 0.5 * fs normal_cutoff = cutoff / nyq b, a = butter(order, normal_cutoff, btype='low', analog=False) return b, a -def butter_highpass(cutoff, fs, order=5): +def butter_highpass(cutoff, fs, order): nyq = 0.5 * fs normal_cutoff = cutoff / nyq b, a = butter(order, normal_cutoff, btype='high', analog=False) return b, a -def butter_lowpass_filter(data, cutoff, fs, order=5): +def butter_lowpass_filter(data, cutoff, fs, order): b, a = butter_lowpass(cutoff, fs, order=order) y = lfilter(b, a, data) return y -def butter_highpass_filter(data, cutoff, fs, order=5): +def butter_highpass_filter(data, cutoff, fs, order): b, a = butter_highpass(cutoff, fs, order=order) y = lfilter(b, a, data) return y @@ -45,6 +45,8 @@ def butter_highpass_filter(data, cutoff, fs, order=5): args = parser.parse_args() +#this is a default value in case the user doesn't provide it. +order=5 try: order = int(args.order) except ValueError: @@ -92,25 +94,31 @@ def butter_highpass_filter(data, cutoff, fs, order=5): #load data from stdin as string try: - data = np.loadtxt(sys.stdin, dtype=str) +# data = np.loadtxt(sys.stdin, dtype=str) + data = np.loadtxt(sys.stdin, dtype=bytes).astype(str) #loadtxt will throw an exception if the rows are not consistent. except ValueError: print("Invalid input data.") sys.exit(1) +#take the label column and place it in the final array +finalData = data[:,0] +#take the remainder in a 2d float array +floats = np.delete(data, 0, 1).astype(np.float) + #compute the vector size from the first line -vectorSize = len(data[0]) - 1 +vectorSize = len(floats[0]) for i in range(0,vectorSize): - #filter a column while skipping 1 (because of the preceding label) after converting it to float + #filter a column if filterType == "HPF": - filteredData = butter_highpass_filter(data[:, i + 1].astype(np.float), cutoff, fs, order) + filteredData = butter_highpass_filter(floats[:, i], cutoff, fs, order) elif filterType == "LPF": - filteredData = butter_lowpass_filter(data[:, i + 1].astype(np.float), cutoff, fs, order) - - #replace the column used as input to the filter with the filtered values - data[:, i + 1] = filteredData + filteredData = butter_lowpass_filter(floats[:, i], cutoff, fs, order) + + #stack each newly filtered array to the final array + finalData = np.column_stack((finalData, filteredData)) #print to stdout -print('\n'.join(['\t'.join([item for item in row]) for row in data])) +print('\n'.join(['\t'.join([item for item in row]) for row in finalData])) From cce484a9cdb9ba82deb0c78f34591ae7ac0dde49 Mon Sep 17 00:00:00 2001 From: HassanAmr Date: Mon, 30 Jan 2017 18:16:53 +0100 Subject: [PATCH 4/4] modified the code to use slice instead of delete --- preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocess.py b/preprocess.py index cf11589..a87d708 100644 --- a/preprocess.py +++ b/preprocess.py @@ -104,7 +104,7 @@ def butter_highpass_filter(data, cutoff, fs, order): #take the label column and place it in the final array finalData = data[:,0] #take the remainder in a 2d float array -floats = np.delete(data, 0, 1).astype(np.float) +floats = data[:,1:].astype(np.float) #compute the vector size from the first line vectorSize = len(floats[0])