Skip to content

Commit

Permalink
Merge pull request #23 from anshuman23/dev
Browse files Browse the repository at this point in the history
Added fast C based direct CSV-to-matrix functionality with options
  • Loading branch information
anshuman23 authored Jul 18, 2018
2 parents f9962d5 + bcd323d commit eabb7cd
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 0 deletions.
71 changes: 71 additions & 0 deletions c_src/Tensorflex.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ typedef union
} mx_t;

/* Accesses element (ROW, COL) of the matrix pointed to by MX; the flat
 * `data` array is indexed as ROW * ncols + COL. */
#define POS(MX, ROW, COL) ((MX)->data[(ROW)* (MX)->ncols + (COL)])
/* Size, in bytes, of the line buffer used when reading CSV files. */
#define BUF_SIZE 500000

static int get_number(ErlNifEnv* env, ERL_NIF_TERM term, double* dp);
static Matrix* alloc_matrix(ErlNifEnv* env, unsigned nrows, unsigned ncols);
Expand Down Expand Up @@ -726,6 +727,75 @@ static ERL_NIF_TERM load_image_as_tensor(ErlNifEnv *env, int argc, const ERL_NIF

}

static ERL_NIF_TERM load_csv_as_matrix(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
{
ERL_NIF_TERM mat_ret;
ErlNifBinary filepath;
enif_inspect_binary(env,argv[0], &filepath);
char* file = enif_alloc(filepath.size+1);
memset(file, 0, filepath.size+1);
memcpy(file, (void *) filepath.data, filepath.size);
char buf_init[BUF_SIZE], buf[BUF_SIZE];
char *val_init, *line_init, *val, *line;

unsigned int header_atom_len;
enif_get_atom_length(env, argv[1], &header_atom_len, ERL_NIF_LATIN1);
char* header_atom = (char*)enif_alloc(header_atom_len + 1);
enif_get_atom(env, argv[1], header_atom, header_atom_len + 1, ERL_NIF_LATIN1);

ErlNifBinary delimiter;
enif_inspect_binary(env,argv[2], &delimiter);
char* delimiter_str = enif_alloc(delimiter.size+1);
memset(delimiter_str, 0, delimiter.size+1);
memcpy(delimiter_str, (void *) delimiter.data, delimiter.size);

FILE *f_init = fopen(file, "rb");
unsigned i = 0, j = 0;
while((line_init=fgets(buf_init,sizeof(buf_init),f_init))!=NULL) {
j = 0;
val_init = strtok(line_init,delimiter_str);
while(val_init != NULL) {
val_init = strtok(NULL,delimiter_str);
j++;
}
i++;
}
fclose(f_init);

int flag = 0;
if(strcmp(header_atom, "true") == 0) {
i--;
flag = 1;
}

mx_t mx;
mx.p = alloc_matrix(env, i, j);
FILE *f = fopen(file, "rb");
i = 0;
while((line=fgets(buf,sizeof(buf),f))!=NULL) {
j = 0;
val = strtok(line,delimiter_str);
while(val != NULL) {
if(flag == 0) {
POS(mx.p, i, j) = atof(val);
j++;
}
val = strtok(NULL,delimiter_str);
}

if(flag == 1){
flag = 0;
i--;
}
i++;
}
fclose(f);

mat_ret = enif_make_resource(env, mx.p);
enif_release_resource(mx.p);
return mat_ret;
}


static ErlNifFunc nif_funcs[] =
{
Expand All @@ -747,6 +817,7 @@ static ErlNifFunc nif_funcs[] =
{ "float32_tensor_alloc", 1, float32_tensor_alloc },
{ "run_session", 5, run_session },
{ "load_image_as_tensor", 1, load_image_as_tensor },
{ "load_csv_as_matrix", 3, load_csv_as_matrix },
};

ERL_NIF_INIT(Elixir.Tensorflex.NIFs, nif_funcs, res_loader, NULL, NULL, NULL)
Expand Down
4 changes: 4 additions & 0 deletions lib/nifs.ex
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ defmodule Tensorflex.NIFs do
raise "NIF load_image_as_tensor/1 not implemented"
end

# Elixir-side placeholder for the native `load_csv_as_matrix/3`; it raises
# because this body only runs when the C implementation is not in place.
def load_csv_as_matrix(_filepath, _header, _delimiter),
  do: raise("NIF load_csv_as_matrix/3 not implemented")

# Elixir-side placeholder for the native `run_session/5`; it raises because
# this body only runs when the C implementation is not in place.
def run_session(_graph, _input_tensor, _output_tensor, _input_opname, _output_opname),
  do: raise("NIF run_session/5 not implemented")
Expand Down
22 changes: 22 additions & 0 deletions lib/tensorflex.ex
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,28 @@ defmodule Tensorflex do
{:ok, %Tensor{datatype: :tf_uint8, tensor: ref}}
end

@doc """
Loads the CSV file at `filepath` directly into a matrix using the native
CSV reader.

## Options

  * `:header` - `true` (default) when the first line of the file is a header
    that must be skipped, `false` otherwise.
  * `:delimiter` - string separating the values of a line; defaults to `","`.

Returns a `%Matrix{}` whose `nrows`/`ncols` are read back from the native
matrix and whose `data` is the native reference.

Raises `ArgumentError` when `filepath` or the delimiter is not a string, when
the file does not exist or does not have the `.csv` extension, or when
`:header` is not a boolean.
"""
def load_csv_as_matrix(filepath, opts \\ []) do
  # The native code inspects both values as binaries, so anything else must
  # be rejected before it crosses the NIF boundary.
  if not is_binary(filepath) do
    raise ArgumentError, "filepath must be a string"
  end

  if not File.exists?(filepath) do
    raise ArgumentError, "csv file does not exist"
  end

  if Path.extname(filepath) != ".csv" do
    raise ArgumentError, "file is not a CSV file"
  end

  defaults = [header: true, delimiter: ","]
  %{header: header, delimiter: delimiter} = defaults |> Keyword.merge(opts) |> Enum.into(%{})

  if not is_boolean(header) do
    raise ArgumentError, "header indicator atom must be either :true or :false"
  end

  if not is_binary(delimiter) do
    raise ArgumentError, "delimiter must be a string"
  end

  ref = NIFs.load_csv_as_matrix(filepath, header, delimiter)
  {nrows, ncols} = NIFs.size_of_matrix(ref)
  %Matrix{nrows: nrows, ncols: ncols, data: ref}
end

# Runs a session by handing the graph definition, the native input/output
# tensor references and the operation names to the NIF layer. The remaining
# struct fields are only pattern-matched and not used.
def run_session(
      %Graph{def: graph_def, name: _filepath},
      %Tensor{datatype: _input_datatype, tensor: input_tensor_ref},
      %Tensor{datatype: _output_datatype, tensor: output_tensor_ref},
      input_opname,
      output_opname
    ) do
  NIFs.run_session(graph_def, input_tensor_ref, output_tensor_ref, input_opname, output_opname)
end
Expand Down
3 changes: 3 additions & 0 deletions test/sample1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
4 changes: 4 additions & 0 deletions test/sample2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
col1-col2-col3-col4
1-2-3-4
5-6-7-8
9-10-11-12

0 comments on commit eabb7cd

Please sign in to comment.