add DataTables.jl compatibility #63

Closed · wants to merge 5 commits
REQUIRE (2 changes: 1 addition & 1 deletion)
@@ -1,5 +1,5 @@
julia 0.5
Compat 0.10.0
DataStreams 0.1.0
-DataFrames
+DataTables
WeakRefStrings 0.1.3
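The dependency swap above drives everything else in the diff: CSV.jl now targets DataTables.jl instead of DataFrames.jl as its default sink. A minimal sketch of what downstream user code looks like under this PR (the file name `data.csv` is illustrative):

```julia
using CSV, DataTables  # DataTables replaces DataFrames under this PR

# CSV.read now produces a DataTable by default
dt = CSV.read("data.csv")

# the sink type may still be passed explicitly
dt2 = CSV.read("data.csv", DataTable)
```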
benchmark/benchmarks.jl (4 changes: 2 additions & 2 deletions)
@@ -50,9 +50,9 @@ TYPES = !is_windows() ? (Int, Float64, WeakRefString{UInt8}, String, Date, DateT
end

@benchgroup "CSV.write" begin
-df = CSV.read(FILE)
+dt = CSV.read(FILE)
t = tempname()
-@bench "CSV.write" CSV.write(t, df)
+@bench "CSV.write" CSV.write(t, dt)
end

end
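The `@benchgroup`/`@bench` macros in this file belong to the PkgBenchmark.jl harness; a quick sanity check that the rename keeps the suite runnable is to drive it through that harness. A sketch, assuming PkgBenchmark.jl is installed:

```julia
using PkgBenchmark

# runs the package's benchmark/benchmarks.jl, including the "CSV.write" group above
results = benchmarkpkg("CSV")
```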
src/CSV.jl (12 changes: 6 additions & 6 deletions)
@@ -1,9 +1,9 @@
__precompile__(true)
module CSV

-using Compat, DataStreams, DataFrames, WeakRefStrings
+using Compat, DataStreams, DataTables, WeakRefStrings

-export Data, DataFrame
+export Data, DataTable

immutable CSVError <: Exception
msg::String
@@ -87,11 +87,11 @@ keyword arguments, see the docs for [`CSV.read`](@ref) or type `?CSV.read` at th

An example of re-using a `CSV.Source` is:
```julia
-# manually construct a `CSV.Source` once, then stream its data to both a DataFrame
+# manually construct a `CSV.Source` once, then stream its data to both a DataTable
# and SQLite table `sqlite_table` in the SQLite database `db`
# note the use of `CSV.reset!` to ensure the `source` can be streamed from again
source = CSV.Source(file)
-df1 = CSV.read(source, DataFrame)
+dt1 = CSV.read(source, DataTable)
CSV.reset!(source)
sq1 = CSV.read(source, SQLite.Sink, db, "sqlite_table")
```
@@ -123,11 +123,11 @@ keyword arguments, see the docs for [`CSV.write`](@ref) or type `?CSV.write` at

An example of re-using a `CSV.Sink` is:
```julia
-# manually construct a `CSV.Source` once, then stream its data to both a DataFrame
+# manually construct a `CSV.Source` once, then stream its data to both a DataTable
# and SQLite table `sqlite_table` in the SQLite database `db`
# note the use of `CSV.reset!` to ensure the `source` can be streamed from again
source = CSV.Source(file)
-df1 = CSV.read(source, DataFrame)
+dt1 = CSV.read(source, DataTable)
CSV.reset!(source)
sq1 = CSV.read(source, SQLite.Sink, db, "sqlite_table")
```
src/Sink.jl (24 changes: 12 additions & 12 deletions)
@@ -102,26 +102,26 @@ Keyword Arguments:

A few example invocations include:
```julia
-# write out a DataFrame `df` to a file named "out.csv" with all defaults, including comma as delimiter
-CSV.write("out.csv", df)
+# write out a DataTable `dt` to a file named "out.csv" with all defaults, including comma as delimiter
+CSV.write("out.csv", dt)

-# write out a DataFrame, this time as a tab-delimited file
-CSV.write("out.csv", df; delim='\t')
+# write out a DataTable, this time as a tab-delimited file
+CSV.write("out.csv", dt; delim='\t')

-# write out a DataFrame, with null values represented by the string "NA"
-CSV.write("out.csv", df; null="NA")
+# write out a DataTable, with null values represented by the string "NA"
+CSV.write("out.csv", dt; null="NA")

# write out a "header-less" file, with actual data starting on row 1
-CSV.write("out.csv", df; header=false)
+CSV.write("out.csv", dt; header=false)

-# write out a DataFrame `df` twice to a file; the resulting file will have twice the # of rows as the DataFrame
+# write out a DataTable `dt` twice to a file; the resulting file will have twice the # of rows as the DataTable
# note the usage of the keyword argument `append=true` in the 2nd call
-CSV.write("out.csv", df)
-CSV.write("out.csv", df; append=true)
+CSV.write("out.csv", dt)
+CSV.write("out.csv", dt; append=true)

-# write a DataFrame out to an IOBuffer instead of a file
+# write a DataTable out to an IOBuffer instead of a file
io = IOBuffer()
-CSV.write(io, df)
+CSV.write(io, dt)

# write the result of an SQLite query out to a comma-delimited file
db = SQLite.DB()
src/Source.jl (30 changes: 15 additions & 15 deletions)
@@ -10,7 +10,7 @@ function Source(fullpath::Union{AbstractString,IO};
datarow::Int=-1, # by default, data starts immediately after header or start of file
types::Union{Dict{Int,DataType},Dict{String,DataType},Vector{DataType}}=DataType[],
nullable::Bool=true,
-weakrefstrings::Bool=true,
+weakrefstrings::Bool=false,
dateformat::Union{AbstractString,Dates.DateFormat}=Dates.ISODateFormat,

footerskip::Int=0,
@@ -38,7 +38,7 @@ function Source(;fullpath::Union{AbstractString,IO}="",
datarow::Int=-1, # by default, data starts immediately after header or start of file
types::Union{Dict{Int,DataType},Dict{String,DataType},Vector{DataType}}=DataType[],
nullable::Bool=true,
-weakrefstrings::Bool=true,
+weakrefstrings::Bool=false,

footerskip::Int=0,
rows_for_type_detect::Int=100,
@@ -194,17 +194,17 @@ Data.streamfrom(source::CSV.Source, ::Type{Data.Field}, ::Type{Nullable{WeakRefS
Data.reference(source::CSV.Source) = source.io.data

"""
-`CSV.read(fullpath::Union{AbstractString,IO}, sink::Type{T}=DataFrame, args...; kwargs...)` => `typeof(sink)`
+`CSV.read(fullpath::Union{AbstractString,IO}, sink::Type{T}=DataTable, args...; kwargs...)` => `typeof(sink)`

`CSV.read(fullpath::Union{AbstractString,IO}, sink::Data.Sink; kwargs...)` => `Data.Sink`


-parses a delimited file into a Julia structure (a DataFrame by default, but any valid `Data.Sink` may be requested).
+parses a delimited file into a Julia structure (a DataTable by default, but any valid `Data.Sink` may be requested).

Positional arguments:

* `fullpath`; can be a file name (string) or other `IO` instance
-* `sink::Type{T}`; `DataFrame` by default, but may also be other `Data.Sink` types that support streaming via `Data.Field` interface; note that the method argument can be the *type* of `Data.Sink`, plus any required arguments the sink may need (`args...`).
+* `sink::Type{T}`; `DataTable` by default, but may also be other `Data.Sink` types that support streaming via `Data.Field` interface; note that the method argument can be the *type* of `Data.Sink`, plus any required arguments the sink may need (`args...`).
or an already constructed `sink` may be passed (2nd method above)

Keyword Arguments:
@@ -217,7 +217,7 @@ Keyword Arguments:
* `datarow::Int`; specifies the row on which the actual data starts in the file; by default, the data is expected on the next row after the header row(s); for a file without column names (header), specify `datarow=1`
* `types`; column types can be provided manually as a complete Vector{DataType}, or in a Dict to reference individual columns by name or number
* `nullable::Bool`; indicates whether values can be nullable or not; `true` by default. If set to `false` and missing values are encountered, a `NullException` will be thrown
-* `weakrefstrings::Bool=true`: indicates whether string-type columns should use the `WeakRefString` (for efficiency) or a regular `String` type
+* `weakrefstrings::Bool=false`: indicates whether string-type columns should use the `WeakRefString` (for efficiency) or a regular `String` type
* `dateformat::Union{AbstractString,Dates.DateFormat}`; how all dates/datetimes in the dataset are formatted
* `footerskip::Int`; indicates the number of rows to skip at the end of the file
* `rows_for_type_detect::Int=100`; indicates how many rows should be read to infer the types of columns
@@ -233,7 +233,7 @@ Oftentimes, however, it can be convenient to work with `WeakRefStrings` dependin
Example usage:
```
julia> dt = CSV.read("bids.csv")
-7656334×9 DataFrames.DataFrame
+7656334×9 DataTables.DataTable
│ Row │ bid_id │ bidder_id │ auction │ merchandise │ device │
├─────────┼─────────┼─────────────────────────────────────────┼─────────┼──────────────────┼─────────────┤
│ 1 │ 0 │ "8dac2b259fd1c6d1120e519fb1ac14fbqvax8" │ "ewmzr" │ "jewelry" │ "phone0" │
@@ -269,25 +269,25 @@ CSV.read(file; types=Dict("col3"=>Float64, "col6"=>String))
# this is also a way to limit the # of rows to be read in a file if only a sample is needed
CSV.read(file; rows=10000)

-# for data files, `file` and `file2`, with the same structure, read both into a single DataFrame
-# note that `df` is used as a 2nd argument in the 2nd call to `CSV.read` and the keyword argument
+# for data files, `file` and `file2`, with the same structure, read both into a single DataTable
+# note that `dt` is used as a 2nd argument in the 2nd call to `CSV.read` and the keyword argument
# `append=true` is passed
-df = CSV.read(file)
-df = CSV.read(file2, df; append=true)
+dt = CSV.read(file)
+dt = CSV.read(file2, dt; append=true)

-# manually construct a `CSV.Source` once, then stream its data to both a DataFrame
+# manually construct a `CSV.Source` once, then stream its data to both a DataTable
# and SQLite table `sqlite_table` in the SQLite database `db`
# note the use of `CSV.reset!` to ensure the `source` can be streamed from again
source = CSV.Source(file)
-df1 = CSV.read(source, DataFrame)
+dt1 = CSV.read(source, DataTable)
CSV.reset!(source)
db = SQLite.DB()
sq1 = CSV.read(source, SQLite.Sink, db, "sqlite_table")
```
"""
function read end

-function read(fullpath::Union{AbstractString,IO}, sink=DataFrame, args...; append::Bool=false, transforms::Dict=Dict{Int,Function}(), kwargs...)
+function read(fullpath::Union{AbstractString,IO}, sink=DataTable, args...; append::Bool=false, transforms::Dict=Dict{Int,Function}(), kwargs...)
source = Source(fullpath; kwargs...)
sink = Data.stream!(source, sink, append, transforms, args...)
Data.close!(sink)
@@ -301,5 +301,5 @@ function read{T}(fullpath::Union{AbstractString,IO}, sink::T; append::Bool=false
return sink
end

-read(source::CSV.Source, sink=DataFrame, args...; append::Bool=false, transforms::Dict=Dict{Int,Function}()) = (sink = Data.stream!(source, sink, append, transforms, args...); Data.close!(sink); return sink)
+read(source::CSV.Source, sink=DataTable, args...; append::Bool=false, transforms::Dict=Dict{Int,Function}()) = (sink = Data.stream!(source, sink, append, transforms, args...); Data.close!(sink); return sink)
read{T}(source::CSV.Source, sink::T; append::Bool=false, transforms::Dict=Dict{Int,Function}()) = (sink = Data.stream!(source, sink, append, transforms); Data.close!(sink); return sink)
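Besides the sink rename, this file also flips the `weakrefstrings` default from `true` to `false`, so string columns now come back as plain `String`s unless requested otherwise. A minimal sketch of opting back in via the keyword documented above (the file name is illustrative):

```julia
using CSV, DataTables, WeakRefStrings

# default under this PR: string columns are materialized as regular Strings
dt = CSV.read("data.csv")

# opt back into WeakRefString columns for parsing efficiency
dt_weak = CSV.read("data.csv"; weakrefstrings=true)
```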
src/io.jl (4 changes: 2 additions & 2 deletions)
@@ -125,8 +125,8 @@ end
immutable NullField end

# try to infer the type of the value in `val`. The precedence of type checking is `Int` => `Float64` => `Date` => `DateTime` => `String`
-slottype{T}(df::Dates.Slot{T}) = T
-timetype(df::Dates.DateFormat) = any(slottype(T) in (Dates.Hour,Dates.Minute,Dates.Second,Dates.Millisecond) for T in df.slots) ? DateTime : Date
+slottype{T}(dt::Dates.Slot{T}) = T
+timetype(dt::Dates.DateFormat) = any(slottype(T) in (Dates.Hour,Dates.Minute,Dates.Second,Dates.Millisecond) for T in dt.slots) ? DateTime : Date

function detecttype(val::AbstractString, format, datecheck, null)
(val == "" || val == null) && return NullField
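The `df` → `dt` rename in `slottype`/`timetype` is purely cosmetic (the argument is a `Dates.DateFormat`, swept up by the blanket rename), but the logic deserves a gloss: `timetype` picks `DateTime` whenever the format contains a sub-day slot, and `Date` otherwise. A hypothetical REPL illustration, assuming these internal functions are reached through the `CSV` module:

```
julia> CSV.timetype(Dates.DateFormat("yyyy-mm-dd"))
Date

julia> CSV.timetype(Dates.DateFormat("yyyy-mm-dd HH:MM:SS"))
DateTime
```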
test/datastreams.jl (20 changes: 10 additions & 10 deletions)
@@ -1,17 +1,17 @@

-# DataFrames
+# DataTables
FILE = joinpath(DSTESTDIR, "randoms_small.csv")
-DF = CSV.read(FILE)
-DF2 = CSV.read(FILE)
-dfsource = Tester("DataFrame", x->x, false, DataFrame, (:DF,), scalartransforms, vectortransforms, x->x, x->nothing)
-dfsink = Tester("DataFrame", x->x, false, DataFrame, (:DF2,), scalartransforms, vectortransforms, x->x, x->nothing)
-function DataFrames.DataFrame(sym::Symbol; append::Bool=false)
+DT = CSV.read(FILE)
+DT2 = CSV.read(FILE)
+dtsource = Tester("DataTable", x->x, false, DataTable, (:DT,), scalartransforms, vectortransforms, x->x, x->nothing)
+dtsink = Tester("DataTable", x->x, false, DataTable, (:DT2,), scalartransforms, vectortransforms, x->x, x->nothing)
+function DataTables.DataTable(sym::Symbol; append::Bool=false)
return @eval $sym
end
-function DataFrames.DataFrame(sch::Data.Schema, ::Type{Data.Field}, append::Bool, ref::Vector{UInt8}, sym::Symbol)
-return DataFrame(DataFrame(sym), sch, Data.Field, append, ref)
+function DataTables.DataTable(sch::Data.Schema, ::Type{Data.Field}, append::Bool, ref::Vector{UInt8}, sym::Symbol)
+return DataTable(DataTable(sym), sch, Data.Field, append, ref)
end
-function DataFrame(sink, sch::Data.Schema, ::Type{Data.Field}, append::Bool, ref::Vector{UInt8})
+function DataTable(sink, sch::Data.Schema, ::Type{Data.Field}, append::Bool, ref::Vector{UInt8})
rows, cols = size(sch)
newsize = max(0, rows + (append ? size(sink, 1) : 0))
# need to make sure we don't break a NullableVector{WeakRefString{UInt8}} when appending
@@ -46,4 +46,4 @@ FILE2 = joinpath(DSTESTDIR, "randoms2_small.csv")
csvsource = Tester("CSV.Source", CSV.read, true, CSV.Source, (FILE,), scalartransforms, vectortransforms, x->x, x->nothing)
csvsink = Tester("CSV.Sink", CSV.write, true, CSV.Sink, (FILE2,), scalartransforms, vectortransforms, x->CSV.read(FILE2; use_mmap=false), x->rm(FILE2))

-DataStreamsIntegrationTests.teststream([dfsource, csvsource], [dfsink, csvsink]; rows=99)
+DataStreamsIntegrationTests.teststream([dtsource, csvsource], [dtsink, csvsink]; rows=99)
test/runtests.jl (2 changes: 1 addition & 1 deletion)
@@ -1,5 +1,5 @@
using CSV
-using Base.Test, DataStreams, DataFrames, NullableArrays, WeakRefStrings, Libz, DecFP
+using Base.Test, DataStreams, DataTables, NullableArrays, WeakRefStrings, Libz, DecFP

include("parsefields.jl")
include("io.jl")