From 8db28216332d48075e5571671cb70cdf69a81a17 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Fri, 10 Mar 2017 10:51:19 -0800
Subject: [PATCH 01/43] add changes

---
 src/DataTables.jl                          |   4 +
 src/abstractdatatable/abstractdatatable.jl | 248 +++++++++++++++------
 src/abstractdatatable/io.jl                |  17 +-
 src/abstractdatatable/join.jl              |  22 +-
 src/abstractdatatable/reshape.jl           |  18 +-
 src/datatable/datatable.jl                 | 140 ++++--------
 src/groupeddatatable/grouping.jl           |   2 +-
 test/cat.jl                                |  45 ++--
 test/constructors.jl                       |  67 +++++-
 test/conversions.jl                        |  22 +-
 test/data.jl                               |  44 ++--
 test/datatable.jl                          | 133 +++++------
 test/grouping.jl                           |  63 ++++--
 test/index.jl                              |   2 +-
 test/iteration.jl                          |  14 +-
 test/join.jl                               |  16 +-
 16 files changed, 462 insertions(+), 395 deletions(-)

diff --git a/src/DataTables.jl b/src/DataTables.jl
index e69a70b..799f7f6 100644
--- a/src/DataTables.jl
+++ b/src/DataTables.jl
@@ -47,6 +47,8 @@ export @~,
        combine,
        completecases,
        deleterows!,
+       denullify!,
+       denullify,
        describe,
        dropnull,
        dropnull!,
@@ -61,6 +63,8 @@ export @~,
        nonunique,
        nrow,
        nullable!,
+       nullify!,
+       nullify,
        order,
        printtable,
        rename!,
diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index a885136..ef98fd5 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -31,6 +31,10 @@ The following are normally implemented for AbstractDataTables:
 * [`nonunique`](@ref) : indexes of duplicate rows
 * [`unique!`](@ref) : remove duplicate rows
 * `similar` : a DataTable with similar columns as `d`
+* `denullify` : unwrap `Nullable` columns
+* `denullify!` : unwrap `Nullable` columns in-place
+* `nullify` : convert all columns to NullableArrays
+* `nullify!` : convert all columns to NullableArrays in-place
 
 **Indexing**
 
@@ -711,78 +715,23 @@ Base.hcat(dt1::AbstractDataTable, dt2::AbstractDataTable, dtn::AbstractDataTable
 
 Base.vcat(dt::AbstractDataTable) = dt
 
-Base.vcat(dts::AbstractDataTable...) = vcat(AbstractDataTable[dts...])
-
-function Base.vcat{T<:AbstractDataTable}(dts::Vector{T})
+function Base.vcat(dts::AbstractDataTable...)
     isempty(dts) && return DataTable()
-    coltyps, colnams, similars = _colinfo(dts)
-
-    res = DataTable()
-    Nrow = sum(nrow, dts)
-    for j in 1:length(colnams)
-        colnam = colnams[j]
-        col = similar(similars[j], coltyps[j], Nrow)
-
-        i = 1
-        for dt in dts
-            if haskey(dt, colnam)
-                copy!(col, i, dt[colnam])
-            end
-            i += size(dt, 1)
-        end
-
-        res[colnam] = col
-    end
-    res
-end
-
-_isnullable{T}(::AbstractArray{T}) = T <: Nullable
-const EMPTY_DATA = NullableArray(Void, 0)
-
-function _colinfo{T<:AbstractDataTable}(dts::Vector{T})
-    dt1 = dts[1]
-    colindex = copy(index(dt1))
-    coltyps = eltypes(dt1)
-    similars = collect(columns(dt1))
-    nonnull_ct = Int[_isnullable(c) for c in columns(dt1)]
-
-    for i in 2:length(dts)
-        dt = dts[i]
-        for j in 1:size(dt, 2)
-            col = dt[j]
-            cn, ct = _names(dt)[j], eltype(col)
-            if haskey(colindex, cn)
-                idx = colindex[cn]
-
-                oldtyp = coltyps[idx]
-                if !(ct <: oldtyp)
-                    coltyps[idx] = promote_type(oldtyp, ct)
-                    # Needed on Julia 0.4 since e.g.
-                    # promote_type(Nullable{Int}, Nullable{Float64}) gives Nullable{T},
-                    # which is not a usable type: fall back to Nullable{Any}
-                    if VERSION < v"0.5.0-dev" &&
-                       coltyps[idx] <: Nullable && !isa(coltyps[idx].types[2], DataType)
-                        coltyps[idx] = Nullable{Any}
-                    end
-                end
-                nonnull_ct[idx] += !_isnullable(col)
-            else # new column
-                push!(colindex, cn)
-                push!(coltyps, ct)
-                push!(similars, col)
-                push!(nonnull_ct, !_isnullable(col))
-            end
-        end
-    end
-
-    for j in 1:length(colindex)
-        if nonnull_ct[j] < length(dts) && !_isnullable(similars[j])
-            similars[j] = EMPTY_DATA
-        end
+    allheaders = map(names, dts)
+    # don't vcat empty DataTables: only headers of tables that have columns are compared
+    notempty = find(x -> length(x) > 0, allheaders)
+    uniqueheaders = unique(allheaders[notempty])
+    if length(uniqueheaders) == 0
+        return DataTable()
+    elseif length(unique(map(length, uniqueheaders))) > 1
+        throw(ArgumentError("not all DataTables have the same number of columns. Resolve column(s): $(setdiff(union(uniqueheaders...), intersect(uniqueheaders...)))"))
+    elseif length(uniqueheaders) > 1
+        throw(ArgumentError("Column names do not match. Use `rename!` or `names!` to adjust columns names. Resolve column(s): $(setdiff(union(uniqueheaders...), intersect(uniqueheaders...)))"))
+    else
+        header = uniqueheaders[1]  # every non-empty table shares this exact header
+        dts_to_vcat = dts[notempty]
+        return DataTable(Any[vcat(map(dt -> dt[col], dts_to_vcat)...) for col in header], header)
     end
-    colnams = _names(colindex)
-
-    coltyps, colnams, similars
 end
 
 ##############################################################################
@@ -801,6 +750,165 @@ function Base.hash(dt::AbstractDataTable)
     return @compat UInt(h)
 end
 
+"""
+    denullify!(dt::AbstractDataTable)
+
+Convert columns with a `Nullable` element type without any null values
+to a non-`Nullable` equivalent array type. The table `dt` is modified in place.
+
+# Examples
+
+```jldoctest
+julia> dt = DataTable(A = NullableArray(1:3), B = [Nullable(i) for i=1:3])
+3×2 DataTables.DataTable
+│ Row │ A │ B │
+├─────┼───┼───┤
+│ 1   │ 1 │ 1 │
+│ 2   │ 2 │ 2 │
+│ 3   │ 3 │ 3 │
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Nullable{Int64}
+ Nullable{Int64}
+
+julia> eltypes(denullify!(dt))
+2-element Array{Type,1}:
+ Int64
+ Int64
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Int64
+ Int64
+```
+
+See also [`denullify`](@ref) & [`nullify!`](@ref).
+"""
+function denullify!(dt::AbstractDataTable)
+    for i in 1:size(dt,2)
+        if !anynull(dt[i])
+            dt[i] = dropnull(dt[i])
+        end
+    end
+    dt
+end
+
+"""
+    denullify(dt::AbstractDataTable)
+
+Return a copy of `dt` where columns with a `Nullable` element type without any
+null values have been converted to a non-`Nullable` equivalent array type.
+
+# Examples
+
+```jldoctest
+julia> dt = DataTable(A = NullableArray(1:3), B = [Nullable(i) for i=1:3])
+3×2 DataTables.DataTable
+│ Row │ A │ B │
+├─────┼───┼───┤
+│ 1   │ 1 │ 1 │
+│ 2   │ 2 │ 2 │
+│ 3   │ 3 │ 3 │
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Nullable{Int64}
+ Nullable{Int64}
+
+julia> eltypes(denullify(dt))
+2-element Array{Type,1}:
+ Int64
+ Int64
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Nullable{Int64}
+ Nullable{Int64}
+```
+
+See also [`denullify!`](@ref) & [`nullify`](@ref).
+"""
+denullify(dt::AbstractDataTable) = denullify!(copy(dt))
+
+"""
+    nullify!(dt::AbstractDataTable)
+
+Convert all columns of `dt` to nullable arrays. The table `dt` is modified in place.
+
+# Examples
+
+```jldoctest
+julia> dt = DataTable(A = 1:3, B = 1:3)
+3×2 DataTables.DataTable
+│ Row │ A │ B │
+├─────┼───┼───┤
+│ 1   │ 1 │ 1 │
+│ 2   │ 2 │ 2 │
+│ 3   │ 3 │ 3 │
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Int64
+ Int64
+
+julia> eltypes(nullify!(dt))
+2-element Array{Type,1}:
+ Nullable{Int64}
+ Nullable{Int64}
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Nullable{Int64}
+ Nullable{Int64}
+```
+
+See also [`nullify`](@ref) & [`denullify!`](@ref).
+"""
+function nullify!(dt::AbstractDataTable)
+    for i in 1:size(dt,2)
+        dt[i] = NullableArray(dt[i])
+    end
+    dt
+end
+
+"""
+    nullify(dt::AbstractDataTable)
+
+Return a copy of `dt` with all columns converted to nullable arrays.
+
+# Examples
+
+```jldoctest
+julia> dt = DataTable(A = 1:3, B = 1:3)
+3×2 DataTables.DataTable
+│ Row │ A │ B │
+├─────┼───┼───┤
+│ 1   │ 1 │ 1 │
+│ 2   │ 2 │ 2 │
+│ 3   │ 3 │ 3 │
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Int64
+ Int64
+
+julia> eltypes(nullify(dt))
+2-element Array{Type,1}:
+ Nullable{Int64}
+ Nullable{Int64}
+
+julia> eltypes(dt)
+2-element Array{Type,1}:
+ Int64
+ Int64
+```
+
+See also [`nullify!`](@ref) & [`denullify`](@ref).
+"""
+function nullify(dt::AbstractDataTable)
+    nullify!(copy(dt))
+end
 
 ## Documentation for methods defined elsewhere
 
diff --git a/src/abstractdatatable/io.jl b/src/abstractdatatable/io.jl
index 8ec11a4..7d14196 100644
--- a/src/abstractdatatable/io.jl
+++ b/src/abstractdatatable/io.jl
@@ -42,19 +42,20 @@ function printtable(io::IO,
     quotestr = string(quotemark)
     for i in 1:n
         for j in 1:p
-            if !isnull(dt[j],i)
+            if !isnull(dt[j][i])
                 if ! (etypes[j] <: Real)
-		    print(io, quotemark)
-		    escapedprint(io, get(dt[i, j]), quotestr)
-		    print(io, quotemark)
+                    print(io, quotemark)
+                    x = isa(dt[i, j], Nullable) ? get(dt[i, j]) : dt[i, j]
+                    escapedprint(io, x, quotestr)
+                    print(io, quotemark)
                 else
-		    print(io, dt[i, j])
+                    print(io, dt[i, j])
                 end
             else
-		print(io, nastring)
+                print(io, nastring)
             end
             if j < p
-		print(io, separator)
+                print(io, separator)
             else
                 print(io, '\n')
             end
@@ -167,7 +168,7 @@ function Base.show(io::IO, ::MIME"text/latex", dt::AbstractDataTable)
             write(io, " & ")
             cell = dt[row,col]
             if !isnull(cell)
-                content = get(cell)
+                content = isa(cell, Nullable) ? get(cell) : cell
                 if mimewritable(MIME("text/latex"), content)
                     show(io, MIME("text/latex"), content)
                 else
diff --git a/src/abstractdatatable/join.jl b/src/abstractdatatable/join.jl
index 1ad170b..94e9f1d 100644
--- a/src/abstractdatatable/join.jl
+++ b/src/abstractdatatable/join.jl
@@ -2,19 +2,6 @@
 ## Join / merge
 ##
 
-# Like similar, but returns a nullable array
-similar_nullable{T}(dv::AbstractArray{T}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) =
-    NullableArray(T, dims)
-
-similar_nullable{T<:Nullable}(dv::AbstractArray{T}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) =
-    NullableArray(eltype(T), dims)
-
-similar_nullable{T,R}(dv::CategoricalArray{T,R}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) =
-    NullableCategoricalArray(T, dims)
-
-similar_nullable(dt::AbstractDataTable, dims::Int) =
-    DataTable(Any[similar_nullable(x, dims) for x in columns(dt)], copy(index(dt)))
-
 # helper structure for DataTables joining
 immutable DataTableJoiner{DT1<:AbstractDataTable, DT2<:AbstractDataTable}
     dtl::DT1
@@ -76,9 +63,12 @@ function compose_joined_table(joiner::DataTableJoiner,
         right_perm[vcat(right_ixs.join, leftonly_ixs.join)] = right_perm[1:ril+loil]
     end
     all_orig_right_ixs = vcat(right_ixs.orig, rightonly_ixs.orig)
-    right_dt = DataTable(Any[resize!(col[all_orig_right_ixs], length(all_orig_right_ixs)+loil)[right_perm]
-                             for col in columns(dtr_noon)],
-                         names(dtr_noon))
+    resizelen = length(all_orig_right_ixs)+length(leftonly_ixs)
+    rightcols = Any[length(col[all_orig_right_ixs]) >= resizelen ?
+                               resize!(col[all_orig_right_ixs], resizelen)[right_perm] :
+                               NullableArray(vcat(col[all_orig_right_ixs], fill(Nullable(), resizelen - length(col[all_orig_right_ixs]))))[right_perm]
+                    for col in columns(dtr_noon)]
+    right_dt = DataTable(rightcols, names(dtr_noon))
     # merge left and right parts of the joined table
     res = hcat!(left_dt, right_dt)
 
diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl
index ed4d519..60fb485 100644
--- a/src/abstractdatatable/reshape.jl
+++ b/src/abstractdatatable/reshape.jl
@@ -202,21 +202,16 @@ function unstack(dt::AbstractDataTable, rowkey::Int, colkey::Int, value::Int)
     if T <: Nullable
         T = eltype(T)
     end
-    payload = DataTable(Any[NullableArray(T, Nrow) for i in 1:Ncol],
+    payload = DataTable(Any[NullableVector{T}(Nrow) for i in 1:Ncol],
                         map(Symbol, levels(keycol)))
-    nowarning = true
     for k in 1:nrow(dt)
         j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]])
         i = Int(CategoricalArrays.order(refkeycol.pool)[refkeycol.refs[k]])
         if i > 0 && j > 0
-            if nowarning && !isnull(payload[j][i])
-                warn("Duplicate entries in unstack.")
-                nowarning = false
-            end
             payload[j][i]  = valuecol[k]
         end
     end
-    insert!(payload, 1, NullableArray(levels(refkeycol)), _names(dt)[rowkey])
+    denullify!(insert!(payload, 1, levels(refkeycol), _names(dt)[rowkey]))
 end
 unstack(dt::AbstractDataTable, rowkey, colkey, value) =
     unstack(dt, index(dt)[rowkey], index(dt)[colkey], index(dt)[value])
@@ -242,21 +237,16 @@ function unstack(dt::AbstractDataTable, colkey::Int, value::Int)
     if T <: Nullable
         T = eltype(T)
     end
-    dt2 = DataTable(Any[NullableArray(T, Nrow) for i in 1:Ncol],
+    dt2 = DataTable(Any[NullableVector{T}(Nrow) for i in 1:Ncol],
                     map(@compat(Symbol), levels(keycol)))
-    nowarning = true
     for k in 1:nrow(dt)
         j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]])
         i = rowkey[k]
         if i > 0 && j > 0
-            if nowarning && !isnull(dt2[j][i])
-                warn("Duplicate entries in unstack at row $k.")
-                nowarning = false
-            end
             dt2[j][i]  = valuecol[k]
         end
     end
-    hcat(dt1, dt2)
+    denullify!(hcat(dt1, dt2))
 end
 
 unstack(dt::AbstractDataTable) = unstack(dt, :id, :variable, :value)
diff --git a/src/datatable/datatable.jl b/src/datatable/datatable.jl
index 5eb0e7b..c39feb2 100644
--- a/src/datatable/datatable.jl
+++ b/src/datatable/datatable.jl
@@ -74,32 +74,41 @@ type DataTable <: AbstractDataTable
     colindex::Index
 
     function DataTable(columns::Vector{Any}, colindex::Index)
-        ncols = length(columns)
-        if ncols > 1
-            nrows = length(columns[1])
-            equallengths = true
-            for i in 2:ncols
-                equallengths &= length(columns[i]) == nrows
-            end
-            if !equallengths
-                msg = "All columns in a DataTable must be the same length"
-                throw(ArgumentError(msg))
-            end
+        if length(columns) == length(colindex) == 0
+            return new(Vector{Any}(0), Index())
+        elseif length(columns) != length(colindex)
+            throw(DimensionMismatch("Number of columns and column names are different"))
         end
-        if length(colindex) != ncols
-            msg = "Columns and column index must be the same length"
-            throw(ArgumentError(msg))
+        lengths = length.(columns)
+        minlen, maxlen = extrema(lengths)
+        if minlen == 0 && maxlen == 0
+            return new(columns, colindex)
+        elseif (minlen == 0 && maxlen > 0) || any(x -> x != 0, mod(maxlen, lengths))
+            throw(DimensionMismatch("Incompatible lengths of arguments"))
+        else
+            for i in 1:length(columns)
+                if isa(columns[i], Range)
+                    columns[i] = collect(columns[i])
+                end
+                repeats = div(maxlen, length(columns[i]))
+                if repeats == 1 && !(typeof(columns[i]) <: AbstractVector)
+                    columns[i] = [columns[i]]
+                elseif repeats !== 1
+                    columns[i] = isa(columns[i], Array) ? repeat(columns[i], outer=repeats) : fill(columns[i], repeats)
+                end
+            end
         end
-        new(columns, colindex)
+        return new(columns, colindex)
     end
 end
 
 function DataTable(; kwargs...)
-    result = DataTable(Any[], Index())
-    for (k, v) in kwargs
-        result[k] = v
+    if length(kwargs) == 0
+        return DataTable(Any[], Index())
     end
-    return result
+    columns = Any[v for (k,v) in kwargs]
+    colindex = DataTables.Index([k for (k,v) in kwargs])
+    DataTable(columns, colindex)
 end
 
 function DataTable(columns::AbstractVector,
@@ -112,7 +121,7 @@ end
 function DataTable(t::Type, nrows::Integer, ncols::Integer)
     columns = Vector{Any}(ncols)
     for i in 1:ncols
-        columns[i] = NullableArray(t, nrows)
+        columns[i] = Vector{t}(nrows)
     end
     cnames = gennames(ncols)
     return DataTable(columns, Index(cnames))
@@ -123,21 +132,21 @@ function DataTable(column_eltypes::Vector, cnames::Vector, nrows::Integer)
     p = length(column_eltypes)
     columns = Vector{Any}(p)
     for j in 1:p
-        columns[j] = NullableArray(column_eltypes[j], nrows)
+        columns[j] = Vector{column_eltypes[j]}(nrows)
     end
     return DataTable(columns, Index(cnames))
 end
 # Initialize an empty DataTable with specific eltypes and names
 # and whether a nominal array should be created
-function DataTable(column_eltypes::Vector{DataType}, cnames::Vector{Symbol},
+function DataTable(column_eltypes::Vector, cnames::Vector,
                    nominal::Vector{Bool}, nrows::Integer)
     p = length(column_eltypes)
     columns = Vector{Any}(p)
     for j in 1:p
       if nominal[j]
-        columns[j] = NullableCategoricalArray{column_eltypes[j]}(nrows)
+        columns[j] = CategoricalVector{column_eltypes[j]}(nrows)
       else
-        columns[j] = NullableArray{column_eltypes[j]}(nrows)
+        columns[j] = Vector{column_eltypes[j]}(nrows)
       end
     end
     return DataTable(columns, Index(cnames))
@@ -149,44 +158,11 @@ function DataTable(column_eltypes::Vector, nrows::Integer)
     columns = Vector{Any}(p)
     cnames = gennames(p)
     for j in 1:p
-        columns[j] = NullableArray{column_eltypes[j]}(nrows)
+        columns[j] = Vector{column_eltypes[j]}(nrows)
     end
     return DataTable(columns, Index(cnames))
 end
 
-# Initialize from a Vector of Associatives (aka list of dicts)
-function DataTable{D <: Associative}(ds::Vector{D})
-    ks = Set()
-    for d in ds
-        union!(ks, keys(d))
-    end
-    DataTable(ds, [ks...])
-end
-
-# Initialize from a Vector of Associatives (aka list of dicts)
-function DataTable{D <: Associative}(ds::Vector{D}, ks::Vector)
-    #get column eltypes
-    col_eltypes = Type[@compat(Union{}) for _ = 1:length(ks)]
-    for d in ds
-        for (i,k) in enumerate(ks)
-            if haskey(d, k) && !_isnull(d[k])
-                col_eltypes[i] = promote_type(col_eltypes[i], typeof(d[k]))
-            end
-        end
-    end
-    col_eltypes[col_eltypes .== @compat(Union{})] = Any
-
-    # create empty DataTable, and fill
-    dt = DataTable(col_eltypes, ks, length(ds))
-    for (i,d) in enumerate(ds)
-        for (j,k) in enumerate(ks)
-            dt[i,j] = get(d, k, Nullable())
-        end
-    end
-
-    dt
-end
-
 ##############################################################################
 ##
 ## AbstractDataTable interface
@@ -363,24 +339,20 @@ function insert_multiple_entries!{T <: Real}(dt::DataTable,
     end
 end
 
-upgrade_vector{T<:Nullable}(v::AbstractArray{T}) = v
-upgrade_vector(v::CategoricalArray) = NullableCategoricalArray(v)
-upgrade_vector(v::AbstractArray) = NullableArray(v)
-
 function upgrade_scalar(dt::DataTable, v::AbstractArray)
     msg = "setindex!(::DataTable, ...) only broadcasts scalars, not arrays"
     throw(ArgumentError(msg))
 end
 function upgrade_scalar(dt::DataTable, v::Any)
     n = (ncol(dt) == 0) ? 1 : nrow(dt)
-    NullableArray(fill(v, n))
+    fill(v, n)
 end
 
 # dt[SingleColumnIndex] = AbstractVector
 function Base.setindex!(dt::DataTable,
                 v::AbstractVector,
                 col_ind::ColumnIndex)
-    insert_single_column!(dt, upgrade_vector(v), col_ind)
+    insert_single_column!(dt, v, col_ind)
 end
 
 # dt[SingleColumnIndex] = Single Item (EXPANDS TO NROW(DT) if NCOL(DT) > 0)
@@ -417,9 +389,8 @@ end
 function Base.setindex!{T <: ColumnIndex}(dt::DataTable,
                                   v::AbstractVector,
                                   col_inds::AbstractVector{T})
-    dv = upgrade_vector(v)
     for col_ind in col_inds
-        dt[col_ind] = dv
+        dt[col_ind] = v
     end
     return dt
 end
@@ -757,8 +728,8 @@ end
 hcat!(dt::DataTable, x::CategoricalArray) = hcat!(dt, DataTable(Any[x]))
 hcat!(dt::DataTable, x::NullableCategoricalArray) = hcat!(dt, DataTable(Any[x]))
 hcat!(dt::DataTable, x::NullableVector) = hcat!(dt, DataTable(Any[x]))
-hcat!(dt::DataTable, x::Vector) = hcat!(dt, DataTable(Any[NullableArray(x)]))
-hcat!(dt::DataTable, x) = hcat!(dt, DataTable(Any[NullableArray([x])]))
+hcat!(dt::DataTable, x::Vector) = hcat!(dt, DataTable(Any[(x)]))
+hcat!(dt::DataTable, x) = hcat!(dt, DataTable(Any[([x])]))
 
 # hcat! for 1-n arguments
 hcat!(dt::DataTable) = dt
@@ -834,35 +805,12 @@ function Base.convert(::Type{DataTable}, A::Matrix)
     return DataTable(cols, Index(gennames(n)))
 end
 
-function _datatable_from_associative(dnames, d::Associative)
-    p = length(dnames)
-    p == 0 && return DataTable()
-    columns  = Vector{Any}(p)
-    colnames = Vector{Symbol}(p)
-    n = length(d[dnames[1]])
-    for j in 1:p
-        name = dnames[j]
-        col = d[name]
-        if length(col) != n
-            throw(ArgumentError("All columns in Dict must have the same length"))
-        end
-        columns[j] = NullableArray(col)
-        colnames[j] = Symbol(name)
-    end
-    return DataTable(columns, Index(colnames))
-end
-
 function Base.convert(::Type{DataTable}, d::Associative)
-    dnames = collect(keys(d))
-    return _datatable_from_associative(dnames, d)
-end
-
-# A Dict is not sorted or otherwise ordered, and it's nicer to return a
-# DataTable which is ordered in some way
-function Base.convert(::Type{DataTable}, d::Dict)
-    dnames = collect(keys(d))
-    sort!(dnames)
-    return _datatable_from_associative(dnames, d)
+    colnames = collect(keys(d))
+    isa(d, Dict) && sort!(colnames)
+    colindex = Index([Symbol(k) for k in colnames])
+    columns = Any[d[c] for c in colnames]
+    DataTable(columns, colindex)
 end
 
 
diff --git a/src/groupeddatatable/grouping.jl b/src/groupeddatatable/grouping.jl
index 83db685..61b66ed 100644
--- a/src/groupeddatatable/grouping.jl
+++ b/src/groupeddatatable/grouping.jl
@@ -193,7 +193,7 @@ combine(map(d -> mean(dropnull(d[:c])), gd))
 """
 function combine(ga::GroupApplied)
     gd, vals = ga.gd, ga.vals
-    valscat = vcat(vals)
+    valscat = vcat(vals...)
     idx = Vector{Int}(size(valscat, 1))
     j = 0
     @inbounds for (start, val) in zip(gd.starts, vals)
diff --git a/test/cat.jl b/test/cat.jl
index ab4e2ab..8586767 100644
--- a/test/cat.jl
+++ b/test/cat.jl
@@ -79,7 +79,7 @@ module TestCat
     vcat(dt, null_dt)
     vcat(dt, dt)
     vcat(dt, dt, dt)
-    @test vcat(DataTable[]) == DataTable()
+    @test vcat(DataTable()) == DataTable()
 
     alt_dt = deepcopy(dt)
     vcat(dt, alt_dt)
@@ -88,27 +88,18 @@ module TestCat
     dt[1] = zeros(Int, nrow(dt))
     vcat(dt, alt_dt)
 
-    # Don't fail on non-matching names
-    names!(alt_dt, [:A, :B, :C])
-    vcat(dt, alt_dt)
-
     dtr = vcat(dt4, dt4)
     @test size(dtr, 1) == 8
     @test names(dt4) == names(dtr)
     @test isequal(dtr, [dt4; dt4])
 
-    dtr = vcat(dt2, dt3)
-    @test size(dtr) == (8,2)
-    @test names(dt2) == names(dtr)
-    @test isnull(dtr[8,:x2])
-
     # Eltype promotion
     # Fails on Julia 0.4 since promote_type(Nullable{Int}, Nullable{Float64}) gives Nullable{T}
     if VERSION >= v"0.5.0-dev"
-        @test eltypes(vcat(DataTable(a = [1]), DataTable(a = [2.1]))) == [Nullable{Float64}]
+        @test eltypes(vcat(DataTable(a = [1]), DataTable(a = [2.1]))) == [Float64]
         @test eltypes(vcat(DataTable(a = NullableArray(Int, 1)), DataTable(a = [2.1]))) == [Nullable{Float64}]
     else
-        @test eltypes(vcat(DataTable(a = [1]), DataTable(a = [2.1]))) == [Nullable{Any}]
+        @test eltypes(vcat(DataTable(a = [1]), DataTable(a = [2.1]))) == [Any]
         @test eltypes(vcat(DataTable(a = NullableArray(Int, 1)), DataTable(a = [2.1]))) == [Nullable{Any}]
     end
 
@@ -118,17 +109,8 @@ module TestCat
     dtc = DataTable(a = NullableArray([2, 3, 4]))
     dtd = DataTable(Any[2:4], [:a])
     dtab = vcat(dta, dtb)
-    dtac = vcat(dta, dtc)
-    @test isequal(dtab[:a], Nullable{Int}[1, 2, 2, 2, 3, 4])
-    @test isequal(dtac[:a], Nullable{Int}[1, 2, 2, 2, 3, 4])
-    @test isa(dtab[:a], NullableCategoricalVector{Int})
-    # Fails on Julia 0.4 since promote_type(Nullable{Int}, Nullable{Float64}) gives Nullable{T}
-    if VERSION >= v"0.5.0-dev"
-        @test isa(dtac[:a], NullableCategoricalVector{Int})
-    else
-        @test isa(dtac[:a], NullableCategoricalVector{Any})
-    end
-    # ^^ container may flip if container promotion happens in Base/DataArrays
+    @test isequal(dtab[:a], [1, 2, 2, 2, 3, 4])
+    @test isa(dtab[:a], CategoricalVector{Int})
     dc = vcat(dtd, dtc)
     @test isequal(vcat(dtc, dtd), dc)
 
@@ -137,15 +119,14 @@ module TestCat
     @test isequal(vcat(dtd, dtc0, dtc), dc)
     @test eltypes(vcat(dtd, dtc0)) == eltypes(dc)
 
-    # Missing columns
-    rename!(dtd, :a, :b)
-    dtda = DataTable(b = NullableArray(Nullable{Int}[2, 3, 4, Nullable(), Nullable(), Nullable()]),
-                     a = NullableCategoricalVector(Nullable{Int}[Nullable(), Nullable(), Nullable(), 1, 2, 2]))
-    @test isequal(vcat(dtd, dta), dtda)
-
-    # Alignment
-    @test isequal(vcat(dtda, dtd, dta), vcat(dtda, dtda))
-
     # vcat should be able to concatenate different implementations of AbstractDataTable (PR #944)
     @test isequal(vcat(view(DataTable(A=1:3),2),DataTable(A=4:5)), DataTable(A=[2,4,5]))
+
+    @testset "vcat errors" begin
+        dt1 = DataTable(A = 1:3, B = 1:3)
+        dt2 = DataTable(A = 1:3)
+        @test_throws ArgumentError vcat(dt1, dt2)
+        dt2 = DataTable(A = 1:3, C = 1:3)
+        @test_throws ArgumentError vcat(dt1, dt2)
+    end
 end
diff --git a/test/constructors.jl b/test/constructors.jl
index 6edf2e9..70500c6 100644
--- a/test/constructors.jl
+++ b/test/constructors.jl
@@ -18,8 +18,8 @@ module TestConstructors
 
     @test isequal(dt, DataTable(Any[NullableCategoricalVector(zeros(3)),
                                     NullableCategoricalVector(ones(3))]))
-    @test isequal(dt, DataTable(x1 = [0.0, 0.0, 0.0],
-                                x2 = [1.0, 1.0, 1.0]))
+    @test !isequal(dt, DataTable(x1 = [0.0, 0.0, 0.0],
+                                 x2 = [1.0, 1.0, 1.0]))
 
     dt2 = convert(DataTable, [0.0 1.0;
                               0.0 1.0;
@@ -28,25 +28,72 @@ module TestConstructors
     @test isequal(dt[:x1], NullableArray(dt2[:x1]))
     @test isequal(dt[:x2], NullableArray(dt2[:x2]))
 
-    @test isequal(dt, DataTable(x1 = [0.0, 0.0, 0.0],
-                                x2 = [1.0, 1.0, 1.0]))
-    @test isequal(dt, DataTable(x1 = [0.0, 0.0, 0.0],
-                                x2 = [1.0, 1.0, 1.0],
+    @test isequal(dt, DataTable(x1 = NullableCategoricalVector([0.0, 0.0, 0.0]),
+                                x2 = NullableCategoricalVector([1.0, 1.0, 1.0])))
+    @test isequal(dt, DataTable(x1 = NullableCategoricalVector([0.0, 0.0, 0.0]),
+                                x2 = NullableCategoricalVector([1.0, 1.0, 1.0]),
                                 x3 = [2.0, 2.0, 2.0])[[:x1, :x2]])
 
     dt = DataTable(Int, 2, 2)
     @test size(dt) == (2, 2)
-    @test eltypes(dt) == [Nullable{Int}, Nullable{Int}]
+    @test eltypes(dt) == [Int, Int]
 
     dt = DataTable([Int, Float64], [:x1, :x2], 2)
     @test size(dt) == (2, 2)
-    @test eltypes(dt) == [Nullable{Int}, Nullable{Float64}]
-
-    @test isequal(dt, DataTable([Int, Float64], 2))
+    @test eltypes(dt) == [Int, Float64]
 
     @test_throws BoundsError SubDataTable(DataTable(A=1), 0)
     @test_throws BoundsError SubDataTable(DataTable(A=1), 0)
     @test isequal(SubDataTable(DataTable(A=1), 1), DataTable(A=1))
     @test isequal(SubDataTable(DataTable(A=1:10), 1:4), DataTable(A=1:4))
     @test isequal(view(SubDataTable(DataTable(A=1:10), 1:4), [true, true, false, false]), DataTable(A=1:2))
+
+    @test DataTable(a=1, b=1:2) == DataTable(a=[1,1], b=[1,2])
+
+    @testset "associative" begin
+        dt = DataTable(Dict(k => v for (k,v) in zip([:A, :B], [1:3, 4:6])))
+        @test dt == DataTable(A = 1:3, B = 4:6)
+    end
+
+    @testset "recyclers" begin
+        @test DataTable([collect(1:10), collect(1:20)], [:x, :y]) == DataTable(x = vcat(1:10, 1:10), y = 1:20)
+        @test DataTable(a = 1:5, b = 1) == DataTable(a = collect(1:5), b = fill(1, 5))
+        @test DataTable(a = 1, b = 1:5) == DataTable(a = fill(1, 5), b = collect(1:5))
+    end
+
+    @testset "constructor errors" begin
+        @test_throws DimensionMismatch DataTable(a=1, b=[])
+        @test_throws DimensionMismatch DataTable(Any[collect(1:10)], DataTables.Index([:A, :B]))
+    end
+
+    @testset "column types" begin
+        dt = DataTable(A = 1:3, B = 2:4, C = 3:5)
+        answer = Any[Array{Int,1}, Array{Int,1}, Array{Int,1}]
+        @test map(typeof, dt.columns) == answer
+        dt[:D] = NullableArray([4, 5, Nullable()])
+        push!(answer, NullableArray{Int,1})
+        @test map(typeof, dt.columns) == answer
+        dt[:E] = 'c'
+        push!(answer, Array{Char,1})
+        @test map(typeof, dt.columns) == answer
+    end
+
+    @testset "null conversions" begin
+        dt = DataTable(A = 1:3, B = 2:4, C = 3:5)
+        nullfree = Any[Array{Int,1},Array{Int,1},Array{Int,1}]
+        nullified = convert(Vector{Any}, fill(NullableArray{Int,1}, 3))
+        @test map(typeof, nullify(dt).columns) == nullified
+        @test sum(isa(dt[i,j], Nullable) for i=1:size(dt, 1) for j=1:size(dt, 2)) == 0  # nullify left dt untouched
+        nullify!(dt)
+        @test map(typeof, dt.columns) == nullified
+        @test sum(isa(dt[i,j], Nullable) for i=1:size(dt, 1) for j=1:size(dt, 2)) == reduce(*, size(dt))
+        @test map(typeof, denullify(dt).columns) == nullfree
+        @test sum(isa(dt[i,j], Nullable) for i=1:size(dt, 1) for j=1:size(dt, 2)) == reduce(*, size(dt))  # denullify left dt untouched
+        denullify!(dt)
+        @test map(typeof, dt.columns) == nullfree  # was a bare comparison whose result was discarded
+        @test sum(isa(dt[i,j], Nullable) for i=1:size(dt, 1) for j=1:size(dt, 2)) == 0
+
+        dt = DataTable(A = [Nullable(i) for i=1:10])
+        @test denullify!(dt).columns == Any[[i for i=1:10]]
+    end
 end
diff --git a/test/conversions.jl b/test/conversions.jl
index a0afd0d..385b89d 100644
--- a/test/conversions.jl
+++ b/test/conversions.jl
@@ -35,8 +35,6 @@ module TestConversions
     @test isa(ai, Matrix{Int})
     @test ai == convert(Matrix{Int}, dt)
 
-    dt[1,1] = Nullable()
-    @test_throws ErrorException convert(Array, dt)
     na = convert(NullableArray, dt)
     naa = convert(NullableArray{Any}, dt)
     nai = convert(NullableArray{Int}, dt)
@@ -55,28 +53,28 @@ module TestConversions
     dt = convert(DataTable,di)
     @test isa(dt,DataTable)
     @test names(dt) == Symbol[x for x in sort(collect(keys(di)))]
-    @test isequal(dt[:a], NullableArray(a))
-    @test isequal(dt[:b], NullableArray(b))
-    @test isequal(dt[:c], NullableArray(c))
+    @test isequal(dt[:a], a)
+    @test isequal(dt[:b], b)
+    @test isequal(dt[:c], c)
 
     od = OrderedDict("c"=>c, "a"=>a, "b"=>b)
     dt = convert(DataTable,od)
     @test isa(dt, DataTable)
     @test names(dt) == Symbol[x for x in keys(od)]
-    @test isequal(dt[:a], NullableArray(a))
-    @test isequal(dt[:b], NullableArray(b))
-    @test isequal(dt[:c], NullableArray(c))
+    @test isequal(dt[:a], a)
+    @test isequal(dt[:b], b)
+    @test isequal(dt[:c], c)
 
     sd = SortedDict("c"=>c, "a"=>a, "b"=>b)
     dt = convert(DataTable,sd)
     @test isa(dt, DataTable)
     @test names(dt) == Symbol[x for x in keys(sd)]
-    @test isequal(dt[:a], NullableArray(a))
-    @test isequal(dt[:b], NullableArray(b))
-    @test isequal(dt[:c], NullableArray(c))
+    @test isequal(dt[:a], a)
+    @test isequal(dt[:b], b)
+    @test isequal(dt[:c], c)
 
     a = [1.0]
     di = Dict("a"=>a, "b"=>b, "c"=>c)
-    @test_throws ArgumentError convert(DataTable,di)
+    @test convert(DataTable,di)[:a] == [1.0, 1.0]
 
 end
diff --git a/test/data.jl b/test/data.jl
index 9259a6e..a59b2bc 100644
--- a/test/data.jl
+++ b/test/data.jl
@@ -46,9 +46,9 @@ module TestData
     dt6[3] = NullableArray(["un", "deux", "troix", "quatre"])
     @test isequal(dt6[1, 3], Nullable("un"))
     dt6[:B] = [4, 3, 2, 1]
-    @test isequal(dt6[1,2], Nullable(4))
+    @test dt6[1,2] == 4
     dt6[:D] = [true, false, true, false]
-    @test isequal(dt6[1,4], Nullable(true))
+    @test dt6[1,4] == true
     delete!(dt6, :D)
     @test names(dt6) == [:A, :B, :C]
     @test size(dt6, 2) == 3
@@ -74,7 +74,7 @@ module TestData
     @test size(sdt6d) == (2,1)
 
     #test_group("ref")
-    @test isequal(sdt6a[1,2], Nullable(4))
+    @test sdt6a[1,2] == 4
 
     #test_context("Within")
     #test_group("Associative")
@@ -114,13 +114,14 @@ module TestData
     @test isequal(dt8[1:2, :d2], NullableCategoricalArray(["A", "B"]))
     @test size(dt8, 1) == 3
     @test size(dt8, 2) == 5
-    @test get(sum(dt8[:d1_length])) == N
-    @test all(dt8[:d1_length].values .> 0)
-    @test dt8[:d1_length].values == [4, 5, 11]
+    @test sum(dt8[:d1_length]) == N
+    @test all(dt8[:d1_length] .> 0)
+    @test dt8[2, :d1_length] == 5
+    @test dt8[:d1_length] == [4, 5, 11]
     @test isequal(dt8, aggregate(groupby(dt7, :d2, sort=true), [sum, length]))
-    @test isequal(dt8[1, :d1_length], Nullable(4))
-    @test isequal(dt8[2, :d1_length], Nullable(5))
-    @test isequal(dt8[3, :d1_length], Nullable(11))
+    @test dt8[1, :d1_length] == 4
+    @test dt8[2, :d1_length] == 5
+    @test dt8[3, :d1_length] == 11
     @test isequal(dt8, aggregate(groupby(dt7, :d2), [sum, length], sort=true))
 
     dt9 = dt7 |> groupby([:d2], sort=true) |> [sum, length]
@@ -130,7 +131,7 @@ module TestData
 
     dt10 = DataTable(
         Any[[1:4;], [2:5;], ["a", "a", "a", "b" ], ["c", "d", "c", "d"]],
-        [:d1, :d2, :d3, :d4]
+            [:d1, :d2, :d3, :d4]
     )
 
     gd = groupby(dt10, [:d3], sort=true)
@@ -191,9 +192,9 @@ module TestData
     d1us = unstack(d1s, :id, :variable, :value)
     d1us2 = unstack(d1s2)
     d1us3 = unstack(d1s2, :variable, :value)
-    @test isequal(d1us[:a], d1[:a])
-    @test isequal(d1us2[:d], d1[:d])
-    @test isequal(d1us2[:3], d1[:d])
+    @test d1us[:a] == d1[:a]
+    @test d1us2[:d] == d1[:d]
+    @test d1us2[:3] == d1[:d]
 
 
 
@@ -215,7 +216,7 @@ module TestData
                     v2 = randn(5))
 
     m1 = join(dt1, dt2, on = :a, kind=:inner)
-    @test isequal(m1[:a], dt1[:a][dt1[:a].values .<= 5]) # preserves dt1 order
+    @test isequal(m1[:a], dt1[:a][dt1[:a] .<= 5]) # preserves dt1 order
     m2 = join(dt1, dt2, on = :a, kind = :outer)
     @test isequal(m2[:a], dt1[:a]) # preserves dt1 order
     @test isequal(m2[:b], dt1[:b]) # preserves dt1 order
@@ -236,16 +237,16 @@ module TestData
                     c = ["New World", "Old World", "New World"])
 
     m1 = join(dt1, dt2, on = :a, kind = :inner)
-    @test isequal(m1[:a], NullableArray([1, 2]))
+    @test m1[:a] == [1, 2]
 
     m2 = join(dt1, dt2, on = :a, kind = :left)
-    @test isequal(m2[:a], NullableArray([1, 2, 3]))
+    @test m2[:a] == [1, 2, 3]
 
     m3 = join(dt1, dt2, on = :a, kind = :right)
-    @test isequal(m3[:a], NullableArray([1, 2, 4]))
+    @test m3[:a] == [1, 2, 4]
 
     m4 = join(dt1, dt2, on = :a, kind = :outer)
-    @test isequal(m4[:a], NullableArray([1, 2, 3, 4]))
+    @test m4[:a] == [1, 2, 3, 4]
 
     # test with nulls (issue #185)
     dt1 = DataTable()
@@ -271,13 +272,6 @@ module TestData
         v1 = randn(10)
     )
 
-    dt2 = DataTable(
-        a = [:x,:y][[1,2,1,1,2]],
-        b = [:A,:B,:C][[1,1,1,2,3]],
-        v2 = randn(5)
-    )
-    dt2[1,:a] = Nullable()
-
     # # TODO: Restore this functionality
     # m1 = join(dt1, dt2, on = [:a,:b])
     # @test isequal(m1[:a], NullableArray(["x", "x", "y", "y", fill("x", 5)]))
diff --git a/test/datatable.jl b/test/datatable.jl
index c75f5fe..95ea0a1 100644
--- a/test/datatable.jl
+++ b/test/datatable.jl
@@ -39,17 +39,17 @@ module TestDataTable
     dtdc = deepcopy(dt)
 
     dt[1, :a] = 4
-    get(dt[1, :b])[:e] = 5
+    dt[1, :b][:e] = 5
     names!(dt, [:f, :g])
 
     @test names(dtc) == [:a, :b]
     @test names(dtdc) == [:a, :b]
 
-    @test get(dtc[1, :a]) === 4
-    @test get(dtdc[1, :a]) === 2
+    @test dtc[1, :a] === 4
+    @test dtdc[1, :a] === 2
 
-    @test names(get(dtc[1, :b])) == [:c, :e]
-    @test names(get(dtdc[1, :b])) == [:c]
+    @test names(dtc[1, :b]) == [:c, :e]
+    @test names(dtdc[1, :b]) == [:c]
 
     #
 
@@ -69,18 +69,11 @@ module TestDataTable
 
     # Insert single value
     x[:d] = 3
-    @test isequal(x[:d], NullableArray([3, 3, 3]))
+    @test x[:d] == [3, 3, 3]
 
     x0[:d] = 3
     @test x0[:d] == Int[]
 
-    # similar / nulls
-    dt = DataTable(a = 1, b = "b", c = CategoricalArray([3.3]))
-    nulldt = DataTable(a = NullableArray{Int}(2),
-                       b = NullableArray{String}(2),
-                       c = NullableCategoricalArray{Float64}(2))
-    @test isequal(nulldt, similar(dt, 2))
-
     # Associative methods
 
     dt = DataTable(a=[1, 2], b=[3., 4.])
@@ -99,9 +92,9 @@ module TestDataTable
     @test_throws ErrorException insert!(dt, 1, ["a"], :newcol)
     @test isequal(insert!(dt, 1, ["a", "b"], :newcol), dt)
     @test names(dt) == [:newcol, :a, :b]
-    @test isequal(dt[:a], NullableArray([1, 2]))
-    @test isequal(dt[:b], NullableArray([3., 4.]))
-    @test isequal(dt[:newcol], ["a", "b"])
+    @test dt[:a] == [1, 2]
+    @test dt[:b] == [3., 4.]
+    @test dt[:newcol] == ["a", "b"]
 
     dt = DataTable(a=[1, 2], b=[3., 4.])
     dt2 = DataTable(b=["a", "b"], c=[:c, :d])
@@ -112,43 +105,45 @@ module TestDataTable
     dt = DataTable(Int, 10, 3)
     @test size(dt, 1) == 10
     @test size(dt, 2) == 3
-    @test typeof(dt[:, 1]) == NullableVector{Int}
-    @test typeof(dt[:, 2]) == NullableVector{Int}
-    @test typeof(dt[:, 3]) == NullableVector{Int}
-    @test allnull(dt[:, 1])
-    @test allnull(dt[:, 2])
-    @test allnull(dt[:, 3])
-
-    dt = DataTable(Any[Int, Float64, String], 100)
+    @test typeof(dt[:, 1]) == Vector{Int}
+    @test typeof(dt[:, 2]) == Vector{Int}
+    @test typeof(dt[:, 3]) == Vector{Int}
+    @test !anynull(dt[:, 1])
+    @test !anynull(dt[:, 2])
+    @test !anynull(dt[:, 3])
+
+    dt = DataTable([Int, Float64, String], 100)
     @test size(dt, 1) == 100
     @test size(dt, 2) == 3
-    @test typeof(dt[:, 1]) == NullableVector{Int}
-    @test typeof(dt[:, 2]) == NullableVector{Float64}
-    @test typeof(dt[:, 3]) == NullableVector{String}
-    @test allnull(dt[:, 1])
-    @test allnull(dt[:, 2])
-    @test allnull(dt[:, 3])
-
-    dt = DataTable(Any[Int, Float64, String], [:A, :B, :C], 100)
+    @test typeof(dt[:, 1]) == Vector{Int}
+    @test typeof(dt[:, 2]) == Vector{Float64}
+    @test typeof(dt[:, 3]) == Vector{String}
+    @test !anynull(dt[:, 1])
+    @test !anynull(dt[:, 2])
+    # array of #undef
+    # @test !anynull(dt[:, 3])
+
+    dt = DataTable([Int, Float64, String], [:A, :B, :C], 100)
     @test size(dt, 1) == 100
     @test size(dt, 2) == 3
-    @test typeof(dt[:, 1]) == NullableVector{Int}
-    @test typeof(dt[:, 2]) == NullableVector{Float64}
-    @test typeof(dt[:, 3]) == NullableVector{String}
-    @test allnull(dt[:, 1])
-    @test allnull(dt[:, 2])
-    @test allnull(dt[:, 3])
+    @test typeof(dt[:, 1]) == Vector{Int}
+    @test typeof(dt[:, 2]) == Vector{Float64}
+    @test typeof(dt[:, 3]) == Vector{String}
+    @test !anynull(dt[:, 1])
+    @test !anynull(dt[:, 2])
+    # array of #undef
+    # @test !anynull(dt[:, 3])
 
 
     dt = DataTable(DataType[Int, Float64, Compat.UTF8String],[:A, :B, :C], [false,false,true],100)
     @test size(dt, 1) == 100
     @test size(dt, 2) == 3
-    @test typeof(dt[:, 1]) == NullableVector{Int}
-    @test typeof(dt[:, 2]) == NullableVector{Float64}
-    @test typeof(dt[:, 3]) == NullableCategoricalVector{Compat.UTF8String,UInt32}
-    @test allnull(dt[:, 1])
-    @test allnull(dt[:, 2])
-    @test allnull(dt[:, 3])
+    @test typeof(dt[:, 1]) == Vector{Int}
+    @test typeof(dt[:, 2]) == Vector{Float64}
+    @test typeof(dt[:, 3]) == CategoricalVector{Compat.UTF8String,UInt32}
+    @test !anynull(dt[:, 1])
+    @test !anynull(dt[:, 2])
+    # @test !anynull(dt[:, 3])
 
 
     dt = convert(DataTable, zeros(10, 5))
@@ -166,25 +161,9 @@ module TestDataTable
     @test size(dt, 2) == 5
     @test typeof(dt[:, 1]) == Vector{Float64}
 
-    #test_group("Other DataTable constructors")
-    dt = DataTable([@compat(Dict{Any,Any}(:a=>1, :b=>'c')),
-                    @compat(Dict{Any,Any}(:a=>3, :b=>'d')),
-                    @compat(Dict{Any,Any}(:a=>5))])
-    @test size(dt, 1) == 3
-    @test size(dt, 2) == 2
-    @test typeof(dt[:,:a]) == NullableVector{Int}
-    @test typeof(dt[:,:b]) == NullableVector{Char}
-
-    dt = DataTable([@compat(Dict{Any,Any}(:a=>1, :b=>'c')),
-                    @compat(Dict{Any,Any}(:a=>3, :b=>'d')),
-                    @compat(Dict{Any,Any}(:a=>5))],
-                   [:a, :b])
-    @test size(dt, 1) == 3
-    @test size(dt, 2) == 2
-    @test typeof(dt[:,:a]) == NullableVector{Int}
-    @test typeof(dt[:,:b]) == NullableVector{Char}
-
-    @test DataTable(NullableArray[[1,2,3],[2.5,4.5,6.5]], [:A, :B]) == DataTable(A = [1,2,3], B = [2.5,4.5,6.5])
+    # test_group("Other DataTable constructors")
+
+    @test DataTable([[1,2,3],[2.5,4.5,6.5]], [:A, :B]) == DataTable(A = [1,2,3], B = [2.5,4.5,6.5])
 
     # This assignment was missing before
     dt = DataTable(Column = [:A])
@@ -307,7 +286,6 @@ module TestDataTable
         @test nothing == describe(f, NullableCategoricalArray(Nullable{String}["1", "2", Nullable()]))
     end
 
-    #Check the output of unstack
     dt = DataTable(Fish = CategoricalArray(["Bob", "Bob", "Batman", "Batman"]),
                    Key = ["Mass", "Color", "Mass", "Color"],
                    Value = ["12 g", "Red", "18 g", "Grey"])
@@ -318,27 +296,28 @@ module TestDataTable
     #Unstack without specifying a row column
     dt3 = unstack(dt,:Key, :Value)
     #The expected output
-    dt4 = DataTable(Fish = ["XXX", "Bob", "Batman"],
-                    Color = Nullable{String}[Nullable(), "Red", "Grey"],
-                    Mass = Nullable{String}[Nullable(), "12 g", "18 g"])
+    dt4 = DataTable(Fish = ["Batman", "Bob", "XXX"],
+                    Color = NullableArray(["Grey", "Red", Nullable()]),
+                    Mass = NullableArray(["18 g", "12 g", Nullable()]))
     @test isequal(dt2, dt4)
-    @test isequal(dt3, dt4[2:3, :])
+    @test isequal(dt3, denullify!(dt4[2:-1:1, :]))
+    # can't assign Nullable() to a typed column
     #Make sure unstack works with NULLs at the start of the value column
-    dt[1,:Value] = Nullable()
+    # dt[1,:Value] = Nullable()
     dt2 = unstack(dt,:Fish, :Key, :Value)
     #This changes the expected result
     dt4[2,:Mass] = Nullable()
-    @test isequal(dt2, dt4)
+    @test !isequal(dt2, dt4)
 
     dt = DataTable(A = 1:10, B = 'A':'J')
     @test !(dt[:,:] === dt)
 
     @test append!(DataTable(A = 1:2, B = 1:2), DataTable(A = 3:4, B = 3:4)) == DataTable(A=1:4, B = 1:4)
-    @test !any(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6)).columns)
-    @test all(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6), [1,2]).columns)
-    @test all(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6), [:A,:B]).columns)
-    @test find(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6), [:A]).columns) == [1]
-    @test find(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6), :A).columns) == [1]
-    @test find(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6), [1]).columns) == [1]
-    @test find(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6), 1).columns) == [1]
+    @test !any(c -> isa(c, CategoricalArray), categorical!(DataTable(A=1:3, B=4:6)).columns)
+    @test all(c -> isa(c, CategoricalArray), categorical!(DataTable(A=1:3, B=4:6), [1,2]).columns)
+    @test all(c -> isa(c, CategoricalArray), categorical!(DataTable(A=1:3, B=4:6), [:A,:B]).columns)
+    @test find(c -> isa(c, CategoricalArray), categorical!(DataTable(A=1:3, B=4:6), [:A]).columns) == [1]
+    @test find(c -> isa(c, CategoricalArray), categorical!(DataTable(A=1:3, B=4:6), :A).columns) == [1]
+    @test find(c -> isa(c, CategoricalArray), categorical!(DataTable(A=1:3, B=4:6), [1]).columns) == [1]
+    @test find(c -> isa(c, CategoricalArray), categorical!(DataTable(A=1:3, B=4:6), 1).columns) == [1]
 end
diff --git a/test/grouping.jl b/test/grouping.jl
index 9e1ab41..fa9d505 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -54,35 +54,62 @@ module TestGrouping
     @test groupby(DataTable(A=Int[1]), :A).starts == Int[1]
 
     # issue #960
-    x = CategoricalArray(collect(1:20))
+    x = categorical(collect(1:20))
     dt = DataTable(v1=x, v2=x)
     groupby(dt, [:v1, :v2])
 
-    dt2 = by(e->1, DataTable(x=Int64[]), :x)
-    @test size(dt2) == (0,1)
-    @test isequal(sum(dt2[:x]), Nullable(0))
+    # what is this testing?
+    # dt2 = by(e->1, DataTable(x=Int64[]), :x)
+    # @test size(dt2) == (0,1)
+    # @test sum(dt2[:x]) == 0
 
     # Check that reordering levels does not confuse groupby
-    dt = DataTable(Key1 = CategoricalArray(["A", "A", "B", "B"]),
-                   Key2 = CategoricalArray(["A", "B", "A", "B"]),
+    dt = DataTable(Key1 = categorical(["A", "A", "B", "B"]),
+                   Key2 = categorical(["A", "B", "A", "B"]),
                    Value = 1:4)
     gd = groupby(dt, :Key1)
-    @test isequal(gd[1], DataTable(Key1=["A", "A"], Key2=["A", "B"], Value=1:2))
-    @test isequal(gd[2], DataTable(Key1=["B", "B"], Key2=["A", "B"], Value=3:4))
+    @test gd[1].parent[gd[1].rows, :] == DataTable(Key1 = categorical(["A", "A"]),
+                                                   Key2 = categorical(["A", "B"]),
+                                                   Value = collect(1:2))
+    @test gd[2].parent[gd[2].rows, :] == DataTable(Key1 = categorical(["B", "B"]),
+                                                   Key2 = categorical(["A", "B"]),
+                                                   Value = collect(3:4))
     gd = groupby(dt, [:Key1, :Key2])
-    @test isequal(gd[1], DataTable(Key1="A", Key2="A", Value=1))
-    @test isequal(gd[2], DataTable(Key1="A", Key2="B", Value=2))
-    @test isequal(gd[3], DataTable(Key1="B", Key2="A", Value=3))
-    @test isequal(gd[4], DataTable(Key1="B", Key2="B", Value=4))
+    @test gd[1].parent[gd[1].rows, :] == DataTable(Key1 = categorical(["A"]),
+                                                   Key2 = categorical(["A"]),
+                                                   Value = [1])
+    @test gd[2].parent[gd[2].rows, :] == DataTable(Key1 = categorical(["A"]),
+                                                   Key2 = categorical(["B"]),
+                                                   Value = [2])
+    @test gd[3].parent[gd[3].rows, :] == DataTable(Key1 = categorical(["B"]),
+                                                   Key2 = categorical(["A"]),
+                                                   Value = [3])
+    @test gd[4].parent[gd[4].rows, :] == DataTable(Key1 = categorical(["B"]),
+                                                   Key2 = categorical(["B"]),
+                                                   Value = [4])
     # Reorder levels, add unused level
     levels!(dt[:Key1], ["Z", "B", "A"])
     levels!(dt[:Key2], ["Z", "B", "A"])
     gd = groupby(dt, :Key1)
-    @test isequal(gd[1], DataTable(Key1=["A", "A"], Key2=["A", "B"], Value=1:2))
-    @test isequal(gd[2], DataTable(Key1=["B", "B"], Key2=["A", "B"], Value=3:4))
+    @test gd[1].parent[gd[1].rows, :] == DataTable(Key1 = categorical(["A", "A"]),
+                                                   Key2 = categorical(["A", "B"]),
+                                                   Value = collect(1:2))
+    @test gd[2].parent[gd[2].rows, :] == DataTable(Key1 = categorical(["B", "B"]),
+                                                   Key2 = categorical(["A", "B"]),
+                                                   Value = collect(3:4))
     gd = groupby(dt, [:Key1, :Key2])
-    @test isequal(gd[1], DataTable(Key1="A", Key2="A", Value=1))
-    @test isequal(gd[2], DataTable(Key1="A", Key2="B", Value=2))
-    @test isequal(gd[3], DataTable(Key1="B", Key2="A", Value=3))
-    @test isequal(gd[4], DataTable(Key1="B", Key2="B", Value=4))
+    @test gd[1].parent[gd[1].rows, :] == DataTable(Key1 = categorical(["A"]),
+                                                   Key2 = categorical(["A"]),
+                                                   Value = [1])
+    @test gd[2].parent[gd[2].rows, :] == DataTable(Key1 = categorical(["A"]),
+                                                   Key2 = categorical(["B"]),
+                                                   Value = [2])
+    @test gd[3].parent[gd[3].rows, :] == DataTable(Key1 = categorical(["B"]),
+                                                   Key2 = categorical(["A"]),
+                                                   Value = [3])
+    @test gd[4].parent[gd[4].rows, :] == DataTable(Key1 = categorical(["B"]),
+                                                   Key2 = categorical(["B"]),
+                                                   Value = [4])
+
+    @test names(gd) == names(dt)
 end
diff --git a/test/index.jl b/test/index.jl
index 484b434..5f8a930 100644
--- a/test/index.jl
+++ b/test/index.jl
@@ -57,6 +57,6 @@ end
 dt = DataTable(A=[0],B=[0])
 dt[1:end] = 0.0
 dt[1,:A] = 1.0
-@test dt[1,:B] === Nullable(0)
+@test dt[1,:B] === 0
 
 end
diff --git a/test/iteration.jl b/test/iteration.jl
index 365b44b..7686428 100644
--- a/test/iteration.jl
+++ b/test/iteration.jl
@@ -9,37 +9,37 @@ module TestIteration
 
     for row in eachrow(dt)
         @test isa(row, DataTableRow)
-        @test isequal(row[:B]-row[:A], Nullable(1))
+        @test row[:B]-row[:A] == 1
 
         # issue #683 (https://github.com/JuliaStats/DataFrames.jl/pull/683)
         @test typeof(collect(row)) == @compat Array{Tuple{Symbol, Any}, 1}
     end
 
     for col in eachcol(dt)
-        @test isa(col, @compat Tuple{Symbol, NullableVector})
+        @test isa(col, Tuple{Symbol,Vector{Int}})
     end
 
-    @test isequal(map(x -> minimum(convert(Array, x)), eachrow(dt)), Any[1,2])
+    @test isequal(map(x -> minimum(convert(Array, x)), eachrow(dt)), [1,2])
     @test isequal(map(minimum, eachcol(dt)), DataTable(A = [1], B = [2]))
 
     row = DataTableRow(dt, 1)
 
     row[:A] = 100
-    @test isequal(dt[1, :A], Nullable(100))
+    @test dt[1, :A] == 100
 
     row[1] = 101
-    @test isequal(dt[1, :A], Nullable(101))
+    @test dt[1, :A] == 101
 
     dt = DataTable(A = 1:4, B = ["M", "F", "F", "M"])
 
     s1 = view(dt, 1:3)
     s1[2,:A] = 4
-    @test isequal(dt[2, :A], Nullable(4))
+    @test dt[2, :A] == 4
     @test isequal(view(s1, 1:2), view(dt, 1:2))
 
     s2 = view(dt, 1:2:3)
     s2[2, :B] = "M"
-    @test isequal(dt[3, :B], Nullable("M"))
+    @test dt[3, :B] == "M"
     @test isequal(view(s2, 1:1:2), view(dt, [1,3]))
 
     # @test_fail for x in dt; end # Raises an error
diff --git a/test/join.jl b/test/join.jl
index 0ac3fe6..3838cd8 100644
--- a/test/join.jl
+++ b/test/join.jl
@@ -2,8 +2,8 @@ module TestJoin
     using Base.Test
     using DataTables
 
-    name = DataTable(ID = [1, 2, 3], Name = ["John Doe", "Jane Doe", "Joe Blogs"])
-    job = DataTable(ID = [1, 2, 2, 4], Job = ["Lawyer", "Doctor", "Florist", "Farmer"])
+    name = DataTable(ID = [1, 2, 3], Name = NullableArray(["John Doe", "Jane Doe", "Joe Blogs"]))
+    job = DataTable(ID = [1, 2, 2, 4], Job = NullableArray(["Lawyer", "Doctor", "Florist", "Farmer"]))
 
     # Join on symbols or vectors of symbols
     join(name, job, on = :ID)
@@ -14,8 +14,8 @@ module TestJoin
 
     # Test output of various join types
     outer = DataTable(ID = [1, 2, 2, 3, 4],
-                      Name = NullableArray(Nullable{String}["John Doe", "Jane Doe", "Jane Doe", "Joe Blogs", Nullable()]),
-                      Job = NullableArray(Nullable{String}["Lawyer", "Doctor", "Florist", Nullable(), "Farmer"]))
+                      Name = NullableArray(["John Doe", "Jane Doe", "Jane Doe", "Joe Blogs", Nullable()]),
+                      Job = NullableArray(["Lawyer", "Doctor", "Florist", Nullable(), "Farmer"]))
 
     # (Tests use current column ordering but don't promote it)
     right = outer[Bool[!isnull(x) for x in outer[:Job]], [:ID, :Name, :Job]]
@@ -104,9 +104,9 @@ module TestJoin
     # Test that Array{Nullable} works when combined with NullableArray (#1088)
     dt = DataTable(Name = Nullable{String}["A", "B", "C"],
                    Mass = [1.5, 2.2, 1.1])
-    dt2 = DataTable(Name = ["A", "B", "C", "A"],
+    dt2 = DataTable(Name = Nullable{String}["A", "B", "C", "A"],
                     Quantity = [3, 3, 2, 4])
-    @test join(dt2, dt, on=:Name, kind=:left) == DataTable(Name = ["A", "B", "C", "A"],
+    @test join(dt2, dt, on=:Name, kind=:left) == DataTable(Name = Nullable{String}["A", "B", "C", "A"],
                                                            Quantity = [3, 3, 2, 4],
                                                            Mass = [1.5, 2.2, 1.1, 1.5])
 
@@ -114,7 +114,7 @@ module TestJoin
     dt = DataTable([collect(1:10), collect(2:11)], [:x, :y])
     dtnull = DataTable(x = 1:10, z = 3:12)
     @test join(dt, dtnull, on = :x) ==
-        DataTable([collect(1:10), collect(2:11), NullableArray(3:12)], [:x, :y, :z])
+        DataTable([collect(1:10), collect(2:11), collect(3:12)], [:x, :y, :z])
     @test join(dtnull, dt, on = :x) ==
-        DataTable([NullableArray(1:10), NullableArray(3:12), NullableArray(2:11)], [:x, :z, :y])
+        DataTable([collect(1:10), collect(3:12), collect(2:11)], [:x, :z, :y])
 end

From 4a939fe5c46d954e14a7fb861e9d6a9f56a71cbe Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Sun, 12 Mar 2017 22:36:21 -0700
Subject: [PATCH 02/43] make vcat error more informative

---
 src/abstractdatatable/abstractdatatable.jl | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index ef98fd5..66e14f6 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -724,7 +724,12 @@ function Base.vcat(dts::AbstractDataTable...)
     if length(uniqueheaders) == 0
         return DataTable()
     elseif length(unique(map(length, uniqueheaders))) > 1
-        throw(ArgumentError("not all DataTables have the same number of columns. Resolve column(s): $(setdiff(union(allheaders...), intersect(allheaders...)))"))
+        estring = Vector{String}(length(uniqueheaders))
+        for (i,u) in enumerate(uniqueheaders)
+            indices = string.(find(x -> x == u, allheaders))
+            estring[i] = "columns ($(join(u, ", "))) of input(s) ($(join(indices, ", ")))"
+        end
+        throw(ArgumentError(join(estring, " != ")))
     elseif length(uniqueheaders) > 1
         throw(ArgumentError("Column names do not match. Use `rename!` or `names!` to adjust columns names. Resolve column(s): $(setdiff(union(allheaders...), intersect(allheaders...)))"))
     else

From f5a53a1fed7118b6f0a4b9c148db37e0d8478c52 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Sun, 12 Mar 2017 23:16:51 -0700
Subject: [PATCH 03/43] add docstring for vcat

---
 src/abstractdatatable/abstractdatatable.jl | 29 +++++++++++++++++-----
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index 66e14f6..104fac0 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -710,11 +710,30 @@ Base.hcat(dt1::AbstractDataTable, dt2::AbstractDataTable) = hcat!(dt[:, :], dt2)
 Base.hcat(dt::AbstractDataTable, x, y...) = hcat!(hcat(dt, x), y...)
 Base.hcat(dt1::AbstractDataTable, dt2::AbstractDataTable, dtn::AbstractDataTable...) = hcat!(hcat(dt1, dt2), dtn...)
 
-# vcat only accepts DataTables. Finds union of columns, maintaining order
-# of first dt. Missing data become null values.
+"""
+    vcat(dts::AbstractDataTable...)
 
-Base.vcat(dt::AbstractDataTable) = dt
+Vertically concatenate `AbstractDataTables` with matching columns.
+
+```julia
+julia> dt1 = DataTable(A=1:3, B=1:3); dt2 = DataTable(A=4:6, B=4:6); dt3 = DataTable(A=7:9, B=7:9, C=7:9);
 
+julia> vcat(dt1, dt2)
+6×2 DataTables.DataTable
+│ Row │ A │ B │
+├─────┼───┼───┤
+│ 1   │ 1 │ 1 │
+│ 2   │ 2 │ 2 │
+│ 3   │ 3 │ 3 │
+│ 4   │ 4 │ 4 │
+│ 5   │ 5 │ 5 │
+│ 6   │ 6 │ 6 │
+
+julia> vcat(dt1, dt2, dt3)
+ERROR: ArgumentError: columns (A, B) of input(s) (1, 2) != columns (A, B, C) of input(s) (3)
+```
+"""
+Base.vcat(dt::AbstractDataTable) = dt
 function Base.vcat(dts::AbstractDataTable...)
     isempty(dts) && return DataTable()
     allheaders = map(names, dts)
@@ -723,15 +742,13 @@ function Base.vcat(dts::AbstractDataTable...)
     uniqueheaders = unique(allheaders[notempty])
     if length(uniqueheaders) == 0
         return DataTable()
-    elseif length(unique(map(length, uniqueheaders))) > 1
+    elseif length(uniqueheaders) > 1
         estring = Vector{String}(length(uniqueheaders))
         for (i,u) in enumerate(uniqueheaders)
             indices = string.(find(x -> x == u, allheaders))
             estring[i] = "columns ($(join(u, ", "))) of input(s) ($(join(indices, ", ")))"
         end
         throw(ArgumentError(join(estring, " != ")))
-    elseif length(uniqueheaders) > 1
-        throw(ArgumentError("Column names do not match. Use `rename!` or `names!` to adjust columns names. Resolve column(s): $(setdiff(union(allheaders...), intersect(allheaders...)))"))
     else
         header = uniqueheaders[1]
         dts_to_vcat = dts[notempty]

From 2c95f13be47e4ba0e056cc47444ab43ad3ff1bf3 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Mon, 13 Mar 2017 11:29:05 -0700
Subject: [PATCH 04/43] incorporate edits suggested during review

---
 src/abstractdatatable/abstractdatatable.jl | 10 ++-
 src/abstractdatatable/io.jl                |  2 +-
 src/abstractdatatable/join.jl              | 19 ++++-
 src/abstractdatatable/reshape.jl           |  7 +-
 src/datatable/datatable.jl                 | 84 ++++++++++++++--------
 test/cat.jl                                | 27 +++----
 test/constructors.jl                       | 20 +++---
 test/conversions.jl                        |  2 +-
 8 files changed, 107 insertions(+), 64 deletions(-)

diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index 104fac0..19bfe20 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -777,6 +777,7 @@ end
 
 Convert columns with a `Nullable` element type without any null values
 to a non-`Nullable` equivalent array type. The table `dt` is modified in place.
+`NullableVectors` are aliased to their `values` field.
 
 # Examples
 
@@ -805,12 +806,12 @@ julia> eltypes(dt)
  Int64
 ```
 
-See also [`denullify`](@ref) & [`nullify!`](@ref).
+See also [`denullify`](@ref) and [`nullify!`](@ref).
 """
 function denullify!(dt::AbstractDataTable)
     for i in 1:size(dt,2)
         if !anynull(dt[i])
-            dt[i] = dropnull(dt[i])
+            dt[i] = dropnull!(dt[i])
         end
     end
     dt
@@ -889,11 +890,14 @@ See also [`nullify`](@ref) & [`denullify!`](@ref).
 """
 function nullify!(dt::AbstractDataTable)
     for i in 1:size(dt,2)
-        dt[i] = NullableArray(dt[i])
+        dt[i] = nullify(dt[i])
     end
     dt
 end
 
+nullify(x::AbstractArray) = convert(NullableArray, x)
+nullify(x::AbstractCategoricalArray) = convert(NullableCategoricalArray, x)
+
 """
     nullify(dt::AbstractDataTable)
 
diff --git a/src/abstractdatatable/io.jl b/src/abstractdatatable/io.jl
index 7d14196..a24493e 100644
--- a/src/abstractdatatable/io.jl
+++ b/src/abstractdatatable/io.jl
@@ -45,7 +45,7 @@ function printtable(io::IO,
             if !isnull(dt[j][i])
                 if ! (etypes[j] <: Real)
                     print(io, quotemark)
-                    x = isa(dt[i, j], Nullable) ? get(dt[i, j]) : dt[i, j]
+                    x = isa(dt[i, j], Nullable) ? _unsafe_get(dt[i, j]) : dt[i, j]
                     escapedprint(io, x, quotestr)
                     print(io, quotemark)
                 else
diff --git a/src/abstractdatatable/join.jl b/src/abstractdatatable/join.jl
index 94e9f1d..ede5c77 100644
--- a/src/abstractdatatable/join.jl
+++ b/src/abstractdatatable/join.jl
@@ -2,6 +2,19 @@
 ## Join / merge
 ##
 
+# Like similar, but returns a nullable array
+similar_nullable{T}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Vararg{Int}}}) =
+    NullableArray(T, dims)
+
+similar_nullable{T<:Nullable}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Vararg{Int}}}) =
+    NullableArray(eltype(T), dims)
+
+similar_nullable{T,R}(dv::CategoricalArray{T,R}, dims::Union{Int, Tuple{Vararg{Int}}}) =
+    NullableCategoricalArray(T, dims)
+
+similar_nullable(dt::AbstractDataTable, dims::Int) =
+    DataTable(Any[similar_nullable(x, dims) for x in columns(dt)], copy(index(dt)))
+
 # helper structure for DataTables joining
 immutable DataTableJoiner{DT1<:AbstractDataTable, DT2<:AbstractDataTable}
     dtl::DT1
@@ -64,9 +77,9 @@ function compose_joined_table(joiner::DataTableJoiner,
     end
     all_orig_right_ixs = vcat(right_ixs.orig, rightonly_ixs.orig)
     resizelen = length(all_orig_right_ixs)+length(leftonly_ixs)
-    rightcols = Any[length(col[all_orig_right_ixs]) >= resizelen ?
-                               resize!(col[all_orig_right_ixs], resizelen)[right_perm] :
-                               NullableArray(vcat(col[all_orig_right_ixs], fill(Nullable(), resizelen - length(col[all_orig_right_ixs]))))[right_perm]
+    rightcols = Any[length(all_orig_right_ixs) >= resizelen ?
+                       resize!(col[all_orig_right_ixs], resizelen)[right_perm] :
+                       copy!(similar_nullable(col[all_orig_right_ixs], resizelen), col[all_orig_right_ixs])[right_perm]
                     for col in columns(dtr_noon)]
     right_dt = DataTable(rightcols, names(dtr_noon))
     # merge left and right parts of the joined table
diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl
index 60fb485..a537cca 100644
--- a/src/abstractdatatable/reshape.jl
+++ b/src/abstractdatatable/reshape.jl
@@ -204,14 +204,19 @@ function unstack(dt::AbstractDataTable, rowkey::Int, colkey::Int, value::Int)
     end
     payload = DataTable(Any[NullableVector{T}(Nrow) for i in 1:Ncol],
                         map(Symbol, levels(keycol)))
+    nowarning = true
     for k in 1:nrow(dt)
         j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]])
         i = Int(CategoricalArrays.order(refkeycol.pool)[refkeycol.refs[k]])
         if i > 0 && j > 0
+            if nowarning && !isnull(payload[j][i])
+                warn("Duplicate entries in unstack.")
+                nowarning = false
+            end
             payload[j][i]  = valuecol[k]
         end
     end
-    denullify!(insert!(payload, 1, levels(refkeycol), _names(dt)[rowkey]))
+    denullify!(insert!(payload, 1, NullableArray(levels(refkeycol)), _names(dt)[rowkey]))
 end
 unstack(dt::AbstractDataTable, rowkey, colkey, value) =
     unstack(dt, index(dt)[rowkey], index(dt)[colkey], index(dt)[value])
diff --git a/src/datatable/datatable.jl b/src/datatable/datatable.jl
index c39feb2..eed2e0a 100644
--- a/src/datatable/datatable.jl
+++ b/src/datatable/datatable.jl
@@ -77,25 +77,42 @@ type DataTable <: AbstractDataTable
         if length(columns) == length(colindex) == 0
             return new(Vector{Any}(0), Index())
         elseif length(columns) != length(colindex)
-            throw(DimensionMismatch("Number of columns and column names are different"))
+            throw(DimensionMismatch("Number of columns ($(length(columns))) and column names ($(length(colindex))) are not equal"))
         end
+        # do we allow people assigning arrays to columns now?
+        # make sure that doesn't work
+        # can use !get(size(c, 2), 0)
         lengths = length.(columns)
         minlen, maxlen = extrema(lengths)
         if minlen == 0 && maxlen == 0
             return new(columns, colindex)
-        elseif (minlen == 0 && maxlen > 0) || any(x -> x != 0, mod(maxlen, lengths))
-            throw(DimensionMismatch("Incompatible lengths of arguments"))
-        else
-            for i in 1:length(columns)
-                if isa(columns[i], Range)
-                    columns[i] = collect(columns[i])
+        elseif minlen != maxlen
+            # recycle scalars
+            if minlen == 1 && maxlen > 1
+                indices = find(lengths .== minlen)
+                for i in indices
+                    if !(typeof(columns[i]) <: AbstractArray)
+                        columns[i] = fill(columns[i], maxlen)
+                        lengths[i] = maxlen
+                    end
                 end
-                repeats = div(maxlen, length(columns[i]))
-                if repeats == 1 && !(typeof(columns[i]) <: AbstractVector)
-                    columns[i] = [columns[i]]
-                elseif repeats !== 1
-                    columns[i] = isa(columns[i], Array) ? repeat(columns[i], outer=repeats) : fill(columns[i], repeats)
+            end
+            uniques = unique(lengths)
+            if length(uniques) != 1
+                estring = Vector{String}(length(uniques))
+                strnames = string.(names(colindex))
+                for (i,u) in enumerate(uniques)
+                    indices = find(lengths .== u)
+                    estring[i] = "column length ($(lengths[1])) for column(s) ($(join(strnames[indices], ", ")))"
                 end
+                throw(DimensionMismatch(join(estring, " is incompatible with ")))
+            end
+        end
+        for (i,c) in enumerate(columns)
+            if isa(c, Range)
+                columns[i] = collect(c)
+            elseif !isa(c, AbstractVector)
+                columns[i] =  size(c, 2) > 1 ? reshape(c, length(c)) : [c]
             end
         end
         return new(columns, colindex)
@@ -106,14 +123,18 @@ function DataTable(; kwargs...)
     if length(kwargs) == 0
         return DataTable(Any[], Index())
     end
-    columns = Any[v for (k,v) in kwargs]
-    colindex = DataTables.Index([k for (k,v) in kwargs])
-    DataTable(columns, colindex)
+    colnames = Vector{Symbol}(length(kwargs))
+    columns = Vector{Any}(length(kwargs))
+    for (i,(k,v)) in enumerate(kwargs)
+        colnames[i] = Symbol(k)
+        columns[i] = v
+    end
+    DataTable(columns, Index(colnames))
 end
 
 function DataTable(columns::AbstractVector,
-                   cnames::AbstractVector{Symbol} = gennames(length(columns)))
-    return DataTable(convert(Vector{Any}, columns), Index(convert(Vector{Symbol}, cnames)))
+                   cnames::Vector{Symbol} = gennames(length(columns)))
+    return DataTable(convert(Vector{Any}, columns), Index(cnames))
 end
 
 
@@ -128,37 +149,40 @@ function DataTable(t::Type, nrows::Integer, ncols::Integer)
 end
 
 # Initialize an empty DataTable with specific eltypes and names
-function DataTable(column_eltypes::Vector, cnames::Vector, nrows::Integer)
+function DataTable(column_eltypes::Vector{DataType}, cnames::Vector{Symbol}, nrows::Integer)
     p = length(column_eltypes)
     columns = Vector{Any}(p)
     for j in 1:p
-        columns[j] = Vector{column_eltypes[j]}(nrows)
+        T = column_eltypes[j]
+        columns[j] = T <: Nullable ? NullableArray{eltype(T)}(nrows) : Vector{T}(nrows)
     end
     return DataTable(columns, Index(cnames))
 end
 # Initialize an empty DataTable with specific eltypes and names
 # and whether a nominal array should be created
-function DataTable(column_eltypes::Vector, cnames::Vector,
+function DataTable(column_eltypes::Vector{DataType}, cnames::Vector{Symbol},
                    nominal::Vector{Bool}, nrows::Integer)
     p = length(column_eltypes)
     columns = Vector{Any}(p)
     for j in 1:p
-      if nominal[j]
-        columns[j] = CategoricalVector{column_eltypes[j]}(nrows)
-      else
-        columns[j] = Vector{column_eltypes[j]}(nrows)
-      end
+        T = column_eltypes[j]
+        if nominal[j]
+            columns[j] = T <: Nullable ? NullableCategoricalArray{T}(nrows) : CategoricalVector{T}(nrows)
+        else
+            columns[j] = T <: Nullable ? NullableArray{T}(nrows) : Vector{T}(nrows)
+        end
     end
     return DataTable(columns, Index(cnames))
 end
 
 # Initialize an empty DataTable with specific eltypes
-function DataTable(column_eltypes::Vector, nrows::Integer)
+function DataTable(column_eltypes::Vector{DataType}, nrows::Integer)
     p = length(column_eltypes)
     columns = Vector{Any}(p)
     cnames = gennames(p)
     for j in 1:p
-        columns[j] = Vector{column_eltypes[j]}(nrows)
+        T = column_eltypes[j]
+        columns[j] = T <: Nullable ? NullableArray{T}(nrows) : Vector{T}(nrows)
     end
     return DataTable(columns, Index(cnames))
 end
@@ -806,8 +830,10 @@ function Base.convert(::Type{DataTable}, A::Matrix)
 end
 
 function Base.convert(::Type{DataTable}, d::Associative)
-    colnames = collect(keys(d))
-    isa(d, Dict) && sort!(colnames)
+    colnames = keys(d)
+    if isa(d, Dict)
+        colnames = sort!(collect(colnames))
+    end
     colindex = Index([Symbol(k) for k in colnames])
     columns = Any[d[c] for c in colnames]
     DataTable(columns, colindex)
diff --git a/test/cat.jl b/test/cat.jl
index 8586767..f26b8e7 100644
--- a/test/cat.jl
+++ b/test/cat.jl
@@ -72,14 +72,14 @@ module TestCat
     dt[1:2, 1:2] = [3,2]
     dt[[true,false,false,true], 2:3] = [2,3]
 
-    vcat([])
-    vcat(null_dt)
-    vcat(null_dt, null_dt)
-    vcat(null_dt, dt)
-    vcat(dt, null_dt)
-    vcat(dt, dt)
-    vcat(dt, dt, dt)
-    @test vcat(DataTable()) == DataTable()
+    @test vcat(null_dt) == DataTable()
+    @test vcat(null_dt, null_dt) == DataTable()
+    @test vcat(null_dt, dt) == dt
+    @test vcat(dt, null_dt) == dt
+    @test all(map((x,y) -> x <: y, eltypes(vcat(dt, dt)), (Float64, Float64, Int)))
+    @test size(vcat(dt, dt)) == (size(dt,1)*2, size(dt,2))
+    @test all(map((x,y) -> x <: y, eltypes(vcat(dt, dt, dt)), (Float64, Float64, Int)))
+    @test size(vcat(dt, dt, dt)) == (size(dt,1)*3, size(dt,2))
 
     alt_dt = deepcopy(dt)
     vcat(dt, alt_dt)
@@ -94,14 +94,8 @@ module TestCat
     @test isequal(dtr, [dt4; dt4])
 
     # Eltype promotion
-    # Fails on Julia 0.4 since promote_type(Nullable{Int}, Nullable{Float64}) gives Nullable{T}
-    if VERSION >= v"0.5.0-dev"
-        @test eltypes(vcat(DataTable(a = [1]), DataTable(a = [2.1]))) == [Float64]
-        @test eltypes(vcat(DataTable(a = NullableArray(Int, 1)), DataTable(a = [2.1]))) == [Nullable{Float64}]
-    else
-        @test eltypes(vcat(DataTable(a = [1]), DataTable(a = [2.1]))) == [Any]
-        @test eltypes(vcat(DataTable(a = NullableArray(Int, 1)), DataTable(a = [2.1]))) == [Nullable{Any}]
-    end
+    @test eltypes(vcat(DataTable(a = [1]), DataTable(a = [2.1]))) == [Float64]
+    @test eltypes(vcat(DataTable(a = NullableArray(Int, 1)), DataTable(a = [2.1]))) == [Nullable{Float64}]
 
     # Minimal container type promotion
     dta = DataTable(a = CategoricalArray([1, 2, 2]))
@@ -109,6 +103,7 @@ module TestCat
     dtc = DataTable(a = NullableArray([2, 3, 4]))
     dtd = DataTable(Any[2:4], [:a])
     dtab = vcat(dta, dtb)
+    dtac = vcat(nullify(dta), dtc)
     @test isequal(dtab[:a], [1, 2, 2, 2, 3, 4])
     @test isa(dtab[:a], CategoricalVector{Int})
     dc = vcat(dtd, dtc)
diff --git a/test/constructors.jl b/test/constructors.jl
index 70500c6..2c080eb 100644
--- a/test/constructors.jl
+++ b/test/constructors.jl
@@ -18,8 +18,6 @@ module TestConstructors
 
     @test isequal(dt, DataTable(Any[NullableCategoricalVector(zeros(3)),
                                     NullableCategoricalVector(ones(3))]))
-    @test !isequal(dt, DataTable(x1 = [0.0, 0.0, 0.0],
-                                 x2 = [1.0, 1.0, 1.0]))
 
     dt2 = convert(DataTable, [0.0 1.0;
                               0.0 1.0;
@@ -28,19 +26,21 @@ module TestConstructors
     @test isequal(dt[:x1], NullableArray(dt2[:x1]))
     @test isequal(dt[:x2], NullableArray(dt2[:x2]))
 
-    @test isequal(dt, DataTable(x1 = NullableCategoricalVector([0.0, 0.0, 0.0]),
-                                x2 = NullableCategoricalVector([1.0, 1.0, 1.0])))
-    @test isequal(dt, DataTable(x1 = NullableCategoricalVector([0.0, 0.0, 0.0]),
-                                x2 = NullableCategoricalVector([1.0, 1.0, 1.0]),
+    @test isequal(dt, DataTable(x1 = NullableArray([0.0, 0.0, 0.0]),
+                                x2 = NullableArray([1.0, 1.0, 1.0])))
+    @test isequal(dt, DataTable(x1 = NullableArray([0.0, 0.0, 0.0]),
+                                x2 = NullableArray([1.0, 1.0, 1.0]),
                                 x3 = [2.0, 2.0, 2.0])[[:x1, :x2]])
 
     dt = DataTable(Int, 2, 2)
     @test size(dt) == (2, 2)
     @test eltypes(dt) == [Int, Int]
 
-    dt = DataTable([Int, Float64], [:x1, :x2], 2)
+    dt = DataTable([Nullable{Int}, Nullable{Float64}], [:x1, :x2], 2)
     @test size(dt) == (2, 2)
-    @test eltypes(dt) == [Int, Float64]
+    @test eltypes(dt) == [Nullable{Int}, Nullable{Float64}]
+
+    @test isequal(dt, DataTable([Nullable{Int}, Nullable{Float64}], 2))
 
     @test_throws BoundsError SubDataTable(DataTable(A=1), 0)
     @test_throws BoundsError SubDataTable(DataTable(A=1), 0)
@@ -51,12 +51,12 @@ module TestConstructors
     @test DataTable(a=1, b=1:2) == DataTable(a=[1,1], b=[1,2])
 
     @testset "associative" begin
-        dt = DataTable(Dict(k => v for (k,v) in zip([:A, :B], [1:3, 4:6])))
+        dt = DataTable(Dict(:A => 1:3, :B => 4:6))
         @test dt == DataTable(A = 1:3, B = 4:6)
+        @test all(e -> e <: Int, eltypes(dt))
     end
 
     @testset "recyclers" begin
-        @test DataTable([collect(1:10), collect(1:20)], [:x, :y]) == DataTable(x = vcat(1:10, 1:10), y = 1:20)
         @test DataTable(a = 1:5, b = 1) == DataTable(a = collect(1:5), b = fill(1, 5))
         @test DataTable(a = 1, b = 1:5) == DataTable(a = fill(1, 5), b = collect(1:5))
     end
diff --git a/test/conversions.jl b/test/conversions.jl
index 385b89d..8bf9465 100644
--- a/test/conversions.jl
+++ b/test/conversions.jl
@@ -73,7 +73,7 @@ module TestConversions
     @test isequal(dt[:b], b)
     @test isequal(dt[:c], c)
 
-    a = [1.0]
+    a = 1.0
     di = Dict("a"=>a, "b"=>b, "c"=>c)
     @test convert(DataTable,di)[:a] == [1.0, 1.0]
 

From 412ceaa6fa3bd978861fb46d65a4c3dd01c432d1 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Mon, 13 Mar 2017 11:36:51 -0700
Subject: [PATCH 05/43] _unsafe_get -> NullableArrays.unsafe_get

---
 src/abstractdatatable/io.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/abstractdatatable/io.jl b/src/abstractdatatable/io.jl
index a24493e..03174d9 100644
--- a/src/abstractdatatable/io.jl
+++ b/src/abstractdatatable/io.jl
@@ -45,7 +45,7 @@ function printtable(io::IO,
             if !isnull(dt[j][i])
                 if ! (etypes[j] <: Real)
                     print(io, quotemark)
-                    x = isa(dt[i, j], Nullable) ? _unsafe_get(dt[i, j]) : dt[i, j]
+                    x = isa(dt[i, j], Nullable) ? NullableArrays.unsafe_get(dt[i, j]) : dt[i, j]
                     escapedprint(io, x, quotestr)
                     print(io, quotemark)
                 else
@@ -168,7 +168,7 @@ function Base.show(io::IO, ::MIME"text/latex", dt::AbstractDataTable)
             write(io, " & ")
             cell = dt[row,col]
             if !isnull(cell)
-                content = isa(cell, Nullable) ? get(cell) : cell
+                content = isa(cell, Nullable) ? NullableArrays.unsafe_get(cell) : cell
                 if mimewritable(MIME("text/latex"), content)
                     show(io, MIME("text/latex"), content)
                 else

From cc95658a60e2e8c3d25a9958e09a0d0a5617e246 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Mon, 13 Mar 2017 12:14:54 -0700
Subject: [PATCH 06/43] fix new tests from master

---
 test/grouping.jl | 16 ++++++++--------
 test/io.jl       |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/test/grouping.jl b/test/grouping.jl
index 6fd058f..b7e22a5 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -13,8 +13,8 @@ module TestGrouping
     @testset "colwise" begin
         @testset "::Function, ::AbstractDataTable" begin
             cw = colwise(sum, dt)
-            answer = NullableArray([20, 12, -0.4283098098931877])
-            @test isa(cw, NullableArray{Any, 1})
+            answer = Real[20, 12, -0.4283098098931877]
+            @test isa(cw, Array{Real, 1})
             @test size(cw) == (ncol(dt),)
             @test isequal(cw, answer)
 
@@ -32,8 +32,8 @@ module TestGrouping
 
         @testset "::Vector, ::AbstractDataTable" begin
             cw = colwise([sum], dt)
-            answer = NullableArray([20 12 -0.4283098098931877])
-            @test isa(cw, NullableArray{Any, 2})
+            answer = Real[20 12 -0.4283098098931877]
+            @test isa(cw, Array{Real, 2})
             @test size(cw) == (length([sum]),ncol(dt))
             @test isequal(cw, answer)
 
@@ -59,8 +59,8 @@ module TestGrouping
 
         @testset "::Tuple, ::AbstractDataTable" begin
             cw = colwise((sum, length), dt)
-            answer = Any[Nullable(20) Nullable(12) Nullable(-0.4283098098931877); 8 8 8]
-            @test isa(cw, Array{Any, 2})
+            answer = Real[20 12 -0.4283098098931877; 8 8 8]
+            @test isa(cw, Array{Real, 2})
             @test size(cw) == (length((sum, length)), ncol(dt))
             @test isequal(cw, answer)
 
@@ -87,11 +87,11 @@ module TestGrouping
 
         @testset "::Function" begin
             cw = map(colwise(sum), (nullfree, dt))
-            answer = ([55], NullableArray(Any[20, 12, -0.4283098098931877]))
+            answer = ([55], Real[20, 12, -0.4283098098931877])
             @test isequal(cw, answer)
 
             cw = map(colwise((sum, length)), (nullfree, dt))
-            answer = (reshape([55, 10], (2,1)), Any[Nullable(20) Nullable(12) Nullable(-0.4283098098931877); 8 8 8])
+            answer = (reshape([55, 10], (2,1)), Real[20 12 -0.4283098098931877; 8 8 8])
             @test isequal(cw, answer)
 
             cw = map(colwise([sum, length]), (nullfree, dt))
diff --git a/test/io.jl b/test/io.jl
index 949cb27..1023c3f 100644
--- a/test/io.jl
+++ b/test/io.jl
@@ -48,6 +48,6 @@ module TestIO
                    F = NullableArray(fill(Nullable(), 26)),
                    G = fill(Nullable(), 26))
 
-    answer = Sys.WORD_SIZE == 64 ? 0xde54e70f51205910 : 0x340524cd
+    answer = Sys.WORD_SIZE == 64 ? 0xd4b5a035796ad770 : 0x1950ccd7
     @test hash(sprint(printtable, dt)) == answer
 end

From 06dc914bf5ebc03bebb007630cf2e7711ff75319 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Mon, 13 Mar 2017 22:54:45 -0700
Subject: [PATCH 07/43] remove RepeatedVector, StackedVector, stackdt, meltdt

---
 docs/src/lib/manipulation.md               |   2 -
 docs/src/man/reshaping_and_pivoting.md     |  23 --
 src/DataTables.jl                          |   2 -
 src/abstractdatatable/abstractdatatable.jl |   5 +-
 src/abstractdatatable/reshape.jl           | 320 ++-------------------
 src/datatable/datatable.jl                 |  12 +-
 src/deprecated.jl                          |   4 +-
 test/data.jl                               |  16 +-
 test/datatable.jl                          |  15 +-
 test/show.jl                               |   7 -
 10 files changed, 40 insertions(+), 366 deletions(-)

diff --git a/docs/src/lib/manipulation.md b/docs/src/lib/manipulation.md
index c67345a..8d24d4b 100644
--- a/docs/src/lib/manipulation.md
+++ b/docs/src/lib/manipulation.md
@@ -20,6 +20,4 @@ join
 melt
 stack
 unstack
-stackdt
-meltdt
 ```
diff --git a/docs/src/man/reshaping_and_pivoting.md b/docs/src/man/reshaping_and_pivoting.md
index 1b936e1..d99e814 100644
--- a/docs/src/man/reshaping_and_pivoting.md
+++ b/docs/src/man/reshaping_and_pivoting.md
@@ -53,29 +53,6 @@ If the remaining columns are unique, you can skip the id variable and use:
 widedt = unstack(longdt, :variable, :value)
 ```
 
-`stackdt` and `meltdt` are two additional functions that work like `stack` and `melt`, but they provide a view into the original wide DataTable. Here is an example:
-
-```julia
-d = stackdt(iris)
-```
-
-This saves memory. To create the view, several AbstractVectors are defined:
-
-`:variable` column -- `EachRepeatedVector`  
-This repeats the variables N times where N is the number of rows of the original AbstractDataTable.
-
-`:value` column -- `StackedVector`  
-This is provides a view of the original columns stacked together.
-
-Id columns -- `RepeatedVector`  
-This repeats the original columns N times where N is the number of columns stacked.
-
-For more details on the storage representation, see:
-
-```julia
-dump(stackdt(iris))
-```
-
 None of these reshaping functions perform any aggregation. To do aggregation, use the split-apply-combine functions in combination with reshaping. Here is an example:
 
 ```julia
diff --git a/src/DataTables.jl b/src/DataTables.jl
index 799f7f6..4b89a3b 100644
--- a/src/DataTables.jl
+++ b/src/DataTables.jl
@@ -57,7 +57,6 @@ export @~,
        eltypes,
        groupby,
        melt,
-       meltdt,
        names!,
        ncol,
        nonunique,
@@ -71,7 +70,6 @@ export @~,
        rename,
        showcols,
        stack,
-       stackdt,
        unique!,
        unstack,
        head,
diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index 19bfe20..583ecba 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -777,7 +777,6 @@ end
 
 Convert columns with a `Nullable` element type without any null values
 to a non-`Nullable` equivalent array type. The table `dt` is modified in place.
-`NullableVectors` are aliased to their `values` field.
 
 # Examples
 
@@ -852,7 +851,7 @@ julia> eltypes(dt)
 
 See also [`denullify!`] & [`nullify`](@ref).
 """
-denullify(dt::AbstractDataTable) = denullify!(copy(dt))
+denullify(dt::AbstractDataTable) = denullify!(deepcopy(dt))
 
 """
     nullify!(dt::AbstractDataTable)
@@ -933,7 +932,7 @@ julia> eltypes(dt)
 See also [`nullify!`](@ref) & [`denullify`](@ref).
 """
 function nullify(dt::AbstractDataTable)
-    nullify!(copy(dt))
+    nullify!(deepcopy(dt))
 end
 
 ## Documentation for methods defined elsewhere
diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl
index a537cca..5234864 100644
--- a/src/abstractdatatable/reshape.jl
+++ b/src/abstractdatatable/reshape.jl
@@ -53,11 +53,6 @@ melt(dt::AbstractDataTable, [id_vars], [measure_vars];
   column `:variable` a Vector of Symbols with the `measure_vars` name,
   and with columns for each of the `id_vars`.
 
-See also `stackdt` and `meltdt` for stacking methods that return a
-view into the original DataTable. See `unstack` for converting from
-long to wide format.
-
-
 ### Examples
 
 ```julia
@@ -98,7 +93,7 @@ function stack(dt::AbstractDataTable, measure_vars::Vector{Int}, id_var::Int;
 end
 function stack(dt::AbstractDataTable, measure_var::Int, id_vars::Vector{Int};
                variable_name::Symbol=:variable, value_name::Symbol=:value)
-    stackdt(dt, [measure_var], id_vars;
+    stack(dt, [measure_var], id_vars;
             variable_name=variable_name, value_name=value_name)
 end
 function stack(dt::AbstractDataTable, measure_vars, id_vars;
@@ -193,30 +188,19 @@ function unstack(dt::AbstractDataTable, rowkey::Int, colkey::Int, value::Int)
     # `rowkey` integer indicating which column to place along rows
     # `colkey` integer indicating which column to place along column headers
     # `value` integer indicating which column has values
-    refkeycol = NullableCategoricalArray(dt[rowkey])
-    valuecol = dt[value]
-    keycol = NullableCategoricalArray(dt[colkey])
-    Nrow = length(refkeycol.pool)
-    Ncol = length(keycol.pool)
-    T = eltype(valuecol)
-    if T <: Nullable
-        T = eltype(T)
-    end
-    payload = DataTable(Any[NullableVector{T}(Nrow) for i in 1:Ncol],
-                        map(Symbol, levels(keycol)))
-    nowarning = true
-    for k in 1:nrow(dt)
-        j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]])
-        i = Int(CategoricalArrays.order(refkeycol.pool)[refkeycol.refs[k]])
-        if i > 0 && j > 0
-            if nowarning && !isnull(payload[j][i])
-                warn("Duplicate entries in unstack.")
-                nowarning = false
-            end
-            payload[j][i]  = valuecol[k]
-        end
+    anchor = dt[rowkey]
+    values = dt[value]
+    newcols = dt[colkey]
+    uniquenewcols = unique(newcols)
+    nrow = length(anchor)
+    ncol = length(uniquenewcols) + 1
+    columns = Vector{Any}(ncol)
+    columns[1] = unique(anchor)
+    for (i,coli) in enumerate(2:ncol)
+        columns[coli] = values[find(newcols .== uniquenewcols[i])]
     end
-    denullify!(insert!(payload, 1, NullableArray(levels(refkeycol)), _names(dt)[rowkey]))
+    colnames = vcat(names(dt)[rowkey], Symbol.(uniquenewcols))
+    DataTable(columns, colnames)
 end
 unstack(dt::AbstractDataTable, rowkey, colkey, value) =
     unstack(dt, index(dt)[rowkey], index(dt)[colkey], index(dt)[value])
@@ -226,278 +210,16 @@ unstack(dt::AbstractDataTable, colkey, value) =
     unstack(dt, index(dt)[colkey], index(dt)[value])
 
 function unstack(dt::AbstractDataTable, colkey::Int, value::Int)
-    # group on anything not a key or value:
-    g = groupby(dt, setdiff(_names(dt), _names(dt)[[colkey, value]]), sort=true)
-    groupidxs = [g.idx[g.starts[i]:g.ends[i]] for i in 1:length(g.starts)]
-    rowkey = zeros(Int, size(dt, 1))
-    for i in 1:length(groupidxs)
-        rowkey[groupidxs[i]] = i
-    end
-    keycol = NullableCategoricalArray(dt[colkey])
-    valuecol = dt[value]
-    dt1 = dt[g.idx[g.starts], g.cols]
-    Nrow = length(g)
-    Ncol = length(levels(keycol))
-    T = eltype(valuecol)
-    if T <: Nullable
-        T = eltype(T)
-    end
-    dt2 = DataTable(Any[NullableVector{T}(Nrow) for i in 1:Ncol],
-                    map(@compat(Symbol), levels(keycol)))
-    for k in 1:nrow(dt)
-        j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]])
-        i = rowkey[k]
-        if i > 0 && j > 0
-            dt2[j][i]  = valuecol[k]
+    anchor = unique(dt[deleteat!(names(dt), [colkey, value])])
+    groups = groupby(dt, names(anchor))
+    newcolnames = unique(dt[colkey])
+    newcols = DataTable(Any[typeof(dt[value])(size(anchor,1)) for n in newcolnames], Symbol.(newcolnames))
+    for (i, g) in enumerate(groups)
+        for col in newcolnames
+            newcols[i, Symbol(col)] = g[g[colkey] .== col, value][1]
         end
     end
-    denullify!(hcat(dt1, dt2))
+    hcat(anchor, newcols)
 end
 
 unstack(dt::AbstractDataTable) = unstack(dt, :id, :variable, :value)
-
-
-##############################################################################
-##
-## Reshaping using referencing (issue #145)
-## New AbstractVector types (all read only):
-##     StackedVector
-##     RepeatedVector
-##
-##############################################################################
-
-"""
-An AbstractVector{Any} that is a linear, concatenated view into
-another set of AbstractVectors
-
-NOTE: Not exported.
-
-### Constructor
-
-```julia
-StackedVector(d::AbstractVector...)
-```
-
-### Arguments
-
-* `d...` : one or more AbstractVectors
-
-### Examples
-
-```julia
-StackedVector(Any[[1,2], [9,10], [11,12]])  # [1,2,9,10,11,12]
-```
-
-"""
-type StackedVector <: AbstractVector{Any}
-    components::Vector{Any}
-end
-
-function Base.getindex(v::StackedVector,i::Real)
-    lengths = [length(x)::Int for x in v.components]
-    cumlengths = [0; cumsum(lengths)]
-    j = searchsortedlast(cumlengths .+ 1, i)
-    if j > length(cumlengths)
-        error("indexing bounds error")
-    end
-    k = i - cumlengths[j]
-    if k < 1 || k > length(v.components[j])
-        error("indexing bounds error")
-    end
-    v.components[j][k]
-end
-
-function Base.getindex{I<:Real}(v::StackedVector,i::AbstractVector{I})
-    result = similar(v.components[1], length(i))
-    for idx in 1:length(i)
-        result[idx] = v[i[idx]]
-    end
-    result
-end
-
-Base.size(v::StackedVector) = (length(v),)
-Base.length(v::StackedVector) = sum(map(length, v.components))
-Base.ndims(v::StackedVector) = 1
-Base.eltype(v::StackedVector) = promote_type(map(eltype, v.components)...)
-Base.similar(v::StackedVector, T, dims::Dims) = similar(v.components[1], T, dims)
-
-CategoricalArrays.CategoricalArray(v::StackedVector) = CategoricalArray(v[:]) # could be more efficient
-
-
-"""
-An AbstractVector that is a view into another AbstractVector with
-repeated elements
-
-NOTE: Not exported.
-
-### Constructor
-
-```julia
-RepeatedVector(parent::AbstractVector, inner::Int, outer::Int)
-```
-
-### Arguments
-
-* `parent` : the AbstractVector that's repeated
-* `inner` : the numer of times each element is repeated
-* `outer` : the numer of times the whole vector is repeated after
-  expanded by `inner`
-
-`inner` and `outer` have the same meaning as similarly named arguments
-to `repeat`.
-
-### Examples
-
-```julia
-RepeatedVector([1,2], 3, 1)   # [1,1,1,2,2,2]
-RepeatedVector([1,2], 1, 3)   # [1,2,1,2,1,2]
-RepeatedVector([1,2], 2, 2)   # [1,2,1,2,1,2,1,2]
-```
-
-"""
-type RepeatedVector{T} <: AbstractVector{T}
-    parent::AbstractVector{T}
-    inner::Int
-    outer::Int
-end
-
-function Base.getindex{T,I<:Real}(v::RepeatedVector{T},i::AbstractVector{I})
-    N = length(v.parent)
-    idx = Int[Base.fld1(mod1(j,v.inner*N),v.inner) for j in i]
-    v.parent[idx]
-end
-function Base.getindex{T}(v::RepeatedVector{T},i::Real)
-    N = length(v.parent)
-    idx = Base.fld1(mod1(i,v.inner*N),v.inner)
-    v.parent[idx]
-end
-Base.getindex(v::RepeatedVector,i::Range) = getindex(v, [i;])
-
-Base.size(v::RepeatedVector) = (length(v),)
-Base.length(v::RepeatedVector) = v.inner * v.outer * length(v.parent)
-Base.ndims(v::RepeatedVector) = 1
-Base.eltype{T}(v::RepeatedVector{T}) = T
-Base.reverse(v::RepeatedVector) = RepeatedVector(reverse(v.parent), v.inner, v.outer)
-Base.similar(v::RepeatedVector, T, dims::Dims) = similar(v.parent, T, dims)
-Base.unique(v::RepeatedVector) = unique(v.parent)
-
-function CategoricalArrays.CategoricalArray(v::RepeatedVector)
-    res = CategoricalArrays.CategoricalArray(v.parent)
-    res.refs = repeat(res.refs, inner = [v.inner], outer = [v.outer])
-    res
-end
-
-##############################################################################
-##
-## stackdt()
-## meltdt()
-## Reshaping using referencing (issue #145), using the above vector types
-##
-##############################################################################
-
-"""
-A stacked view of a DataTable (long format)
-
-Like `stack` and `melt`, but a view is returned rather than data
-copies.
-
-```julia
-stackdt(dt::AbstractDataTable, [measure_vars], [id_vars];
-        variable_name::Symbol=:variable, value_name::Symbol=:value)
-meltdt(dt::AbstractDataTable, [id_vars], [measure_vars];
-       variable_name::Symbol=:variable, value_name::Symbol=:value)
-```
-
-### Arguments
-
-* `dt` : the wide AbstractDataTable
-
-* `measure_vars` : the columns to be stacked (the measurement
-  variables), a normal column indexing type, like a Symbol,
-  Vector{Symbol}, Int, etc.; for `melt`, defaults to all
-  variables that are not `id_vars`
-
-* `id_vars` : the identifier columns that are repeated during
-  stacking, a normal column indexing type; for `stack` defaults to all
-  variables that are not `measure_vars`
-
-### Result
-
-* `::DataTable` : the long-format datatable with column `:value`
-  holding the values of the stacked columns (`measure_vars`), with
-  column `:variable` a Vector of Symbols with the `measure_vars` name,
-  and with columns for each of the `id_vars`.
-
-The result is a view because the columns are special AbstractVectors
-that return indexed views into the original DataTable.
-
-### Examples
-
-```julia
-d1 = DataTable(a = repeat([1:3;], inner = [4]),
-               b = repeat([1:4;], inner = [3]),
-               c = randn(12),
-               d = randn(12),
-               e = map(string, 'a':'l'))
-
-d1s = stackdt(d1, [:c, :d])
-d1s2 = stackdt(d1, [:c, :d], [:a])
-d1m = meltdt(d1, [:a, :b, :e])
-```
-
-"""
-function stackdt(dt::AbstractDataTable, measure_vars::Vector{Int},
-                 id_vars::Vector{Int}; variable_name::Symbol=:variable,
-                 value_name::Symbol=:value)
-    N = length(measure_vars)
-    cnames = names(dt)[id_vars]
-    insert!(cnames, 1, value_name)
-    insert!(cnames, 1, variable_name)
-    DataTable(Any[RepeatedVector(_names(dt)[measure_vars], nrow(dt), 1),   # variable
-                  StackedVector(Any[dt[:,c] for c in measure_vars]),     # value
-                  [RepeatedVector(dt[:,c], 1, N) for c in id_vars]...],     # id_var columns
-              cnames)
-end
-function stackdt(dt::AbstractDataTable, measure_var::Int, id_var::Int;
-                 variable_name::Symbol=:variable, value_name::Symbol=:value)
-    stackdt(dt, [measure_var], [id_var]; variable_name=variable_name,
-            value_name=value_name)
-end
-function stackdt(dt::AbstractDataTable, measure_vars, id_var::Int;
-                 variable_name::Symbol=:variable, value_name::Symbol=:value)
-    stackdt(dt, measure_vars, [id_var]; variable_name=variable_name,
-            value_name=value_name)
-end
-function stackdt(dt::AbstractDataTable, measure_var::Int, id_vars;
-                 variable_name::Symbol=:variable, value_name::Symbol=:value)
-    stackdt(dt, [measure_var], id_vars; variable_name=variable_name,
-            value_name=value_name)
-end
-function stackdt(dt::AbstractDataTable, measure_vars, id_vars;
-                 variable_name::Symbol=:variable, value_name::Symbol=:value)
-    stackdt(dt, index(dt)[measure_vars], index(dt)[id_vars];
-            variable_name=variable_name, value_name=value_name)
-end
-function stackdt(dt::AbstractDataTable, measure_vars = numeric_vars(dt);
-                 variable_name::Symbol=:variable, value_name::Symbol=:value)
-    m_inds = index(dt)[measure_vars]
-    stackdt(dt, m_inds, _setdiff(1:ncol(dt), m_inds);
-            variable_name=variable_name, value_name=value_name)
-end
-
-"""
-A stacked view of a DataTable (long format); see `stackdt`
-"""
-function meltdt(dt::AbstractDataTable, id_vars; variable_name::Symbol=:variable,
-                value_name::Symbol=:value)
-    id_inds = index(dt)[id_vars]
-    stackdt(dt, _setdiff(1:ncol(dt), id_inds), id_inds;
-            variable_name=variable_name, value_name=value_name)
-end
-function meltdt(dt::AbstractDataTable, id_vars, measure_vars;
-                variable_name::Symbol=:variable, value_name::Symbol=:value)
-    stackdt(dt, measure_vars, id_vars; variable_name=variable_name,
-            value_name=value_name)
-end
-meltdt(dt::AbstractDataTable; variable_name::Symbol=:variable, value_name::Symbol=:value) =
-    stackdt(dt; variable_name=variable_name, value_name=value_name)
diff --git a/src/datatable/datatable.jl b/src/datatable/datatable.jl
index eed2e0a..9d8dd37 100644
--- a/src/datatable/datatable.jl
+++ b/src/datatable/datatable.jl
@@ -103,7 +103,7 @@ type DataTable <: AbstractDataTable
                 strnames = string.(names(colindex))
                 for (i,u) in enumerate(uniques)
                     indices = find(lengths .== u)
-                    estring[i] = "column length ($(lengths[1])) for column(s) ($(join(strnames[indices], ", ")))"
+                    estring[i] = "column length ($(uniques[i])) for column(s) ($(join(strnames[indices], ", ")))"
                 end
                 throw(DimensionMismatch(join(estring, " is incompatible with ")))
             end
@@ -638,16 +638,6 @@ function Base.insert!(dt::DataTable, col_ind::Int, item::AbstractVector, name::S
     dt
 end
 
-# FIXME: Needed to work around a crash: JuliaLang/julia#18299
-function Base.insert!(dt::DataTable, col_ind::Int, item::NullableArray, name::Symbol)
-    0 < col_ind <= ncol(dt) + 1 || throw(BoundsError())
-    size(dt, 1) == length(item) || size(dt, 1) == 0 || error("number of rows does not match")
-
-    insert!(index(dt), col_ind, name)
-    insert!(dt.columns, col_ind, item)
-    dt
-end
-
 function Base.insert!(dt::DataTable, col_ind::Int, item, name::Symbol)
     insert!(dt, col_ind, upgrade_scalar(dt, item), name)
 end
diff --git a/src/deprecated.jl b/src/deprecated.jl
index 6f176a8..83912d7 100644
--- a/src/deprecated.jl
+++ b/src/deprecated.jl
@@ -18,5 +18,5 @@ import Base: keys, values, insert!
 
 @deprecate sub(dt::AbstractDataTable, rows) view(dt, rows)
 
-@deprecate stackdf stackdt
-@deprecate meltdf meltdt
+@deprecate stackdf stack
+@deprecate meltdf melt
diff --git a/test/data.jl b/test/data.jl
index a59b2bc..5f57b8a 100644
--- a/test/data.jl
+++ b/test/data.jl
@@ -169,22 +169,22 @@ module TestData
     d1m_named = melt(d1[[1,3,4]], :a, variable_name=:letter, value_name=:someval)
     @test names(d1m_named) == [:letter, :someval, :a]
 
-    stackdt(d1, :a)
-    d1s = stackdt(d1, [:a, :b])
-    d1s2 = stackdt(d1, [:c, :d])
-    d1s3 = stackdt(d1)
-    d1m = meltdt(d1, [:c, :d, :e])
+    stack(d1, :a)
+    d1s = stack(d1, [:a, :b])
+    d1s2 = stack(d1, [:c, :d])
+    d1s3 = stack(d1)
+    d1m = melt(d1, [:c, :d, :e])
     @test isequal(d1s[1:12, :c], d1[:c])
     @test isequal(d1s[13:24, :c], d1[:c])
     @test isequal(d1s2, d1s3)
     @test names(d1s) == [:variable, :value, :c, :d, :e]
     @test isequal(d1s, d1m)
-    d1m = meltdt(d1[[1,3,4]], :a)
+    d1m = melt(d1[[1,3,4]], :a)
     @test names(d1m) == [:variable, :value, :a]
 
-    d1s_named = stackdt(d1, [:a, :b], variable_name=:letter, value_name=:someval)
+    d1s_named = stack(d1, [:a, :b], variable_name=:letter, value_name=:someval)
     @test names(d1s_named) == [:letter, :someval, :c, :d, :e]
-    d1m_named = meltdt(d1, [:c, :d, :e], variable_name=:letter, value_name=:someval)
+    d1m_named = melt(d1, [:c, :d, :e], variable_name=:letter, value_name=:someval)
     @test names(d1m_named) == [:letter, :someval, :c, :d, :e]
 
     d1s[:id] = [1:12; 1:12]
diff --git a/test/datatable.jl b/test/datatable.jl
index 95ea0a1..6769733 100644
--- a/test/datatable.jl
+++ b/test/datatable.jl
@@ -292,22 +292,19 @@ module TestDataTable
     # Check that reordering levels does not confuse unstack
     levels!(dt[1], ["XXX", "Bob", "Batman"])
     #Unstack specifying a row column
-    dt2 = unstack(dt,:Fish, :Key, :Value)
+    dt2 = unstack(dt, :Fish, :Key, :Value)
     #Unstack without specifying a row column
-    dt3 = unstack(dt,:Key, :Value)
+    dt3 = unstack(dt, :Key, :Value)
     #The expected output
-    dt4 = DataTable(Fish = ["Batman", "Bob", "XXX"],
-                    Color = NullableArray(["Grey", "Red", Nullable()]),
-                    Mass = NullableArray(["18 g", "12 g", Nullable()]))
+    dt4 = DataTable(Fish = ["Bob", "Batman"],
+                    Mass = ["12 g", "18 g"],
+                    Color = ["Red", "Grey"] )
     @test isequal(dt2, dt4)
-    @test isequal(dt3, denullify!(dt4[2:-1:1, :]))
+    @test isequal(dt3, dt4)
     # can't assign Nullable() to a typed column
     #Make sure unstack works with NULLs at the start of the value column
     # dt[1,:Value] = Nullable()
     dt2 = unstack(dt,:Fish, :Key, :Value)
-    #This changes the expected result
-    dt4[2,:Mass] = Nullable()
-    @test !isequal(dt2, dt4)
 
     dt = DataTable(A = 1:10, B = 'A':'J')
     @test !(dt[:,:] === dt)
diff --git a/test/show.jl b/test/show.jl
index 8bbbd78..abad44c 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -30,13 +30,6 @@ module TestShow
 
     dt = DataTable(A = Vector{String}(3))
 
-    A = DataTables.StackedVector(Any[[1, 2, 3], [4, 5, 6], [7, 8, 9]])
-    show(io, A)
-    A = DataTables.RepeatedVector([1, 2, 3], 5, 1)
-    show(io, A)
-    A = DataTables.RepeatedVector([1, 2, 3], 1, 5)
-    show(io, A)
-
     #Test show output for REPL and similar
     dt = DataTable(Fish = ["Suzy", "Amir"], Mass = [1.5, Nullable()])
     io = IOBuffer()

From c4e218ecb2a0edd8221f6be1d4523bffe6d2be7a Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Mon, 13 Mar 2017 23:26:18 -0700
Subject: [PATCH 08/43] DataFrames doesn't reshape 2d Arrays -> Vectors so
 don't do it here

---
 src/datatable/datatable.jl | 7 ++-----
 test/constructors.jl       | 1 +
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/datatable/datatable.jl b/src/datatable/datatable.jl
index 9d8dd37..5c037d4 100644
--- a/src/datatable/datatable.jl
+++ b/src/datatable/datatable.jl
@@ -79,9 +79,6 @@ type DataTable <: AbstractDataTable
         elseif length(columns) != length(colindex)
             throw(DimensionMismatch("Number of columns ($(length(columns))) and column names ($(length(colindex))) are not equal"))
         end
-        # do we allow people assigning arrays to columns now?
-        # make sure that doesn't work
-        # can use !get(size(c, 2), 0)
         lengths = length.(columns)
         minlen, maxlen = extrema(lengths)
         if minlen == 0 && maxlen == 0
@@ -91,7 +88,7 @@ type DataTable <: AbstractDataTable
             if minlen == 1 && maxlen > 1
                 indices = find(lengths .== minlen)
                 for i in indices
-                    if !(typeof(columns[i]) <: AbstractArray)
+                    if !(typeof(columns[i]) <: AbstractVector)
                         columns[i] = fill(columns[i], maxlen)
                         lengths[i] = maxlen
                     end
@@ -112,7 +109,7 @@ type DataTable <: AbstractDataTable
             if isa(c, Range)
                 columns[i] = collect(c)
             elseif !isa(c, AbstractVector)
-                columns[i] =  size(c, 2) > 1 ? reshape(c, length(c)) : [c]
+                columns[i] =  size(c, 2) > 1 ? throw(DimensionMismatch("columns must be 1-dimensional")) : [c]
             end
         end
         return new(columns, colindex)
diff --git a/test/constructors.jl b/test/constructors.jl
index 2c080eb..c1520d4 100644
--- a/test/constructors.jl
+++ b/test/constructors.jl
@@ -64,6 +64,7 @@ module TestConstructors
     @testset "constructor errors" begin
         @test_throws DimensionMismatch DataTable(a=1, b=[])
         @test_throws DimensionMismatch DataTable(Any[collect(1:10)], DataTables.Index([:A, :B]))
+        @test_throws DimensionMismatch DataTable(A = rand(2,2))
     end
 
     @testset "column types" begin

From e9542261d2d3d532af7e47a1b889ab8fcd5c1d46 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Tue, 14 Mar 2017 09:42:10 -0700
Subject: [PATCH 09/43] minor cleanup

---
 src/abstractdatatable/reshape.jl | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl
index 5234864..e8399d2 100644
--- a/src/abstractdatatable/reshape.jl
+++ b/src/abstractdatatable/reshape.jl
@@ -76,9 +76,9 @@ function stack(dt::AbstractDataTable, measure_vars::Vector{Int},
     cnames = names(dt)[id_vars]
     insert!(cnames, 1, value_name)
     insert!(cnames, 1, variable_name)
-    DataTable(Any[Compat.repeat(_names(dt)[measure_vars], inner=nrow(dt)),   # variable
+    DataTable(Any[repeat(_names(dt)[measure_vars], inner=nrow(dt)),   # variable
                   vcat([dt[c] for c in measure_vars]...),                    # value
-                  [Compat.repeat(dt[c], outer=N) for c in id_vars]...],      # id_var columns
+                  [repeat(dt[c], outer=N) for c in id_vars]...],      # id_var columns
               cnames)
 end
 function stack(dt::AbstractDataTable, measure_var::Int, id_var::Int;
@@ -188,14 +188,12 @@ function unstack(dt::AbstractDataTable, rowkey::Int, colkey::Int, value::Int)
     # `rowkey` integer indicating which column to place along rows
     # `colkey` integer indicating which column to place along column headers
     # `value` integer indicating which column has values
-    anchor = dt[rowkey]
     values = dt[value]
     newcols = dt[colkey]
     uniquenewcols = unique(newcols)
-    nrow = length(anchor)
     ncol = length(uniquenewcols) + 1
     columns = Vector{Any}(ncol)
-    columns[1] = unique(anchor)
+    columns[1] = unique(dt[rowkey])
     for (i,coli) in enumerate(2:ncol)
         columns[coli] = values[find(newcols .== uniquenewcols[i])]
     end

From ed8a5156debcefe6726d091a669e4f8d88f2f0e6 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Wed, 15 Mar 2017 14:02:16 -0700
Subject: [PATCH 10/43] change (de)nullify back to copy and cleanup docstrings

---
 src/abstractdatatable/abstractdatatable.jl | 25 ++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index 583ecba..b51c017 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -713,10 +713,15 @@ Base.hcat(dt1::AbstractDataTable, dt2::AbstractDataTable, dtn::AbstractDataTable
 """
     vcat(dts::AbstractDataTable...)
 
-Vertically concatenate `AbstractDataTables` with matching columns.
+Vertically concatenate `AbstractDataTables` that have the same column names in
+the same order.
 
 ```julia
-julia> dt1 = DataTable(A=1:3, B=1:3); dt2 = DataTable(A=4:6, B=4:6); dt3 = DataTable(A=7:9, B=7:9, C=7:9);
+julia> dt1 = DataTable(A=1:3, B=1:3);
+
+julia> dt2 = DataTable(A=4:6, B=4:6);
+
+julia> dt3 = DataTable(A=7:9, B=7:9, C=7:9);
 
 julia> vcat(dt1, dt2)
 6×2 DataTables.DataTable
@@ -778,6 +783,9 @@ end
 Convert columns with a `Nullable` element type without any null values
 to a non-`Nullable` equivalent array type. The table `dt` is modified in place.
 
+Columns in the returned `AbstractDataTable` may alias the columns of the
+input `dt`.
+
 # Examples
 
 ```jldoctest
@@ -822,6 +830,9 @@ end
 Return a copy of `dt` where columns with a `Nullable` element type without any
 null values have been converted to a non-`Nullable` equivalent array type.
 
+Columns in the returned `AbstractDataTable` may alias the columns of the
+input `dt`. If no aliasing is desired, use `denullify!(deepcopy(dt))`.
+
 # Examples
 
 ```jldoctest
@@ -851,13 +862,16 @@ julia> eltypes(dt)
 
 See also [`denullify!`] & [`nullify`](@ref).
 """
-denullify(dt::AbstractDataTable) = denullify!(deepcopy(dt))
+denullify(dt::AbstractDataTable) = denullify!(copy(dt))
 
 """
     nullify!(dt::AbstractDataTable)
 
 Convert all columns of `dt` to nullable arrays. The table `dt` is modified in place.
 
+Columns in the returned `AbstractDataTable` may alias the columns of the
+input `dt`.
+
 # Examples
 
 ```jldoctest
@@ -902,6 +916,9 @@ nullify(x::AbstractCategoricalArray) = convert(NullableCategoricalArray, x)
 
 Return a copy of `dt` with all columns converted to nullable arrays.
 
+Columns in the returned `AbstractDataTable` may alias the columns of the
+input `dt`. If no aliasing is desired, use `nullify!(deepcopy(dt))`.
+
 # Examples
 
 ```jldoctest
@@ -932,7 +949,7 @@ julia> eltypes(dt)
 See also [`nullify!`](@ref) & [`denullify`](@ref).
 """
 function nullify(dt::AbstractDataTable)
-    nullify!(deepcopy(dt))
+    nullify!(copy(dt))
 end
 
 ## Documentation for methods defined elsewhere

From 1636a0cfbd415161de48eb0ccc3e68684296057a Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Wed, 15 Mar 2017 14:02:50 -0700
Subject: [PATCH 11/43] NullableArrays.unsafe_get -> compat(unsafe_get)

---
 src/abstractdatatable/io.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/abstractdatatable/io.jl b/src/abstractdatatable/io.jl
index 03174d9..6f3222b 100644
--- a/src/abstractdatatable/io.jl
+++ b/src/abstractdatatable/io.jl
@@ -45,7 +45,7 @@ function printtable(io::IO,
             if !isnull(dt[j][i])
                 if ! (etypes[j] <: Real)
                     print(io, quotemark)
-                    x = isa(dt[i, j], Nullable) ? NullableArrays.unsafe_get(dt[i, j]) : dt[i, j]
+                    x = isa(dt[i, j], Nullable) ? @compat(unsafe_get(dt[i, j])) : dt[i, j]
                     escapedprint(io, x, quotestr)
                     print(io, quotemark)
                 else
@@ -168,7 +168,7 @@ function Base.show(io::IO, ::MIME"text/latex", dt::AbstractDataTable)
             write(io, " & ")
             cell = dt[row,col]
             if !isnull(cell)
-                content = isa(cell, Nullable) ? NullableArrays.unsafe_get(cell) : cell
+                content = isa(cell, Nullable) ? @compat(unsafe_get(cell)) : cell
                 if mimewritable(MIME("text/latex"), content)
                     show(io, MIME("text/latex"), content)
                 else

From 91233d325967c818d812225aa67ada53fdf4b0cb Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Wed, 15 Mar 2017 14:03:26 -0700
Subject: [PATCH 12/43] default to NullableArray for joins that may introduce
 missing data

---
 src/abstractdatatable/join.jl | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/src/abstractdatatable/join.jl b/src/abstractdatatable/join.jl
index ede5c77..c8292cc 100644
--- a/src/abstractdatatable/join.jl
+++ b/src/abstractdatatable/join.jl
@@ -12,9 +12,6 @@ similar_nullable{T<:Nullable}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Varar
 similar_nullable{T,R}(dv::CategoricalArray{T,R}, dims::Union{Int, Tuple{Vararg{Int}}}) =
     NullableCategoricalArray(T, dims)
 
-similar_nullable(dt::AbstractDataTable, dims::Int) =
-    DataTable(Any[similar_nullable(x, dims) for x in columns(dt)], copy(index(dt)))
-
 # helper structure for DataTables joining
 immutable DataTableJoiner{DT1<:AbstractDataTable, DT2<:AbstractDataTable}
     dtl::DT1
@@ -44,7 +41,7 @@ Base.length(x::RowIndexMap) = length(x.orig)
 
 # composes the joined data table using the maps between the left and right
 # table rows and the indices of rows in the result
-function compose_joined_table(joiner::DataTableJoiner,
+function compose_joined_table(joiner::DataTableJoiner, kind::Symbol,
                               left_ixs::RowIndexMap, leftonly_ixs::RowIndexMap,
                               right_ixs::RowIndexMap, rightonly_ixs::RowIndexMap)
     @assert length(left_ixs) == length(right_ixs)
@@ -77,9 +74,9 @@ function compose_joined_table(joiner::DataTableJoiner,
     end
     all_orig_right_ixs = vcat(right_ixs.orig, rightonly_ixs.orig)
     resizelen = length(all_orig_right_ixs)+length(leftonly_ixs)
-    rightcols = Any[length(all_orig_right_ixs) >= resizelen ?
-                       resize!(col[all_orig_right_ixs], resizelen)[right_perm] :
-                       copy!(similar_nullable(col[all_orig_right_ixs], resizelen), col[all_orig_right_ixs])[right_perm]
+    rightcols = Any[kind == :inner ?
+                        col[all_orig_right_ixs][right_perm] :
+                        copy!(similar_nullable(col, resizelen), col[all_orig_right_ixs])[right_perm]
                     for col in columns(dtr_noon)]
     right_dt = DataTable(rightcols, names(dtr_noon))
     # merge left and right parts of the joined table
@@ -246,22 +243,22 @@ function Base.join(dt1::AbstractDataTable,
     joiner = DataTableJoiner(dt1, dt2, on)
 
     if kind == :inner
-        compose_joined_table(joiner, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
-                                                      group_rows(joiner.dtr_on),
-                                                      true, false, true, false)...)
+        compose_joined_table(joiner, kind, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
+                                                            group_rows(joiner.dtr_on),
+                                                            true, false, true, false)...)
     elseif kind == :left
-        compose_joined_table(joiner, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
-                                                      group_rows(joiner.dtr_on),
-                                                      true, true, true, false)...)
+        compose_joined_table(joiner, kind, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
+                                                            group_rows(joiner.dtr_on),
+                                                            true, true, true, false)...)
     elseif kind == :right
         right_ixs, rightonly_ixs, left_ixs, leftonly_ixs = update_row_maps!(joiner.dtr_on, joiner.dtl_on,
                                                                             group_rows(joiner.dtl_on),
                                                                             true, true, true, false)
-        compose_joined_table(joiner, left_ixs, leftonly_ixs, right_ixs, rightonly_ixs)
+        compose_joined_table(joiner, kind, left_ixs, leftonly_ixs, right_ixs, rightonly_ixs)
     elseif kind == :outer
-        compose_joined_table(joiner, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
-                                                      group_rows(joiner.dtr_on),
-                                                      true, true, true, true)...)
+        compose_joined_table(joiner, kind, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
+                                                            group_rows(joiner.dtr_on),
+                                                            true, true, true, true)...)
     elseif kind == :semi
         # hash the right rows
         dtr_on_grp = group_rows(joiner.dtr_on)

From 7462612c50bcc436232c2efac2c8275ec2a9b59d Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Wed, 15 Mar 2017 14:04:04 -0700
Subject: [PATCH 13/43] align comments

---
 src/abstractdatatable/reshape.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl
index e8399d2..381825b 100644
--- a/src/abstractdatatable/reshape.jl
+++ b/src/abstractdatatable/reshape.jl
@@ -76,9 +76,9 @@ function stack(dt::AbstractDataTable, measure_vars::Vector{Int},
     cnames = names(dt)[id_vars]
     insert!(cnames, 1, value_name)
     insert!(cnames, 1, variable_name)
-    DataTable(Any[repeat(_names(dt)[measure_vars], inner=nrow(dt)),   # variable
-                  vcat([dt[c] for c in measure_vars]...),                    # value
-                  [repeat(dt[c], outer=N) for c in id_vars]...],      # id_var columns
+    DataTable(Any[repeat(_names(dt)[measure_vars], inner=nrow(dt)), # variable
+                  vcat([dt[c] for c in measure_vars]...),           # value
+                  [repeat(dt[c], outer=N) for c in id_vars]...],    # id_var columns
               cnames)
 end
 function stack(dt::AbstractDataTable, measure_var::Int, id_var::Int;

From 9b65533073e6b2feefd53cd69c6ee8ef7a71b3e0 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Wed, 15 Mar 2017 14:08:10 -0700
Subject: [PATCH 14/43] lots of edits

clarify error messages, fix spacing, address case of length(::Symbol)
and length(::String) not giving the desired output (Vector length), and
unify constructors in accepting AbstractVector{Symbol} for colnames
---
 src/datatable/datatable.jl | 70 ++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 40 deletions(-)

diff --git a/src/datatable/datatable.jl b/src/datatable/datatable.jl
index 5c037d4..28f01b2 100644
--- a/src/datatable/datatable.jl
+++ b/src/datatable/datatable.jl
@@ -77,31 +77,25 @@ type DataTable <: AbstractDataTable
         if length(columns) == length(colindex) == 0
             return new(Vector{Any}(0), Index())
         elseif length(columns) != length(colindex)
-            throw(DimensionMismatch("Number of columns ($(length(columns))) and column names ($(length(colindex))) are not equal"))
+            throw(DimensionMismatch("Number of columns ($(length(columns))) and number of column names ($(length(colindex))) are not equal"))
         end
-        lengths = length.(columns)
+        lengths = [isa(col, AbstractArray) ? length(col) : 1 for col in columns]
         minlen, maxlen = extrema(lengths)
         if minlen == 0 && maxlen == 0
             return new(columns, colindex)
         elseif minlen != maxlen
             # recycle scalars
-            if minlen == 1 && maxlen > 1
-                indices = find(lengths .== minlen)
-                for i in indices
-                    if !(typeof(columns[i]) <: AbstractVector)
-                        columns[i] = fill(columns[i], maxlen)
-                        lengths[i] = maxlen
-                    end
-                end
+            for i in 1:length(columns)
+                typeof(columns[i]) <: AbstractArray && continue
+                columns[i] = fill(columns[i], maxlen)
+                lengths[i] = maxlen
             end
-            uniques = unique(lengths)
-            if length(uniques) != 1
-                estring = Vector{String}(length(uniques))
+            uls = unique(lengths)
+            if length(uls) != 1
+                # estring = Vector{String}(length(uniques))
                 strnames = string.(names(colindex))
-                for (i,u) in enumerate(uniques)
-                    indices = find(lengths .== u)
-                    estring[i] = "column length ($(uniques[i])) for column(s) ($(join(strnames[indices], ", ")))"
-                end
+                estring = ["column length ($(uls[i])) for column(s) ($(join(strnames[find(uls .== u)], ", ")))"
+                           for (i,u) in enumerate(uls)]
                 throw(DimensionMismatch(join(estring, " is incompatible with ")))
             end
         end
@@ -109,7 +103,7 @@ type DataTable <: AbstractDataTable
             if isa(c, Range)
                 columns[i] = collect(c)
             elseif !isa(c, AbstractVector)
-                columns[i] =  size(c, 2) > 1 ? throw(DimensionMismatch("columns must be 1-dimensional")) : [c]
+                columns[i] = size(c, 2) > 1 ? throw(DimensionMismatch("columns must be 1-dimensional")) : [c]
             end
         end
         return new(columns, colindex)
@@ -120,21 +114,16 @@ function DataTable(; kwargs...)
     if length(kwargs) == 0
         return DataTable(Any[], Index())
     end
-    colnames = Vector{Symbol}(length(kwargs))
-    columns = Vector{Any}(length(kwargs))
-    for (i,(k,v)) in enumerate(kwargs)
-        colnames[i] = Symbol(k)
-        columns[i] = v
-    end
+    colnames = [Symbol(k) for (k,v) in kwargs]
+    columns = Any[v for (k,v) in kwargs]
     DataTable(columns, Index(colnames))
 end
 
 function DataTable(columns::AbstractVector,
-                   cnames::Vector{Symbol} = gennames(length(columns)))
-    return DataTable(convert(Vector{Any}, columns), Index(cnames))
+                   cnames::AbstractVector{Symbol} = gennames(length(columns)))
+    return DataTable(convert(Vector{Any}, columns), Index(convert(Vector{Symbol}, cnames)))
 end
 
-
 # Initialize empty DataTable objects of arbitrary size
 function DataTable(t::Type, nrows::Integer, ncols::Integer)
     columns = Vector{Any}(ncols)
@@ -146,40 +135,41 @@ function DataTable(t::Type, nrows::Integer, ncols::Integer)
 end
 
 # Initialize an empty DataTable with specific eltypes and names
-function DataTable(column_eltypes::Vector{DataType}, cnames::Vector{Symbol}, nrows::Integer)
+function DataTable{T<:Type}(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol}, nrows::Integer)
     p = length(column_eltypes)
     columns = Vector{Any}(p)
     for j in 1:p
-        T = column_eltypes[j]
-        columns[j] = T <: Nullable ? NullableArray{eltype(T)}(nrows) : Vector{T}(nrows)
+        colT = column_eltypes[j]
+        columns[j] = colT <: Nullable ? NullableArray{eltype(colT)}(nrows) : Vector{colT}(nrows)
     end
-    return DataTable(columns, Index(cnames))
+    return DataTable(columns, Index(convert(Vector{Symbol}, cnames)))
 end
+
 # Initialize an empty DataTable with specific eltypes and names
 # and whether a nominal array should be created
-function DataTable(column_eltypes::Vector{DataType}, cnames::Vector{Symbol},
-                   nominal::Vector{Bool}, nrows::Integer)
+function DataTable{T<:Type}(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol},
+                              nominal::Vector{Bool}, nrows::Integer)
     p = length(column_eltypes)
     columns = Vector{Any}(p)
     for j in 1:p
-        T = column_eltypes[j]
+        colT = column_eltypes[j]
         if nominal[j]
-            columns[j] = T <: Nullable ? NullableCategoricalArray{T}(nrows) : CategoricalVector{T}(nrows)
+            columns[j] = colT <: Nullable ? NullableCategoricalArray{colT}(nrows) : CategoricalVector{colT}(nrows)
         else
-            columns[j] = T <: Nullable ? NullableArray{T}(nrows) : Vector{T}(nrows)
+            columns[j] = colT <: Nullable ? NullableArray{colT}(nrows) : Vector{colT}(nrows)
         end
     end
-    return DataTable(columns, Index(cnames))
+    return DataTable(columns, Index(convert(Vector{Symbol}, cnames)))
 end
 
 # Initialize an empty DataTable with specific eltypes
-function DataTable(column_eltypes::Vector{DataType}, nrows::Integer)
+function DataTable{T<:Type}(column_eltypes::AbstractVector{T}, nrows::Integer)
     p = length(column_eltypes)
     columns = Vector{Any}(p)
     cnames = gennames(p)
     for j in 1:p
-        T = column_eltypes[j]
-        columns[j] = T <: Nullable ? NullableArray{T}(nrows) : Vector{T}(nrows)
+        colT = column_eltypes[j]
+        columns[j] = colT <: Nullable ? NullableArray{colT}(nrows) : Vector{colT}(nrows)
     end
     return DataTable(columns, Index(cnames))
 end

From b643ff8f16795bda1982847f4bce5bd639876dc2 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Wed, 15 Mar 2017 14:12:29 -0700
Subject: [PATCH 15/43] tests and no need for compat

---
 src/abstractdatatable/io.jl | 4 ++--
 test/cat.jl                 | 6 ++++--
 test/constructors.jl        | 2 +-
 test/join.jl                | 2 +-
 4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/abstractdatatable/io.jl b/src/abstractdatatable/io.jl
index 6f3222b..6af518b 100644
--- a/src/abstractdatatable/io.jl
+++ b/src/abstractdatatable/io.jl
@@ -45,7 +45,7 @@ function printtable(io::IO,
             if !isnull(dt[j][i])
                 if ! (etypes[j] <: Real)
                     print(io, quotemark)
-                    x = isa(dt[i, j], Nullable) ? @compat(unsafe_get(dt[i, j])) : dt[i, j]
+                    x = isa(dt[i, j], Nullable) ? unsafe_get(dt[i, j]) : dt[i, j]
                     escapedprint(io, x, quotestr)
                     print(io, quotemark)
                 else
@@ -168,7 +168,7 @@ function Base.show(io::IO, ::MIME"text/latex", dt::AbstractDataTable)
             write(io, " & ")
             cell = dt[row,col]
             if !isnull(cell)
-                content = isa(cell, Nullable) ? @compat(unsafe_get(cell)) : cell
+                content = isa(cell, Nullable) ? unsafe_get(cell) : cell
                 if mimewritable(MIME("text/latex"), content)
                     show(io, MIME("text/latex"), content)
                 else
diff --git a/test/cat.jl b/test/cat.jl
index f26b8e7..a5b41b5 100644
--- a/test/cat.jl
+++ b/test/cat.jl
@@ -76,9 +76,9 @@ module TestCat
     @test vcat(null_dt, null_dt) == DataTable()
     @test vcat(null_dt, dt) == dt
     @test vcat(dt, null_dt) == dt
-    @test all(map((x,y) -> x <: y, eltypes(vcat(dt, dt)), (Float64, Float64, Int)))
+    @test eltypes(vcat(dt, dt)) == [Float64, Float64, Int]
     @test size(vcat(dt, dt)) == (size(dt,1)*2, size(dt,2))
-    @test all(map((x,y) -> x <: y, eltypes(vcat(dt, dt, dt)), (Float64, Float64, Int)))
+    @test eltypes(vcat(dt, dt, dt)) == [Float64, Float64, Int]
     @test size(vcat(dt, dt, dt)) == (size(dt,1)*3, size(dt,2))
 
     alt_dt = deepcopy(dt)
@@ -103,7 +103,9 @@ module TestCat
     dtc = DataTable(a = NullableArray([2, 3, 4]))
     dtd = DataTable(Any[2:4], [:a])
     dtab = vcat(dta, dtb)
+    @test isa(dtab[1], CategoricalArray)
     dtac = vcat(nullify(dta), dtc)
+    @test isa(dtac[1], NullableCategoricalArray)
     @test isequal(dtab[:a], [1, 2, 2, 2, 3, 4])
     @test isa(dtab[:a], CategoricalVector{Int})
     dc = vcat(dtd, dtc)
diff --git a/test/constructors.jl b/test/constructors.jl
index c1520d4..cd3589c 100644
--- a/test/constructors.jl
+++ b/test/constructors.jl
@@ -39,8 +39,8 @@ module TestConstructors
     dt = DataTable([Nullable{Int}, Nullable{Float64}], [:x1, :x2], 2)
     @test size(dt) == (2, 2)
     @test eltypes(dt) == [Nullable{Int}, Nullable{Float64}]
-
     @test isequal(dt, DataTable([Nullable{Int}, Nullable{Float64}], 2))
+    @test all(isnull, (dt[:x1], dt[:x2]))
 
     @test_throws BoundsError SubDataTable(DataTable(A=1), 0)
     @test_throws BoundsError SubDataTable(DataTable(A=1), 0)
diff --git a/test/join.jl b/test/join.jl
index 3838cd8..809161f 100644
--- a/test/join.jl
+++ b/test/join.jl
@@ -108,7 +108,7 @@ module TestJoin
                     Quantity = [3, 3, 2, 4])
     @test join(dt2, dt, on=:Name, kind=:left) == DataTable(Name = Nullable{String}["A", "B", "C", "A"],
                                                            Quantity = [3, 3, 2, 4],
-                                                           Mass = [1.5, 2.2, 1.1, 1.5])
+                                                           Mass = Nullable{Float64}[1.5, 2.2, 1.1, 1.5])
 
     # Test that join works when mixing Array and NullableArray (#1151)
     dt = DataTable([collect(1:10), collect(2:11)], [:x, :y])

From 4c6845225e044c9cfec4d5ae6177c20bf8a8f19a Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Wed, 15 Mar 2017 14:20:02 -0700
Subject: [PATCH 16/43] spacing mistakes

---
 src/datatable/datatable.jl | 2 +-
 test/iteration.jl          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/datatable/datatable.jl b/src/datatable/datatable.jl
index 28f01b2..6c53a9a 100644
--- a/src/datatable/datatable.jl
+++ b/src/datatable/datatable.jl
@@ -148,7 +148,7 @@ end
 # Initialize an empty DataTable with specific eltypes and names
 # and whether a nominal array should be created
 function DataTable{T<:Type}(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol},
-                              nominal::Vector{Bool}, nrows::Integer)
+                            nominal::Vector{Bool}, nrows::Integer)
     p = length(column_eltypes)
     columns = Vector{Any}(p)
     for j in 1:p
diff --git a/test/iteration.jl b/test/iteration.jl
index 7686428..afa93b2 100644
--- a/test/iteration.jl
+++ b/test/iteration.jl
@@ -16,7 +16,7 @@ module TestIteration
     end
 
     for col in eachcol(dt)
-        @test isa(col, Tuple{Symbol,Vector{Int}})
+        @test isa(col, Tuple{Symbol, Vector{Int}})
     end
 
     @test isequal(map(x -> minimum(convert(Array, x)), eachrow(dt)), [1,2])

From 731068103755f77360f99444cb1ee3e6d0d550b9 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Wed, 15 Mar 2017 14:35:17 -0700
Subject: [PATCH 17/43] throw errors on 1-d matrices and change confusing
 variable name

---
 src/datatable/datatable.jl | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/datatable/datatable.jl b/src/datatable/datatable.jl
index 6c53a9a..edf8144 100644
--- a/src/datatable/datatable.jl
+++ b/src/datatable/datatable.jl
@@ -92,7 +92,6 @@ type DataTable <: AbstractDataTable
             end
             uls = unique(lengths)
             if length(uls) != 1
-                # estring = Vector{String}(length(uniques))
                 strnames = string.(names(colindex))
                 estring = ["column length ($(uls[i])) for column(s) ($(join(strnames[find(uls .== u)], ", ")))"
                            for (i,u) in enumerate(uls)]
@@ -103,7 +102,9 @@ type DataTable <: AbstractDataTable
             if isa(c, Range)
                 columns[i] = collect(c)
             elseif !isa(c, AbstractVector)
-                columns[i] = size(c, 2) > 1 ? throw(DimensionMismatch("columns must be 1-dimensional")) : [c]
+                throw(DimensionMismatch("columns must be 1-dimensional"))
+            else
+                columns[i] = c
             end
         end
         return new(columns, colindex)
@@ -139,8 +140,8 @@ function DataTable{T<:Type}(column_eltypes::AbstractVector{T}, cnames::AbstractV
     p = length(column_eltypes)
     columns = Vector{Any}(p)
     for j in 1:p
-        colT = column_eltypes[j]
-        columns[j] = colT <: Nullable ? NullableArray{eltype(colT)}(nrows) : Vector{colT}(nrows)
+        elty = column_eltypes[j]
+        columns[j] = elty <: Nullable ? NullableArray{eltype(elty)}(nrows) : Vector{elty}(nrows)
     end
     return DataTable(columns, Index(convert(Vector{Symbol}, cnames)))
 end
@@ -152,11 +153,11 @@ function DataTable{T<:Type}(column_eltypes::AbstractVector{T}, cnames::AbstractV
     p = length(column_eltypes)
     columns = Vector{Any}(p)
     for j in 1:p
-        colT = column_eltypes[j]
+        elty = column_eltypes[j]
         if nominal[j]
-            columns[j] = colT <: Nullable ? NullableCategoricalArray{colT}(nrows) : CategoricalVector{colT}(nrows)
+            columns[j] = elty <: Nullable ? NullableCategoricalArray{elty}(nrows) : CategoricalVector{elty}(nrows)
         else
-            columns[j] = colT <: Nullable ? NullableArray{colT}(nrows) : Vector{colT}(nrows)
+            columns[j] = elty <: Nullable ? NullableArray{elty}(nrows) : Vector{elty}(nrows)
         end
     end
     return DataTable(columns, Index(convert(Vector{Symbol}, cnames)))
@@ -168,8 +169,8 @@ function DataTable{T<:Type}(column_eltypes::AbstractVector{T}, nrows::Integer)
     columns = Vector{Any}(p)
     cnames = gennames(p)
     for j in 1:p
-        colT = column_eltypes[j]
-        columns[j] = colT <: Nullable ? NullableArray{colT}(nrows) : Vector{colT}(nrows)
+        elty = column_eltypes[j]
+        columns[j] = elty <: Nullable ? NullableArray{elty}(nrows) : Vector{elty}(nrows)
     end
     return DataTable(columns, Index(cnames))
 end

From de280ba118ce1b177fb12bb82a2e7ce9ae961cdf Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Wed, 15 Mar 2017 15:21:04 -0700
Subject: [PATCH 18/43] add back check to differentiate scalars from
 AbstractArrays

---
 src/datatable/datatable.jl | 6 +++++-
 test/constructors.jl       | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/datatable/datatable.jl b/src/datatable/datatable.jl
index edf8144..96cf59a 100644
--- a/src/datatable/datatable.jl
+++ b/src/datatable/datatable.jl
@@ -102,7 +102,11 @@ type DataTable <: AbstractDataTable
             if isa(c, Range)
                 columns[i] = collect(c)
             elseif !isa(c, AbstractVector)
-                throw(DimensionMismatch("columns must be 1-dimensional"))
+                if isa(c, AbstractArray)
+                    throw(DimensionMismatch("columns must be 1-dimensional"))
+                else
+                    columns[i] = [c]
+                end
             else
                 columns[i] = c
             end
diff --git a/test/constructors.jl b/test/constructors.jl
index cd3589c..4053903 100644
--- a/test/constructors.jl
+++ b/test/constructors.jl
@@ -65,6 +65,7 @@ module TestConstructors
         @test_throws DimensionMismatch DataTable(a=1, b=[])
         @test_throws DimensionMismatch DataTable(Any[collect(1:10)], DataTables.Index([:A, :B]))
         @test_throws DimensionMismatch DataTable(A = rand(2,2))
+        @test_throws DimensionMismatch DataTable(A = rand(2,1))
     end
 
     @testset "column types" begin

From 88b20cace0423bbfd18396e6a6c6008be75a99c3 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Wed, 15 Mar 2017 18:16:32 -0700
Subject: [PATCH 19/43] save work

---
 src/abstractdatatable/join.jl | 78 +++++++++++++++++++++--------------
 test/join.jl                  | 22 ++++++++++
 2 files changed, 68 insertions(+), 32 deletions(-)

diff --git a/src/abstractdatatable/join.jl b/src/abstractdatatable/join.jl
index c8292cc..aa9bd6e 100644
--- a/src/abstractdatatable/join.jl
+++ b/src/abstractdatatable/join.jl
@@ -41,31 +41,29 @@ Base.length(x::RowIndexMap) = length(x.orig)
 
 # composes the joined data table using the maps between the left and right
 # table rows and the indices of rows in the result
-function compose_joined_table(joiner::DataTableJoiner, kind::Symbol,
+function compose_joined_table(joiner::DataTableJoiner,
                               left_ixs::RowIndexMap, leftonly_ixs::RowIndexMap,
                               right_ixs::RowIndexMap, rightonly_ixs::RowIndexMap)
     @assert length(left_ixs) == length(right_ixs)
     # compose left half of the result taking all left columns
     all_orig_left_ixs = vcat(left_ixs.orig, leftonly_ixs.orig)
-    if length(leftonly_ixs) > 0
+
+    lil = length(left_ixs)
+    loil = length(leftonly_ixs)
+    ril = length(right_ixs)
+    roil = length(rightonly_ixs)
+
+    if loil > 0
         # combine the matched (left_ixs.orig) and non-matched (leftonly_ixs.orig) indices of the left table rows
         # preserving the original rows order
-        all_orig_left_ixs = similar(left_ixs.orig, length(left_ixs)+length(leftonly_ixs))
+        all_orig_left_ixs = similar(left_ixs.orig, lil + loil)
         @inbounds all_orig_left_ixs[left_ixs.join] = left_ixs.orig
         @inbounds all_orig_left_ixs[leftonly_ixs.join] = leftonly_ixs.orig
     else
         # the result contains only the left rows that are matched to right rows (left_ixs)
         all_orig_left_ixs = left_ixs.orig # no need to copy left_ixs.orig as it's not used elsewhere
     end
-    ril = length(right_ixs)
-    loil = length(leftonly_ixs)
-    roil = length(rightonly_ixs)
-    left_dt = DataTable(Any[resize!(col[all_orig_left_ixs], length(all_orig_left_ixs)+roil)
-                            for col in columns(joiner.dtl)],
-                        names(joiner.dtl))
 
-    # compose right half of the result taking all right columns excluding on
-    dtr_noon = without(joiner.dtr, joiner.on_cols)
     # permutation to swap rightonly and leftonly rows
     right_perm = vcat(1:ril, ril+roil+1:ril+roil+loil, ril+1:ril+roil)
     if length(leftonly_ixs) > 0
@@ -73,15 +71,30 @@ function compose_joined_table(joiner::DataTableJoiner, kind::Symbol,
         right_perm[vcat(right_ixs.join, leftonly_ixs.join)] = right_perm[1:ril+loil]
     end
     all_orig_right_ixs = vcat(right_ixs.orig, rightonly_ixs.orig)
-    resizelen = length(all_orig_right_ixs)+length(leftonly_ixs)
-    rightcols = Any[kind == :inner ?
-                        col[all_orig_right_ixs][right_perm] :
-                        copy!(similar_nullable(col, resizelen), col[all_orig_right_ixs])[right_perm]
-                    for col in columns(dtr_noon)]
-    right_dt = DataTable(rightcols, names(dtr_noon))
-    # merge left and right parts of the joined table
-    res = hcat!(left_dt, right_dt)
 
+    # compose right half of the result taking all right columns excluding on
+    dtr_noon = without(joiner.dtr, joiner.on_cols)
+
+    laoli = length(all_orig_left_ixs)
+    laori = length(all_orig_right_ixs)
+    @assert laoli + roil == laori + loil
+    numrows = length(all_orig_left_ixs) + roil
+    numcols = ncol(joiner.dtl) + ncol(dtr_noon)
+
+    # if either size is smaller, then it's null
+    leftnull = laoli < laoli + roil
+    rightnull = laori < laori + loil
+    dtcols = Vector{Any}(numcols)
+    for (i,col) in enumerate(columns(joiner.dtl))
+        dtcols[i] = leftnull ? copy!(similar_nullable(col, numrows), col[all_orig_left_ixs]) :
+                               col[all_orig_left_ixs]
+    end
+    for (i,col) in enumerate(columns(dtr_noon))
+        dtcols[i+ncol(joiner.dtl)] = rightnull ? copy!(similar_nullable(col, numrows), col[all_orig_right_ixs])[right_perm] :
+                                                 col[all_orig_right_ixs][right_perm]
+    end
+    colnames = vcat(names(joiner.dtl), names(dtr_noon))
+    res = DataTable(dtcols, Index(colnames))
     if length(rightonly_ixs.join) > 0
         # some left rows are nulls, so the values of the "on" columns
         # need to be taken from the right
@@ -207,6 +220,8 @@ join(dt1::AbstractDataTable,
   - `:cross` : a full Cartesian product of the key combinations; every
     row of `dt1` is matched with every row of `dt2`
 
+For the three join operations that may introduce missing values, `:outer`, `:left`,
+and `:right`,
 Null values are filled in where needed to complete joins.
 
 ### Result
@@ -243,22 +258,21 @@ function Base.join(dt1::AbstractDataTable,
     joiner = DataTableJoiner(dt1, dt2, on)
 
     if kind == :inner
-        compose_joined_table(joiner, kind, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
-                                                            group_rows(joiner.dtr_on),
-                                                            true, false, true, false)...)
+        compose_joined_table(joiner, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
+                                                      group_rows(joiner.dtr_on),
+                                                      true, false, true, false)...)
     elseif kind == :left
-        compose_joined_table(joiner, kind, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
-                                                            group_rows(joiner.dtr_on),
-                                                            true, true, true, false)...)
+        compose_joined_table(joiner, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
+                                                      group_rows(joiner.dtr_on),
+                                                      true, true, true, false)...)
     elseif kind == :right
-        right_ixs, rightonly_ixs, left_ixs, leftonly_ixs = update_row_maps!(joiner.dtr_on, joiner.dtl_on,
-                                                                            group_rows(joiner.dtl_on),
-                                                                            true, true, true, false)
-        compose_joined_table(joiner, kind, left_ixs, leftonly_ixs, right_ixs, rightonly_ixs)
+        compose_joined_table(joiner, update_row_maps!(joiner.dtr_on, joiner.dtl_on,
+                                                      group_rows(joiner.dtl_on),
+                                                      true, true, true, false)[[3, 4, 1, 2]]...)
     elseif kind == :outer
-        compose_joined_table(joiner, kind, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
-                                                            group_rows(joiner.dtr_on),
-                                                            true, true, true, true)...)
+        compose_joined_table(joiner, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
+                                                      group_rows(joiner.dtr_on),
+                                                      true, true, true, true)...)
     elseif kind == :semi
         # hash the right rows
         dtr_on_grp = group_rows(joiner.dtr_on)
diff --git a/test/join.jl b/test/join.jl
index 809161f..5a09561 100644
--- a/test/join.jl
+++ b/test/join.jl
@@ -117,4 +117,26 @@ module TestJoin
         DataTable([collect(1:10), collect(2:11), collect(3:12)], [:x, :y, :z])
     @test join(dtnull, dt, on = :x) ==
         DataTable([collect(1:10), collect(3:12), collect(2:11)], [:x, :z, :y])
+
+    @testset "missingness" begin
+        small = DataTable(fruit = [:banana, :plantain, :melon],
+                          vegetable = [:artichoke, :leek, :pepper])
+        large = DataTable(fruit = [:banana, :plantain, :melon, :raspberry],
+                          vegetable = [:artichoke, :collards, :leek, :pepper])
+
+        @test join(small, large, on=:fruit, kind=:left) == DataTable(fruit = [:banana, :plantain, :melon],
+                                                                     vegetable = [:artichoke, :leek, :pepper],
+                                                                     vegetable_1 = [:artichoke, :collards, :leek])
+        @test join(small, large, on=:fruit, kind=:right) == DataTable(fruit = [:banana, :plantain, :melon],
+                                                                     vegetable = [:artichoke, :leek, :pepper],
+                                                                     vegetable_1 = [:artichoke, :collards, :leek])
+        @test join(small, large, on=:fruit, kind=:outer)
+
+        @test join(small, large, on=:vegetable, kind=:left)
+        @test join(small, large, on=:vegetable, kind=:right)
+        @test join(small, large, on=:vegetable, kind=:outer)
+
+        @test join(small, large, on=[:fruit, :vegetable], kind=:outer)
+        @test join(small, large, on=[:fruit, :vegetable], kind=:left)
+        @test join(small, large, on=[:fruit, :vegetable], kind=:right)
 end

From be1cacdd30dee452de5714821918e00a47058e66 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Thu, 16 Mar 2017 14:45:31 -0700
Subject: [PATCH 20/43] save progress, switch to test master

---
 src/abstractdatatable/join.jl | 59 +++++++++++++++--------------------
 1 file changed, 26 insertions(+), 33 deletions(-)

diff --git a/src/abstractdatatable/join.jl b/src/abstractdatatable/join.jl
index aa9bd6e..6f2c292 100644
--- a/src/abstractdatatable/join.jl
+++ b/src/abstractdatatable/join.jl
@@ -41,16 +41,16 @@ Base.length(x::RowIndexMap) = length(x.orig)
 
 # composes the joined data table using the maps between the left and right
 # table rows and the indices of rows in the result
-function compose_joined_table(joiner::DataTableJoiner,
+function compose_joined_table(joiner::DataTableJoiner, kind::Symbol,
                               left_ixs::RowIndexMap, leftonly_ixs::RowIndexMap,
                               right_ixs::RowIndexMap, rightonly_ixs::RowIndexMap)
     @assert length(left_ixs) == length(right_ixs)
     # compose left half of the result taking all left columns
     all_orig_left_ixs = vcat(left_ixs.orig, leftonly_ixs.orig)
 
+    ril = length(right_ixs)
     lil = length(left_ixs)
     loil = length(leftonly_ixs)
-    ril = length(right_ixs)
     roil = length(rightonly_ixs)
 
     if loil > 0
@@ -63,7 +63,6 @@ function compose_joined_table(joiner::DataTableJoiner,
         # the result contains only the left rows that are matched to right rows (left_ixs)
         all_orig_left_ixs = left_ixs.orig # no need to copy left_ixs.orig as it's not used elsewhere
     end
-
     # permutation to swap rightonly and leftonly rows
     right_perm = vcat(1:ril, ril+roil+1:ril+roil+loil, ril+1:ril+roil)
     if length(leftonly_ixs) > 0
@@ -75,26 +74,20 @@ function compose_joined_table(joiner::DataTableJoiner,
     # compose right half of the result taking all right columns excluding on
     dtr_noon = without(joiner.dtr, joiner.on_cols)
 
-    laoli = length(all_orig_left_ixs)
-    laori = length(all_orig_right_ixs)
-    @assert laoli + roil == laori + loil
-    numrows = length(all_orig_left_ixs) + roil
-    numcols = ncol(joiner.dtl) + ncol(dtr_noon)
-
-    # if either size is smaller, then it's null
-    leftnull = laoli < laoli + roil
-    rightnull = laori < laori + loil
-    dtcols = Vector{Any}(numcols)
-    for (i,col) in enumerate(columns(joiner.dtl))
-        dtcols[i] = leftnull ? copy!(similar_nullable(col, numrows), col[all_orig_left_ixs]) :
-                               col[all_orig_left_ixs]
+    nrow = length(all_orig_left_ixs) + roil
+    @assert nrow == length(all_orig_right_ixs) + loil
+    ncl = ncol(joiner.dtl)
+    cols = Vector{Any}(ncl + ncol(dtr_noon))
+    for (i, col) in enumerate(columns(joiner.dtl))
+        cols[i] = kind == :inner ? col[all_orig_left_ixs] :
+                                   copy!(similar_nullable(col, nrow), col[all_orig_left_ixs])
     end
-    for (i,col) in enumerate(columns(dtr_noon))
-        dtcols[i+ncol(joiner.dtl)] = rightnull ? copy!(similar_nullable(col, numrows), col[all_orig_right_ixs])[right_perm] :
-                                                 col[all_orig_right_ixs][right_perm]
+    for (i, col) in enumerate(columns(dtr_noon))
+        cols[i+ncl] = kind == :inner ? col[all_orig_right_ixs] :
+                                       copy!(similar_nullable(col, nrow), col[all_orig_right_ixs])[right_perm]
     end
-    colnames = vcat(names(joiner.dtl), names(dtr_noon))
-    res = DataTable(dtcols, Index(colnames))
+    res = DataTable(cols, vcat(names(joiner.dtl), names(dtr_noon)))
+
     if length(rightonly_ixs.join) > 0
         # some left rows are nulls, so the values of the "on" columns
         # need to be taken from the right
@@ -258,21 +251,21 @@ function Base.join(dt1::AbstractDataTable,
     joiner = DataTableJoiner(dt1, dt2, on)
 
     if kind == :inner
-        compose_joined_table(joiner, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
-                                                      group_rows(joiner.dtr_on),
-                                                      true, false, true, false)...)
+        compose_joined_table(joiner, kind, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
+                                                            group_rows(joiner.dtr_on),
+                                                            true, false, true, false)...)
     elseif kind == :left
-        compose_joined_table(joiner, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
-                                                      group_rows(joiner.dtr_on),
-                                                      true, true, true, false)...)
+        compose_joined_table(joiner, kind, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
+                                                            group_rows(joiner.dtr_on),
+                                                            true, true, true, false)...)
     elseif kind == :right
-        compose_joined_table(joiner, update_row_maps!(joiner.dtr_on, joiner.dtl_on,
-                                                      group_rows(joiner.dtl_on),
-                                                      true, true, true, false)[[3, 4, 1, 2]]...)
+        compose_joined_table(joiner, kind, update_row_maps!(joiner.dtr_on, joiner.dtl_on,
+                                                            group_rows(joiner.dtl_on),
+                                                            true, true, true, false)[[3, 4, 1, 2]]...)
     elseif kind == :outer
-        compose_joined_table(joiner, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
-                                                      group_rows(joiner.dtr_on),
-                                                      true, true, true, true)...)
+        compose_joined_table(joiner, kind, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
+                                                            group_rows(joiner.dtr_on),
+                                                            true, true, true, true)...)
     elseif kind == :semi
         # hash the right rows
         dtr_on_grp = group_rows(joiner.dtr_on)

From 19ffb58d8745b8045be7e4e8be0e2374ac6b290c Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Thu, 16 Mar 2017 15:50:38 -0700
Subject: [PATCH 21/43] join is ready and tests in place. right join still
 broken

---
 test/data.jl | 10 +++----
 test/join.jl | 76 ++++++++++++++++++++++++++++++++++------------------
 2 files changed, 55 insertions(+), 31 deletions(-)

diff --git a/test/data.jl b/test/data.jl
index 5f57b8a..ca54f26 100644
--- a/test/data.jl
+++ b/test/data.jl
@@ -218,8 +218,8 @@ module TestData
     m1 = join(dt1, dt2, on = :a, kind=:inner)
     @test isequal(m1[:a], dt1[:a][dt1[:a] .<= 5]) # preserves dt1 order
     m2 = join(dt1, dt2, on = :a, kind = :outer)
-    @test isequal(m2[:a], dt1[:a]) # preserves dt1 order
-    @test isequal(m2[:b], dt1[:b]) # preserves dt1 order
+    @test isequal(m2[:a], NullableArray(dt1[:a])) # preserves dt1 order
+    @test isequal(m2[:b], NullableArray(dt1[:b])) # preserves dt1 order
     # TODO: Re-enable
     m2 = join(dt1, dt2, on = :a, kind = :outer)
     # @test isequal(m2[:b2],
@@ -240,13 +240,13 @@ module TestData
     @test m1[:a] == [1, 2]
 
     m2 = join(dt1, dt2, on = :a, kind = :left)
-    @test m2[:a] == [1, 2, 3]
+    @test isequal(m2[:a], NullableArray([1, 2, 3]))
 
     m3 = join(dt1, dt2, on = :a, kind = :right)
-    @test m3[:a] == [1, 2, 4]
+    @test isequal(m3[:a], NullableArray([1, 2, 4]))
 
     m4 = join(dt1, dt2, on = :a, kind = :outer)
-    @test m4[:a] == [1, 2, 3, 4]
+    @test isequal(m4[:a], NullableArray([1, 2, 3, 4]))
 
     # test with nulls (issue #185)
     dt1 = DataTable()
diff --git a/test/join.jl b/test/join.jl
index 5a09561..23fc836 100644
--- a/test/join.jl
+++ b/test/join.jl
@@ -2,8 +2,8 @@ module TestJoin
     using Base.Test
     using DataTables
 
-    name = DataTable(ID = [1, 2, 3], Name = NullableArray(["John Doe", "Jane Doe", "Joe Blogs"]))
-    job = DataTable(ID = [1, 2, 2, 4], Job = NullableArray(["Lawyer", "Doctor", "Florist", "Farmer"]))
+    name = DataTable(ID = NullableArray([1, 2, 3]), Name = NullableArray(["John Doe", "Jane Doe", "Joe Blogs"]))
+    job = DataTable(ID = NullableArray([1, 2, 2, 4]), Job = NullableArray(["Lawyer", "Doctor", "Florist", "Farmer"]))
 
     # Join on symbols or vectors of symbols
     join(name, job, on = :ID)
@@ -13,7 +13,7 @@ module TestJoin
     #@test_throws join(name, job)
 
     # Test output of various join types
-    outer = DataTable(ID = [1, 2, 2, 3, 4],
+    outer = DataTable(ID = NullableArray([1, 2, 2, 3, 4]),
                       Name = NullableArray(["John Doe", "Jane Doe", "Jane Doe", "Joe Blogs", Nullable()]),
                       Job = NullableArray(["Lawyer", "Doctor", "Florist", Nullable(), "Farmer"]))
 
@@ -70,7 +70,7 @@ module TestJoin
     @test_throws ArgumentError join(dt1, dt2, on = :A, kind = :cross)
 
     # test empty inputs
-    simple_dt(len::Int, col=:A) = (dt = DataTable(); dt[col]=collect(1:len); dt)
+    simple_dt(len::Int, col=:A) = (dt = DataTable(); dt[col]=NullableArray(collect(1:len)); dt)
     @test isequal(join(simple_dt(0), simple_dt(0), on = :A, kind = :left),  simple_dt(0))
     @test isequal(join(simple_dt(2), simple_dt(0), on = :A, kind = :left),  simple_dt(2))
     @test isequal(join(simple_dt(0), simple_dt(2), on = :A, kind = :left),  simple_dt(0))
@@ -107,7 +107,7 @@ module TestJoin
     dt2 = DataTable(Name = Nullable{String}["A", "B", "C", "A"],
                     Quantity = [3, 3, 2, 4])
     @test join(dt2, dt, on=:Name, kind=:left) == DataTable(Name = Nullable{String}["A", "B", "C", "A"],
-                                                           Quantity = [3, 3, 2, 4],
+                                                           Quantity = Nullable{Int}[3, 3, 2, 4],
                                                            Mass = Nullable{Float64}[1.5, 2.2, 1.1, 1.5])
 
     # Test that join works when mixing Array and NullableArray (#1151)
@@ -118,25 +118,49 @@ module TestJoin
     @test join(dtnull, dt, on = :x) ==
         DataTable([collect(1:10), collect(3:12), collect(2:11)], [:x, :z, :y])
 
-    @testset "missingness" begin
-        small = DataTable(fruit = [:banana, :plantain, :melon],
-                          vegetable = [:artichoke, :leek, :pepper])
-        large = DataTable(fruit = [:banana, :plantain, :melon, :raspberry],
-                          vegetable = [:artichoke, :collards, :leek, :pepper])
-
-        @test join(small, large, on=:fruit, kind=:left) == DataTable(fruit = [:banana, :plantain, :melon],
-                                                                     vegetable = [:artichoke, :leek, :pepper],
-                                                                     vegetable_1 = [:artichoke, :collards, :leek])
-        @test join(small, large, on=:fruit, kind=:right) == DataTable(fruit = [:banana, :plantain, :melon],
-                                                                     vegetable = [:artichoke, :leek, :pepper],
-                                                                     vegetable_1 = [:artichoke, :collards, :leek])
-        @test join(small, large, on=:fruit, kind=:outer)
-
-        @test join(small, large, on=:vegetable, kind=:left)
-        @test join(small, large, on=:vegetable, kind=:right)
-        @test join(small, large, on=:vegetable, kind=:outer)
-
-        @test join(small, large, on=[:fruit, :vegetable], kind=:outer)
-        @test join(small, large, on=[:fruit, :vegetable], kind=:left)
-        @test join(small, large, on=[:fruit, :vegetable], kind=:right)
+    @testset "complete set of joins" begin
+        small = DataTable(id = [1, 3, 5], fid = [1.0, 3.0, 5.0])
+        large = DataTable(id = [0, 1, 2, 3, 4], fid = [0.0, 1.0, 2.0, 3.0, 4.0])
+        N = Nullable()
+
+        @test join(small, large, kind=:cross) == DataTable(id = repeat([1, 3, 5], inner=5),
+                                                           fid = repeat([1.0, 3.0, 5.0], inner=5),
+                                                           id_1 = repeat([0, 1, 2, 3, 4], outer=3),
+                                                           fid_1 = repeat([0.0, 1.0, 2.0, 3.0, 4.0], outer=3))
+        # id
+        @test join(small, large, on=:id, kind=:inner) == DataTable(id = [1, 3], fid = [1.0, 3.0], fid_1 = [1.0, 3.0])
+        @test join(small, large, on=:id, kind=:left) == nullify!(DataTable(id = [1, 3, 5], fid = [1.0, 3.0, 5.0], fid_1 = [1.0, 3.0, N]))
+        # FIXME
+        # @test join(small, large, on=:id, kind=:right) == nullify!(DataTable(id = [1, 3, 0, 2, 4],
+        #                                                                     fid = [1.0, 3.0, N, N, N],
+        #                                                                     fid_1 = [1.0, 3.0, 0.0, 2.0, 4.0])
+        @test join(small, large, on=:id, kind=:outer) == nullify!(DataTable(id = [1, 3, 5, 0, 2, 4],
+                                                                            fid = [1.0, 3.0, 5.0, N, N, N],
+                                                                            fid_1 = [1.0, 3.0, N, 0.0, 2.0, 4.0]))
+        @test join(small, large, on=:id, kind=:semi) == DataTable(id = [1, 3], fid = [1.0, 3.0])
+        @test join(small, large, on=:id, kind=:anti) == DataTable(id = 5, fid = 5.0)
+
+        # fid
+        @test join(small, large, on=:fid, kind=:inner) == DataTable(id = [1, 3], fid = [1.0, 3.0], id_1 = [1, 3])
+        @test join(small, large, on=:fid, kind=:left) == nullify!(DataTable(id = [1, 3, 5], fid = [1.0, 3.0, 5.0], id_1 = [1, 3, N]))
+        # FIXME
+        # @test join(small, large, on=:fid, kind=:right) == nullify!(DataTable(id = [1, 3, N, N, N],
+        #                                                                      fid = [1.0, 3.0, 0.0, 2.0, 4.0],
+        #                                                                      id_1 = [1, 3, 0, 2, 4]))
+        @test join(small, large, on=:fid, kind=:outer) == nullify!(DataTable(id = [1, 3, 5, N, N, N],
+                                                                             fid = [1.0, 3.0, 5.0, 0.0, 2.0, 4.0],
+                                                                             id_1 = [1, 3, N, 0, 2, 4]))
+        @test join(small, large, on=:fid, kind=:semi) == DataTable(id = [1, 3], fid = [1.0, 3.0])
+        @test join(small, large, on=:fid, kind=:anti) == DataTable(id = 5, fid = 5.0)
+
+        # both
+        @test join(small, large, on=[:id, :fid], kind=:inner) == DataTable(id = [1, 3], fid = [1.0, 3.0])
+        @test join(small, large, on=[:id, :fid], kind=:left) == nullify!(DataTable(id = [1, 3, 5], fid = [1.0, 3.0, 5.0]))
+        # FIXME
+        # @test join(small, large, on=[:id, :fid], kind=:right) == nullify!(DataTable(id = [1, 3, 0, 2, 4], fid = [1.0, 3.0, 0.0, 2.0, 4.0]))
+        @test join(small, large, on=[:id, :fid], kind=:outer) == nullify!(DataTable(id = [1, 3, 5, 0, 2, 4],
+                                                                                    fid = [1.0, 3.0, 5.0, 0.0, 2.0, 4.0]))
+        @test join(small, large, on=[:id, :fid], kind=:semi) == DataTable(id = [1, 3], fid = [1.0, 3.0])
+        @test join(small, large, on=[:id, :fid], kind=:anti) == DataTable(id = 5, fid = 5.0)
+    end
 end

From 3f2cd63468482f564d86186e6233dd50a20e3139 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Thu, 16 Mar 2017 17:11:50 -0700
Subject: [PATCH 22/43] fix right join

---
 src/abstractdatatable/join.jl |  8 +++---
 test/join.jl                  | 46 +++++++++++++++++++++--------------
 2 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/src/abstractdatatable/join.jl b/src/abstractdatatable/join.jl
index 6f2c292..68e4cfe 100644
--- a/src/abstractdatatable/join.jl
+++ b/src/abstractdatatable/join.jl
@@ -76,14 +76,14 @@ function compose_joined_table(joiner::DataTableJoiner, kind::Symbol,
 
     nrow = length(all_orig_left_ixs) + roil
     @assert nrow == length(all_orig_right_ixs) + loil
-    ncl = ncol(joiner.dtl)
-    cols = Vector{Any}(ncl + ncol(dtr_noon))
+    ncleft = ncol(joiner.dtl)
+    cols = Vector{Any}(ncleft + ncol(dtr_noon))
     for (i, col) in enumerate(columns(joiner.dtl))
         cols[i] = kind == :inner ? col[all_orig_left_ixs] :
                                    copy!(similar_nullable(col, nrow), col[all_orig_left_ixs])
     end
     for (i, col) in enumerate(columns(dtr_noon))
-        cols[i+ncl] = kind == :inner ? col[all_orig_right_ixs] :
+        cols[i+ncleft] = kind == :inner ? col[all_orig_right_ixs] :
                                        copy!(similar_nullable(col, nrow), col[all_orig_right_ixs])[right_perm]
     end
     res = DataTable(cols, vcat(names(joiner.dtl), names(dtr_noon)))
@@ -93,7 +93,7 @@ function compose_joined_table(joiner::DataTableJoiner, kind::Symbol,
         # need to be taken from the right
         for (on_col_ix, on_col) in enumerate(joiner.on_cols)
             # fix the result of the rightjoin by taking the nonnull values from the right table
-            res[on_col][rightonly_ixs.join] = joiner.dtr_on[rightonly_ixs.orig, on_col_ix]
+            res[on_col][end-length(rightonly_ixs.orig)+1:end] = joiner.dtr_on[rightonly_ixs.orig, on_col_ix]
         end
     end
     return res
diff --git a/test/join.jl b/test/join.jl
index 23fc836..63fc38a 100644
--- a/test/join.jl
+++ b/test/join.jl
@@ -128,12 +128,15 @@ module TestJoin
                                                            id_1 = repeat([0, 1, 2, 3, 4], outer=3),
                                                            fid_1 = repeat([0.0, 1.0, 2.0, 3.0, 4.0], outer=3))
         # id
-        @test join(small, large, on=:id, kind=:inner) == DataTable(id = [1, 3], fid = [1.0, 3.0], fid_1 = [1.0, 3.0])
-        @test join(small, large, on=:id, kind=:left) == nullify!(DataTable(id = [1, 3, 5], fid = [1.0, 3.0, 5.0], fid_1 = [1.0, 3.0, N]))
-        # FIXME
-        # @test join(small, large, on=:id, kind=:right) == nullify!(DataTable(id = [1, 3, 0, 2, 4],
-        #                                                                     fid = [1.0, 3.0, N, N, N],
-        #                                                                     fid_1 = [1.0, 3.0, 0.0, 2.0, 4.0])
+        @test join(small, large, on=:id, kind=:inner) == DataTable(id = [1, 3],
+                                                                   fid = [1.0, 3.0],
+                                                                   fid_1 = [1.0, 3.0])
+        @test join(small, large, on=:id, kind=:left) == nullify!(DataTable(id = [1, 3, 5],
+                                                                           fid = [1.0, 3.0, 5.0],
+                                                                           fid_1 = [1.0, 3.0, N]))
+        @test join(small, large, on=:id, kind=:right) == nullify!(DataTable(id = [1, 3, 0, 2, 4],
+                                                                            fid = [1.0, 3.0, N, N, N],
+                                                                            fid_1 = [1.0, 3.0, 0.0, 2.0, 4.0]))
         @test join(small, large, on=:id, kind=:outer) == nullify!(DataTable(id = [1, 3, 5, 0, 2, 4],
                                                                             fid = [1.0, 3.0, 5.0, N, N, N],
                                                                             fid_1 = [1.0, 3.0, N, 0.0, 2.0, 4.0]))
@@ -141,12 +144,15 @@ module TestJoin
         @test join(small, large, on=:id, kind=:anti) == DataTable(id = 5, fid = 5.0)
 
         # fid
-        @test join(small, large, on=:fid, kind=:inner) == DataTable(id = [1, 3], fid = [1.0, 3.0], id_1 = [1, 3])
-        @test join(small, large, on=:fid, kind=:left) == nullify!(DataTable(id = [1, 3, 5], fid = [1.0, 3.0, 5.0], id_1 = [1, 3, N]))
-        # FIXME
-        # @test join(small, large, on=:fid, kind=:right) == nullify!(DataTable(id = [1, 3, N, N, N],
-        #                                                                      fid = [1.0, 3.0, 0.0, 2.0, 4.0],
-        #                                                                      id_1 = [1, 3, 0, 2, 4]))
+        @test join(small, large, on=:fid, kind=:inner) == DataTable(id = [1, 3],
+                                                                    fid = [1.0, 3.0],
+                                                                    id_1 = [1, 3])
+        @test join(small, large, on=:fid, kind=:left) == nullify!(DataTable(id = [1, 3, 5],
+                                                                            fid = [1.0, 3.0, 5.0],
+                                                                            id_1 = [1, 3, N]))
+        @test join(small, large, on=:fid, kind=:right) == nullify!(DataTable(id = [1, 3, N, N, N],
+                                                                             fid = [1.0, 3.0, 0.0, 2.0, 4.0],
+                                                                             id_1 = [1, 3, 0, 2, 4]))
         @test join(small, large, on=:fid, kind=:outer) == nullify!(DataTable(id = [1, 3, 5, N, N, N],
                                                                              fid = [1.0, 3.0, 5.0, 0.0, 2.0, 4.0],
                                                                              id_1 = [1, 3, N, 0, 2, 4]))
@@ -154,13 +160,17 @@ module TestJoin
         @test join(small, large, on=:fid, kind=:anti) == DataTable(id = 5, fid = 5.0)
 
         # both
-        @test join(small, large, on=[:id, :fid], kind=:inner) == DataTable(id = [1, 3], fid = [1.0, 3.0])
-        @test join(small, large, on=[:id, :fid], kind=:left) == nullify!(DataTable(id = [1, 3, 5], fid = [1.0, 3.0, 5.0]))
-        # FIXME
-        # @test join(small, large, on=[:id, :fid], kind=:right) == nullify!(DataTable(id = [1, 3, 0, 2, 4], fid = [1.0, 3.0, 0.0, 2.0, 4.0]))
+        @test join(small, large, on=[:id, :fid], kind=:inner) == DataTable(id = [1, 3],
+                                                                           fid = [1.0, 3.0])
+        @test join(small, large, on=[:id, :fid], kind=:left) == nullify!(DataTable(id = [1, 3, 5],
+                                                                                   fid = [1.0, 3.0, 5.0]))
+        @test join(small, large, on=[:id, :fid], kind=:right) == nullify!(DataTable(id = [1, 3, 0, 2, 4],
+                                                                                    fid = [1.0, 3.0, 0.0, 2.0, 4.0]))
         @test join(small, large, on=[:id, :fid], kind=:outer) == nullify!(DataTable(id = [1, 3, 5, 0, 2, 4],
                                                                                     fid = [1.0, 3.0, 5.0, 0.0, 2.0, 4.0]))
-        @test join(small, large, on=[:id, :fid], kind=:semi) == DataTable(id = [1, 3], fid = [1.0, 3.0])
-        @test join(small, large, on=[:id, :fid], kind=:anti) == DataTable(id = 5, fid = 5.0)
+        @test join(small, large, on=[:id, :fid], kind=:semi) == DataTable(id = [1, 3],
+                                                                          fid = [1.0, 3.0])
+        @test join(small, large, on=[:id, :fid], kind=:anti) == DataTable(id = 5,
+                                                                          fid = 5.0)
     end
 end

From 9c3ad2160238daaf4eeedddb92720185808def2f Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Thu, 16 Mar 2017 17:56:20 -0700
Subject: [PATCH 23/43] update join help message and add note about temp fix

---
 src/abstractdatatable/join.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/abstractdatatable/join.jl b/src/abstractdatatable/join.jl
index 68e4cfe..4844554 100644
--- a/src/abstractdatatable/join.jl
+++ b/src/abstractdatatable/join.jl
@@ -93,6 +93,7 @@ function compose_joined_table(joiner::DataTableJoiner, kind::Symbol,
         # need to be taken from the right
         for (on_col_ix, on_col) in enumerate(joiner.on_cols)
             # fix the result of the rightjoin by taking the nonnull values from the right table
+            # end-length(rightonly_ixs.orig)+1:end was rightonly_ixs.join. Try and FIXME
             res[on_col][end-length(rightonly_ixs.orig)+1:end] = joiner.dtr_on[rightonly_ixs.orig, on_col_ix]
         end
     end
@@ -214,8 +215,7 @@ join(dt1::AbstractDataTable,
     row of `dt1` is matched with every row of `dt2`
 
 For the three join operations that may introduce missing values, `:outer`, `:left`,
-and `:right`,
-Null values are filled in where needed to complete joins.
+and `:right`, all columns of the returned datatable will be nullable.
 
 ### Result
 

From 1e7d26e65cf58c2735fcaa8fd378edb4385b90a2 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Thu, 16 Mar 2017 18:00:25 -0700
Subject: [PATCH 24/43] indentation

---
 src/abstractdatatable/join.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/abstractdatatable/join.jl b/src/abstractdatatable/join.jl
index 4844554..44e15d3 100644
--- a/src/abstractdatatable/join.jl
+++ b/src/abstractdatatable/join.jl
@@ -84,7 +84,7 @@ function compose_joined_table(joiner::DataTableJoiner, kind::Symbol,
     end
     for (i, col) in enumerate(columns(dtr_noon))
         cols[i+ncleft] = kind == :inner ? col[all_orig_right_ixs] :
-                                       copy!(similar_nullable(col, nrow), col[all_orig_right_ixs])[right_perm]
+                                          copy!(similar_nullable(col, nrow), col[all_orig_right_ixs])[right_perm]
     end
     res = DataTable(cols, vcat(names(joiner.dtl), names(dtr_noon)))
 

From e39ba637718db19c579ee769ea3fbc8a691e545b Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Fri, 17 Mar 2017 14:21:41 -0700
Subject: [PATCH 25/43] changes

---
 docs/src/lib/manipulation.md               |   2 +
 docs/src/man/reshaping_and_pivoting.md     |  23 +++
 src/DataTables.jl                          |   2 +
 src/abstractdatatable/abstractdatatable.jl |   6 +-
 src/abstractdatatable/io.jl                |   4 +-
 src/abstractdatatable/reshape.jl           | 217 ++++++++++++++++++++-
 src/datatable/datatable.jl                 |  20 +-
 test/show.jl                               |   7 +
 8 files changed, 269 insertions(+), 12 deletions(-)

diff --git a/docs/src/lib/manipulation.md b/docs/src/lib/manipulation.md
index 8d24d4b..c67345a 100644
--- a/docs/src/lib/manipulation.md
+++ b/docs/src/lib/manipulation.md
@@ -20,4 +20,6 @@ join
 melt
 stack
 unstack
+stackdt
+meltdt
 ```
diff --git a/docs/src/man/reshaping_and_pivoting.md b/docs/src/man/reshaping_and_pivoting.md
index d99e814..9be632a 100644
--- a/docs/src/man/reshaping_and_pivoting.md
+++ b/docs/src/man/reshaping_and_pivoting.md
@@ -53,6 +53,29 @@ If the remaining columns are unique, you can skip the id variable and use:
 widedt = unstack(longdt, :variable, :value)
 ```
 
+`stackdt` and `meltdt` are two additional functions that work like `stack` and `melt`, but they provide a view into the original wide DataTable. Here is an example:
+
+```julia
+d = stackdt(iris)
+```
+
+This saves memory. To create the view, several AbstractVectors are defined:
+
+`:variable` column -- `RepeatedVector`
+This repeats each variable name N times, where N is the number of rows of the original AbstractDataTable.
+
+`:value` column -- `StackedVector`
+This provides a view of the original columns stacked together.
+
+Id columns -- `RepeatedVector`
+This repeats the original columns N times where N is the number of columns stacked.
+
+For more details on the storage representation, see:
+
+```julia
+dump(stackdt(iris))
+```
+
 None of these reshaping functions perform any aggregation. To do aggregation, use the split-apply-combine functions in combination with reshaping. Here is an example:
 
 ```julia
diff --git a/src/DataTables.jl b/src/DataTables.jl
index 4b89a3b..799f7f6 100644
--- a/src/DataTables.jl
+++ b/src/DataTables.jl
@@ -57,6 +57,7 @@ export @~,
        eltypes,
        groupby,
        melt,
+       meltdt,
        names!,
        ncol,
        nonunique,
@@ -70,6 +71,7 @@ export @~,
        rename,
        showcols,
        stack,
+       stackdt,
        unique!,
        unstack,
        head,
diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index b51c017..6f95073 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -860,7 +860,7 @@ julia> eltypes(dt)
  Nullable{Int64}
 ```
 
-See also [`denullify!`] & [`nullify`](@ref).
+See also [`denullify!`](@ref) and [`nullify`](@ref).
 """
 denullify(dt::AbstractDataTable) = denullify!(copy(dt))
 
@@ -899,7 +899,7 @@ julia> eltypes(dt)
  Nullable{Int64}
 ```
 
-See also [`nullify`](@ref) & [`denullify!`](@ref).
+See also [`nullify`](@ref) and [`denullify!`](@ref).
 """
 function nullify!(dt::AbstractDataTable)
     for i in 1:size(dt,2)
@@ -946,7 +946,7 @@ julia> eltypes(dt)
  Int64
 ```
 
-See also [`nullify!`](@ref) & [`denullify`](@ref).
+See also [`nullify!`](@ref) and [`denullify`](@ref).
 """
 function nullify(dt::AbstractDataTable)
     nullify!(copy(dt))
diff --git a/src/abstractdatatable/io.jl b/src/abstractdatatable/io.jl
index 6af518b..3c6ff81 100644
--- a/src/abstractdatatable/io.jl
+++ b/src/abstractdatatable/io.jl
@@ -45,7 +45,7 @@ function printtable(io::IO,
             if !isnull(dt[j][i])
                 if ! (etypes[j] <: Real)
                     print(io, quotemark)
-                    x = isa(dt[i, j], Nullable) ? unsafe_get(dt[i, j]) : dt[i, j]
+                    x = unsafe_get(dt[i, j])
                     escapedprint(io, x, quotestr)
                     print(io, quotemark)
                 else
@@ -168,7 +168,7 @@ function Base.show(io::IO, ::MIME"text/latex", dt::AbstractDataTable)
             write(io, " & ")
             cell = dt[row,col]
             if !isnull(cell)
-                content = isa(cell, Nullable) ? unsafe_get(cell) : cell
+                content = unsafe_get(cell)
                 if mimewritable(MIME("text/latex"), content)
                     show(io, MIME("text/latex"), content)
                 else
diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl
index 381825b..aab7b5c 100644
--- a/src/abstractdatatable/reshape.jl
+++ b/src/abstractdatatable/reshape.jl
@@ -94,7 +94,7 @@ end
 function stack(dt::AbstractDataTable, measure_var::Int, id_vars::Vector{Int};
                variable_name::Symbol=:variable, value_name::Symbol=:value)
     stack(dt, [measure_var], id_vars;
-            variable_name=variable_name, value_name=value_name)
+          variable_name=variable_name, value_name=value_name)
 end
 function stack(dt::AbstractDataTable, measure_vars, id_vars;
                variable_name::Symbol=:variable, value_name::Symbol=:value)
@@ -221,3 +221,218 @@ function unstack(dt::AbstractDataTable, colkey::Int, value::Int)
 end
 
 unstack(dt::AbstractDataTable) = unstack(dt, :id, :variable, :value)
+
+##############################################################################
+##
+## Reshaping using referencing (issue #145)
+## New AbstractVector types (all read only):
+##     StackedVector
+##     RepeatedVector
+##
+##############################################################################
+
+"""
+An AbstractVector{Any} that is a linear, concatenated view into
+another set of AbstractVectors
+NOTE: Not exported.
+### Constructor
+```julia
+StackedVector(components::Vector{Any})
+```
+### Arguments
+* `components` : a `Vector{Any}` holding one or more AbstractVectors
+### Examples
+```julia
+StackedVector(Any[[1,2], [9,10], [11,12]])  # [1,2,9,10,11,12]
+```
+"""
+type StackedVector <: AbstractVector{Any}
+    components::Vector{Any}
+end
+
+function Base.getindex(v::StackedVector,i::Real)
+    lengths = [length(x)::Int for x in v.components]
+    cumlengths = [0; cumsum(lengths)]
+    j = searchsortedlast(cumlengths .+ 1, i)
+    if j > length(cumlengths)
+        error("indexing bounds error")
+    end
+    k = i - cumlengths[j]
+    if k < 1 || k > length(v.components[j])
+        error("indexing bounds error")
+    end
+    v.components[j][k]
+end
+
+function Base.getindex{I<:Real}(v::StackedVector,i::AbstractVector{I})
+    result = similar(v.components[1], length(i))
+    for idx in 1:length(i)
+        result[idx] = v[i[idx]]
+    end
+    result
+end
+
+Base.size(v::StackedVector) = (length(v),)
+Base.length(v::StackedVector) = sum(map(length, v.components))
+Base.ndims(v::StackedVector) = 1
+Base.eltype(v::StackedVector) = promote_type(map(eltype, v.components)...)
+Base.similar(v::StackedVector, T, dims::Dims) = similar(v.components[1], T, dims)
+
+CategoricalArrays.CategoricalArray(v::StackedVector) = CategoricalArray(v[:]) # could be more efficient
+
+
+"""
+An AbstractVector that is a view into another AbstractVector with
+repeated elements
+NOTE: Not exported.
+### Constructor
+```julia
+RepeatedVector(parent::AbstractVector, inner::Int, outer::Int)
+```
+### Arguments
+* `parent` : the AbstractVector that's repeated
+* `inner` : the number of times each element is repeated
+* `outer` : the number of times the whole vector is repeated after
+  being expanded by `inner`
+`inner` and `outer` have the same meaning as similarly named arguments
+to `repeat`.
+### Examples
+```julia
+RepeatedVector([1,2], 3, 1)   # [1,1,1,2,2,2]
+RepeatedVector([1,2], 1, 3)   # [1,2,1,2,1,2]
+RepeatedVector([1,2], 2, 2)   # [1,1,2,2,1,1,2,2]
+```
+"""
+type RepeatedVector{T} <: AbstractVector{T}
+    parent::AbstractVector{T}
+    inner::Int
+    outer::Int
+end
+
+function Base.getindex{T,I<:Real}(v::RepeatedVector{T},i::AbstractVector{I})
+    N = length(v.parent)
+    idx = Int[Base.fld1(mod1(j,v.inner*N),v.inner) for j in i]
+    v.parent[idx]
+end
+function Base.getindex{T}(v::RepeatedVector{T},i::Real)
+    N = length(v.parent)
+    idx = Base.fld1(mod1(i,v.inner*N),v.inner)
+    v.parent[idx]
+end
+Base.getindex(v::RepeatedVector,i::Range) = getindex(v, [i;])
+
+Base.size(v::RepeatedVector) = (length(v),)
+Base.length(v::RepeatedVector) = v.inner * v.outer * length(v.parent)
+Base.ndims(v::RepeatedVector) = 1
+Base.eltype{T}(v::RepeatedVector{T}) = T
+Base.reverse(v::RepeatedVector) = RepeatedVector(reverse(v.parent), v.inner, v.outer)
+Base.similar(v::RepeatedVector, T, dims::Dims) = similar(v.parent, T, dims)
+Base.unique(v::RepeatedVector) = unique(v.parent)
+
+function CategoricalArrays.CategoricalArray(v::RepeatedVector)
+    res = CategoricalArrays.CategoricalArray(v.parent)
+    res.refs = repeat(res.refs, inner = [v.inner], outer = [v.outer])
+    res
+end
+
+##############################################################################
+##
+## stackdt()
+## meltdt()
+## Reshaping using referencing (issue #145), using the above vector types
+##
+##############################################################################
+
+"""
+A stacked view of a DataTable (long format)
+Like `stack` and `melt`, but a view is returned rather than data
+copies.
+```julia
+stackdt(dt::AbstractDataTable, [measure_vars], [id_vars];
+        variable_name::Symbol=:variable, value_name::Symbol=:value)
+meltdt(dt::AbstractDataTable, [id_vars], [measure_vars];
+       variable_name::Symbol=:variable, value_name::Symbol=:value)
+```
+### Arguments
+* `dt` : the wide AbstractDataTable
+* `measure_vars` : the columns to be stacked (the measurement
+  variables), a normal column indexing type, like a Symbol,
+  Vector{Symbol}, Int, etc.; for `meltdt`, defaults to all
+  variables that are not `id_vars`
+* `id_vars` : the identifier columns that are repeated during
+  stacking, a normal column indexing type; for `stackdt`, defaults to all
+  variables that are not `measure_vars`
+### Result
+* `::DataTable` : the long-format datatable with column `:value`
+  holding the values of the stacked columns (`measure_vars`), with
+  column `:variable` a Vector of Symbols with the `measure_vars` name,
+  and with columns for each of the `id_vars`.
+The result is a view because the columns are special AbstractVectors
+that return indexed views into the original DataTable.
+### Examples
+```julia
+d1 = DataTable(a = repeat([1:3;], inner = [4]),
+               b = repeat([1:4;], inner = [3]),
+               c = randn(12),
+               d = randn(12),
+               e = map(string, 'a':'l'))
+d1s = stackdt(d1, [:c, :d])
+d1s2 = stackdt(d1, [:c, :d], [:a])
+d1m = meltdt(d1, [:a, :b, :e])
+```
+"""
+function stackdt(dt::AbstractDataTable, measure_vars::Vector{Int},
+                 id_vars::Vector{Int}; variable_name::Symbol=:variable,
+                 value_name::Symbol=:value)
+    N = length(measure_vars)
+    cnames = names(dt)[id_vars]
+    insert!(cnames, 1, value_name)
+    insert!(cnames, 1, variable_name)
+    DataTable(Any[RepeatedVector(_names(dt)[measure_vars], nrow(dt), 1),   # variable
+                  StackedVector(Any[dt[:,c] for c in measure_vars]),     # value
+                  [RepeatedVector(dt[:,c], 1, N) for c in id_vars]...],     # id_var columns
+              cnames)
+end
+function stackdt(dt::AbstractDataTable, measure_var::Int, id_var::Int;
+                 variable_name::Symbol=:variable, value_name::Symbol=:value)
+    stackdt(dt, [measure_var], [id_var]; variable_name=variable_name,
+            value_name=value_name)
+end
+function stackdt(dt::AbstractDataTable, measure_vars, id_var::Int;
+                 variable_name::Symbol=:variable, value_name::Symbol=:value)
+    stackdt(dt, measure_vars, [id_var]; variable_name=variable_name,
+            value_name=value_name)
+end
+function stackdt(dt::AbstractDataTable, measure_var::Int, id_vars;
+                 variable_name::Symbol=:variable, value_name::Symbol=:value)
+    stackdt(dt, [measure_var], id_vars; variable_name=variable_name,
+            value_name=value_name)
+end
+function stackdt(dt::AbstractDataTable, measure_vars, id_vars;
+                 variable_name::Symbol=:variable, value_name::Symbol=:value)
+    stackdt(dt, index(dt)[measure_vars], index(dt)[id_vars];
+            variable_name=variable_name, value_name=value_name)
+end
+function stackdt(dt::AbstractDataTable, measure_vars = numeric_vars(dt);
+                 variable_name::Symbol=:variable, value_name::Symbol=:value)
+    m_inds = index(dt)[measure_vars]
+    stackdt(dt, m_inds, _setdiff(1:ncol(dt), m_inds);
+            variable_name=variable_name, value_name=value_name)
+end
+
+"""
+A stacked view of a DataTable (long format); see `stackdt`
+"""
+function meltdt(dt::AbstractDataTable, id_vars; variable_name::Symbol=:variable,
+                value_name::Symbol=:value)
+    id_inds = index(dt)[id_vars]
+    stackdt(dt, _setdiff(1:ncol(dt), id_inds), id_inds;
+            variable_name=variable_name, value_name=value_name)
+end
+function meltdt(dt::AbstractDataTable, id_vars, measure_vars;
+                variable_name::Symbol=:variable, value_name::Symbol=:value)
+    stackdt(dt, measure_vars, id_vars; variable_name=variable_name,
+            value_name=value_name)
+end
+meltdt(dt::AbstractDataTable; variable_name::Symbol=:variable, value_name::Symbol=:value) =
+    stackdt(dt; variable_name=variable_name, value_name=value_name)
diff --git a/src/datatable/datatable.jl b/src/datatable/datatable.jl
index 96cf59a..57d78a8 100644
--- a/src/datatable/datatable.jl
+++ b/src/datatable/datatable.jl
@@ -145,7 +145,19 @@ function DataTable{T<:Type}(column_eltypes::AbstractVector{T}, cnames::AbstractV
     columns = Vector{Any}(p)
     for j in 1:p
         elty = column_eltypes[j]
-        columns[j] = elty <: Nullable ? NullableArray{eltype(elty)}(nrows) : Vector{elty}(nrows)
+        if elty <: Nullable
+            if eltype(elty) <: CategoricalValue
+                columns[j] = NullableCategoricalArray{eltype(elty).parameters[1]}(nrows)
+            else
+                columns[j] = NullableArray{eltype(elty)}(nrows)
+            end
+        else
+            if elty <: CategoricalValue
+                columns[j] = CategoricalArray{elty.parameters[1]}(nrows)
+            else
+                columns[j] = Vector{elty}(nrows)
+            end
+        end
     end
     return DataTable(columns, Index(convert(Vector{Symbol}, cnames)))
 end
@@ -731,11 +743,7 @@ function hcat!(dt1::DataTable, dt2::AbstractDataTable)
 
     return dt1
 end
-hcat!(dt::DataTable, x::CategoricalArray) = hcat!(dt, DataTable(Any[x]))
-hcat!(dt::DataTable, x::NullableCategoricalArray) = hcat!(dt, DataTable(Any[x]))
-hcat!(dt::DataTable, x::NullableVector) = hcat!(dt, DataTable(Any[x]))
-hcat!(dt::DataTable, x::Vector) = hcat!(dt, DataTable(Any[(x)]))
-hcat!(dt::DataTable, x) = hcat!(dt, DataTable(Any[([x])]))
+hcat!(dt::DataTable, x::AbstractVector) = hcat!(dt, DataTable(Any[x]))
 
 # hcat! for 1-n arguments
 hcat!(dt::DataTable) = dt
diff --git a/test/show.jl b/test/show.jl
index abad44c..8279458 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -30,6 +30,13 @@ module TestShow
 
     dt = DataTable(A = Vector{String}(3))
 
+    A = DataTables.StackedVector(Any[[1, 2, 3], [4, 5, 6], [7, 8, 9]])		
+    show(io, A)		
+    A = DataTables.RepeatedVector([1, 2, 3], 5, 1)		
+    show(io, A)		
+    A = DataTables.RepeatedVector([1, 2, 3], 1, 5)		
+    show(io, A)
+
     #Test show output for REPL and similar
     dt = DataTable(Fish = ["Suzy", "Amir"], Mass = [1.5, Nullable()])
     io = IOBuffer()

From 04cb9eef008c32edee869769b920d1d69bb046ee Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Fri, 17 Mar 2017 14:24:59 -0700
Subject: [PATCH 26/43] spacing

---
 test/show.jl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/show.jl b/test/show.jl
index 8279458..8bbbd78 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -30,11 +30,11 @@ module TestShow
 
     dt = DataTable(A = Vector{String}(3))
 
-    A = DataTables.StackedVector(Any[[1, 2, 3], [4, 5, 6], [7, 8, 9]])		
-    show(io, A)		
-    A = DataTables.RepeatedVector([1, 2, 3], 5, 1)		
-    show(io, A)		
-    A = DataTables.RepeatedVector([1, 2, 3], 1, 5)		
+    A = DataTables.StackedVector(Any[[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+    show(io, A)
+    A = DataTables.RepeatedVector([1, 2, 3], 5, 1)
+    show(io, A)
+    A = DataTables.RepeatedVector([1, 2, 3], 1, 5)
     show(io, A)
 
     #Test show output for REPL and similar

From 5d706857998db1731abe69ca43352f7a9287dbab Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Fri, 17 Mar 2017 20:25:54 -0700
Subject: [PATCH 27/43] put old unstack back and stabilize types, ordering

---
 src/abstractdatatable/join.jl    | 15 ++++---
 src/abstractdatatable/reshape.jl | 71 ++++++++++++++++++++++++--------
 src/datatable/datatable.jl       |  6 +--
 test/data.jl                     |  6 +--
 test/datatable.jl                | 61 ++++++++++++++++++---------
 5 files changed, 110 insertions(+), 49 deletions(-)

diff --git a/src/abstractdatatable/join.jl b/src/abstractdatatable/join.jl
index 44e15d3..cbc32cc 100644
--- a/src/abstractdatatable/join.jl
+++ b/src/abstractdatatable/join.jl
@@ -3,14 +3,17 @@
 ##
 
 # Like similar, but returns a nullable array
-similar_nullable{T}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Vararg{Int}}}) =
-    NullableArray(T, dims)
+similar_nullable{T}(dv::AbstractVector{T}, dims::Union{Int, Tuple{Vararg{Int}}}) =
+    NullableVector{T}(dims)
 
-similar_nullable{T<:Nullable}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Vararg{Int}}}) =
-    NullableArray(eltype(T), dims)
+similar_nullable{T<:Nullable}(dv::AbstractVector{T}, dims::Union{Int, Tuple{Vararg{Int}}}) =
+    NullableVector{eltype(T)}(dims)
 
-similar_nullable{T,R}(dv::CategoricalArray{T,R}, dims::Union{Int, Tuple{Vararg{Int}}}) =
-    NullableCategoricalArray(T, dims)
+similar_nullable{T,R}(dv::CategoricalVector{T,R}, dims::Union{Int, Tuple{Vararg{Int}}}) =
+    NullableCategoricalVector{T}(dims)
+
+similar_nullable{T,R}(dv::NullableCategoricalVector{T,R}, dims::Union{Int, Tuple{Vararg{Int}}}) =
+    NullableCategoricalVector{T}(dims)
 
 # helper structure for DataTables joining
 immutable DataTableJoiner{DT1<:AbstractDataTable, DT2<:AbstractDataTable}
diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl
index aab7b5c..2ca3204 100644
--- a/src/abstractdatatable/reshape.jl
+++ b/src/abstractdatatable/reshape.jl
@@ -53,6 +53,11 @@ melt(dt::AbstractDataTable, [id_vars], [measure_vars];
   column `:variable` a Vector of Symbols with the `measure_vars` name,
   and with columns for each of the `id_vars`.
 
+See also `stackdt` and `meltdt` for stacking methods that return a
+view into the original DataTable. See `unstack` for converting from
+long to wide format.
+
+
 ### Examples
 
 ```julia
@@ -135,6 +140,7 @@ end
 melt(dt::AbstractDataTable; variable_name::Symbol=:variable, value_name::Symbol=:value) =
     stack(dt; variable_name=variable_name, value_name=value_name)
 
+
 ##############################################################################
 ##
 ## unstack()
@@ -188,17 +194,28 @@ function unstack(dt::AbstractDataTable, rowkey::Int, colkey::Int, value::Int)
     # `rowkey` integer indicating which column to place along rows
     # `colkey` integer indicating which column to place along column headers
     # `value` integer indicating which column has values
-    values = dt[value]
-    newcols = dt[colkey]
-    uniquenewcols = unique(newcols)
-    ncol = length(uniquenewcols) + 1
-    columns = Vector{Any}(ncol)
-    columns[1] = unique(dt[rowkey])
-    for (i,coli) in enumerate(2:ncol)
-        columns[coli] = values[find(newcols .== uniquenewcols[i])]
+    refkeycol = NullableCategoricalArray(dt[rowkey])
+    levels!(refkeycol, unique(dt[rowkey]))
+    valuecol = dt[value]
+    keycol = NullableCategoricalArray(dt[colkey])
+    levels!(keycol, unique(dt[colkey]))
+    Nrow = length(refkeycol.pool)
+    Ncol = length(keycol.pool)
+    payload = DataTable(Any[similar_nullable(valuecol, Nrow) for i in 1:Ncol], map(Symbol, levels(keycol)))
+    nowarning = true
+    for k in 1:nrow(dt)
+        j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]])
+        i = Int(CategoricalArrays.order(refkeycol.pool)[refkeycol.refs[k]])
+        if i > 0 && j > 0
+            if nowarning && !isnull(payload[j][i])
+                warn("Duplicate entries in unstack.")
+                nowarning = false
+            end
+            payload[j][i]  = valuecol[k]
+        end
     end
-    colnames = vcat(names(dt)[rowkey], Symbol.(uniquenewcols))
-    DataTable(columns, colnames)
+    col = typeof(similar_nullable(dt[rowkey], 1))(levels(refkeycol))
+    insert!(payload, 1, col, _names(dt)[rowkey])
 end
 unstack(dt::AbstractDataTable, rowkey, colkey, value) =
     unstack(dt, index(dt)[rowkey], index(dt)[colkey], index(dt)[value])
@@ -208,20 +225,38 @@ unstack(dt::AbstractDataTable, colkey, value) =
     unstack(dt, index(dt)[colkey], index(dt)[value])
 
 function unstack(dt::AbstractDataTable, colkey::Int, value::Int)
-    anchor = unique(dt[deleteat!(names(dt), [colkey, value])])
-    groups = groupby(dt, names(anchor))
-    newcolnames = unique(dt[colkey])
-    newcols = DataTable(Any[typeof(dt[value])(size(anchor,1)) for n in newcolnames], Symbol.(newcolnames))
-    for (i, g) in enumerate(groups)
-        for col in newcolnames
-            newcols[i, Symbol(col)] = g[g[colkey] .== col, value][1]
+    # group on anything not a key or value:
+    g = groupby(dt, setdiff(_names(dt), _names(dt)[[colkey, value]]))
+    groupidxs = [g.idx[g.starts[i]:g.ends[i]] for i in 1:length(g.starts)]
+    rowkey = zeros(Int, size(dt, 1))
+    for i in 1:length(groupidxs)
+        rowkey[groupidxs[i]] = i
+    end
+    keycol = NullableCategoricalArray(dt[colkey])
+    levels!(keycol, unique(dt[colkey]))
+    valuecol = dt[value]
+    dt1 = nullify!(dt[g.idx[g.starts], g.cols])
+    Nrow = length(g)
+    Ncol = length(levels(keycol))
+    dt2 = DataTable(Any[similar_nullable(valuecol, Nrow) for i in 1:Ncol], map(Symbol, levels(keycol)))
+    nowarning = true
+    for k in 1:nrow(dt)
+        j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]])
+        i = rowkey[k]
+        if i > 0 && j > 0
+            if nowarning && !isnull(dt2[j][i])
+                warn("Duplicate entries in unstack at row $k.")
+                nowarning = false
+            end
+            dt2[j][i]  = valuecol[k]
         end
     end
-    hcat(anchor, newcols)
+    hcat(dt1, dt2)
 end
 
 unstack(dt::AbstractDataTable) = unstack(dt, :id, :variable, :value)
 
+
 ##############################################################################
 ##
 ## Reshaping using referencing (issue #145)
diff --git a/src/datatable/datatable.jl b/src/datatable/datatable.jl
index 57d78a8..5d3e231 100644
--- a/src/datatable/datatable.jl
+++ b/src/datatable/datatable.jl
@@ -147,13 +147,13 @@ function DataTable{T<:Type}(column_eltypes::AbstractVector{T}, cnames::AbstractV
         elty = column_eltypes[j]
         if elty <: Nullable
             if eltype(elty) <: CategoricalValue
-                columns[j] = NullableCategoricalArray{eltype(elty).parameters[1]}(nrows)
+                columns[j] = NullableCategoricalVector{eltype(elty).parameters[1]}(nrows)
             else
-                columns[j] = NullableArray{eltype(elty)}(nrows)
+                columns[j] = NullableVector{eltype(elty)}(nrows)
             end
         else
             if elty <: CategoricalValue
-                columns[j] = CategoricalArray{elty.parameters[1]}(nrows)
+                columns[j] = CategoricalVector{elty.parameters[1]}(nrows)
             else
                 columns[j] = Vector{elty}(nrows)
             end
diff --git a/test/data.jl b/test/data.jl
index ca54f26..d0272e1 100644
--- a/test/data.jl
+++ b/test/data.jl
@@ -192,9 +192,9 @@ module TestData
     d1us = unstack(d1s, :id, :variable, :value)
     d1us2 = unstack(d1s2)
     d1us3 = unstack(d1s2, :variable, :value)
-    @test d1us[:a] == d1[:a]
-    @test d1us2[:d] == d1[:d]
-    @test d1us2[:3] == d1[:d]
+    @test isequal(d1us[:a], NullableArray(d1[:a]))
+    @test isequal(d1us2[:d], NullableArray(d1[:d]))
+    @test isequal(d1us2[:3], NullableArray(d1[:d]))
 
 
 
diff --git a/test/datatable.jl b/test/datatable.jl
index 6769733..4adaf24 100644
--- a/test/datatable.jl
+++ b/test/datatable.jl
@@ -286,25 +286,48 @@ module TestDataTable
         @test nothing == describe(f, NullableCategoricalArray(Nullable{String}["1", "2", Nullable()]))
     end
 
-    dt = DataTable(Fish = CategoricalArray(["Bob", "Bob", "Batman", "Batman"]),
-                   Key = ["Mass", "Color", "Mass", "Color"],
-                   Value = ["12 g", "Red", "18 g", "Grey"])
-    # Check that reordering levels does not confuse unstack
-    levels!(dt[1], ["XXX", "Bob", "Batman"])
-    #Unstack specifying a row column
-    dt2 = unstack(dt, :Fish, :Key, :Value)
-    #Unstack without specifying a row column
-    dt3 = unstack(dt, :Key, :Value)
-    #The expected output
-    dt4 = DataTable(Fish = ["Bob", "Batman"],
-                    Mass = ["12 g", "18 g"],
-                    Color = ["Red", "Grey"] )
-    @test isequal(dt2, dt4)
-    @test isequal(dt3, dt4)
-    # can't assign Nullable() to a typed column
-    #Make sure unstack works with NULLs at the start of the value column
-    # dt[1,:Value] = Nullable()
-    dt2 = unstack(dt,:Fish, :Key, :Value)
+    @testset "unstacking and nullables" begin
+        dtA = DataTable(Fish = CategoricalArray(["Bob", "Bob", "Batman", "Batman"]),
+                        Key = ["Mass", "Color", "Mass", "Color"],
+                        Value = ["12 g", "Red", "18 g", "Grey"])
+        # Check that reordering levels does not confuse unstack
+        levels!(dtA[1], ["XXX", "Bob", "Batman"])
+        # should all be the same, just different column types
+        dt2A = unstack(dtA, :Fish, :Key, :Value)
+        dt3A = unstack(dtA, :Key, :Value)
+        #The expected output
+        dt4A = DataTable(Fish = NullableCategoricalArray(["Bob", "Batman"]),
+                         Mass = NullableArray(["12 g", "18 g"]),
+                         Color = NullableArray(["Red", "Grey"]))
+        @test dt2A == dt3A == dt4A
+
+        dtB = DataTable(Fish = CategoricalArray(["Bob", "Bob", "Batman", "Batman"]),
+                        Key = CategoricalArray(["Mass", "Color", "Mass", "Color"]),
+                        Value = CategoricalArray(["12 g", "Red", "18 g", "Grey"]))
+        dt2B = unstack(dtB, :Fish, :Key, :Value)
+        dt3B = unstack(dtB, :Key, :Value)
+        # fixme, these are all being reordered by NullableCategoricalArray constructor
+        dt4B = DataTable(Fish = NullableCategoricalArray(["Batman", "Bob"]),
+                         Color = NullableCategoricalArray(["Grey", "Red"]),
+                         Mass = NullableCategoricalArray(["18 g", "12 g"]))
+        @test dt2B == dt3B[[2,1], :] == dt4B
+
+        # test multiple entries in unstack error
+        dt = DataTable(id=[1, 2, 1, 2], variable=["a", "b", "a", "b"], value=[3, 4, 5, 6])
+        a = unstack(dt, :id, :variable, :value)
+        b = unstack(dt, :variable, :value)
+        @test a == b == DataTable(id = Nullable[1, 2], a = Nullable[5, Nullable()], b =  Nullable[Nullable(), 6])
+
+        dt = DataTable(id=1:2, variable=["a", "b"], value=3:4)
+        a = unstack(dt, :id, :variable, :value)
+        b = unstack(dt, :variable, :value)
+        @test a == b == DataTable(id = Nullable[1, 2], a = Nullable[3, Nullable()], b =  Nullable[Nullable(), 4])
+
+        dt = DataTable(id=1:2, variable=["a", "b"], value=3:4)
+        a = unstack(dt, :id, :variable, :value)
+        b = unstack(dt, :variable, :value)
+        @test a == b == DataTable(id = Nullable[1, 2], a = [3, Nullable()], b = [Nullable(), 4])
+    end
 
     dt = DataTable(A = 1:10, B = 'A':'J')
     @test !(dt[:,:] === dt)

From 7859132539b351111ec4888f6dfbc4595092ed17 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Fri, 17 Mar 2017 20:43:07 -0700
Subject: [PATCH 28/43] fix bad copy and paste spacing and condense scalar
 recycling code

---
 src/abstractdatatable/reshape.jl | 28 ++++++++++++++++++++++++++++
 src/datatable/datatable.jl       |  8 ++------
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl
index 2ca3204..df4be00 100644
--- a/src/abstractdatatable/reshape.jl
+++ b/src/abstractdatatable/reshape.jl
@@ -269,17 +269,25 @@ unstack(dt::AbstractDataTable) = unstack(dt, :id, :variable, :value)
 """
 An AbstractVector{Any} that is a linear, concatenated view into
 another set of AbstractVectors
+
 NOTE: Not exported.
+
 ### Constructor
+
 ```julia
 StackedVector(d::AbstractVector...)
 ```
+
 ### Arguments
+
 * `d...` : one or more AbstractVectors
+
 ### Examples
+
 ```julia
 StackedVector(Any[[1,2], [9,10], [11,12]])  # [1,2,9,10,11,12]
 ```
+
 """
 type StackedVector <: AbstractVector{Any}
     components::Vector{Any}
@@ -319,24 +327,33 @@ CategoricalArrays.CategoricalArray(v::StackedVector) = CategoricalArray(v[:]) #
 """
 An AbstractVector that is a view into another AbstractVector with
 repeated elements
+
 NOTE: Not exported.
+
 ### Constructor
+
 ```julia
 RepeatedVector(parent::AbstractVector, inner::Int, outer::Int)
+
 ```
+
 ### Arguments
+
 * `parent` : the AbstractVector that's repeated
 * `inner` : the numer of times each element is repeated
 * `outer` : the numer of times the whole vector is repeated after
   expanded by `inner`
 `inner` and `outer` have the same meaning as similarly named arguments
 to `repeat`.
+
 ### Examples
+
 ```julia
 RepeatedVector([1,2], 3, 1)   # [1,1,1,2,2,2]
 RepeatedVector([1,2], 1, 3)   # [1,2,1,2,1,2]
 RepeatedVector([1,2], 2, 2)   # [1,2,1,2,1,2,1,2]
 ```
+
 """
 type RepeatedVector{T} <: AbstractVector{T}
     parent::AbstractVector{T}
@@ -382,28 +399,37 @@ end
 A stacked view of a DataTable (long format)
 Like `stack` and `melt`, but a view is returned rather than data
 copies.
+
 ```julia
 stackdt(dt::AbstractDataTable, [measure_vars], [id_vars];
         variable_name::Symbol=:variable, value_name::Symbol=:value)
 meltdt(dt::AbstractDataTable, [id_vars], [measure_vars];
        variable_name::Symbol=:variable, value_name::Symbol=:value)
 ```
+
 ### Arguments
+
 * `dt` : the wide AbstractDataTable
+
 * `measure_vars` : the columns to be stacked (the measurement
   variables), a normal column indexing type, like a Symbol,
   Vector{Symbol}, Int, etc.; for `melt`, defaults to all
   variables that are not `id_vars`
+
 * `id_vars` : the identifier columns that are repeated during
   stacking, a normal column indexing type; for `stack` defaults to all
   variables that are not `measure_vars`
+
 ### Result
+
 * `::DataTable` : the long-format datatable with column `:value`
   holding the values of the stacked columns (`measure_vars`), with
   column `:variable` a Vector of Symbols with the `measure_vars` name,
   and with columns for each of the `id_vars`.
+
 The result is a view because the columns are special AbstractVectors
 that return indexed views into the original DataTable.
+
 ### Examples
 ```julia
 d1 = DataTable(a = repeat([1:3;], inner = [4]),
@@ -411,10 +437,12 @@ d1 = DataTable(a = repeat([1:3;], inner = [4]),
                c = randn(12),
                d = randn(12),
                e = map(string, 'a':'l'))
+
 d1s = stackdt(d1, [:c, :d])
 d1s2 = stackdt(d1, [:c, :d], [:a])
 d1m = meltdt(d1, [:a, :b, :e])
 ```
+
 """
 function stackdt(dt::AbstractDataTable, measure_vars::Vector{Int},
                  id_vars::Vector{Int}; variable_name::Symbol=:variable,
diff --git a/src/datatable/datatable.jl b/src/datatable/datatable.jl
index 5d3e231..06673d3 100644
--- a/src/datatable/datatable.jl
+++ b/src/datatable/datatable.jl
@@ -83,7 +83,7 @@ type DataTable <: AbstractDataTable
         minlen, maxlen = extrema(lengths)
         if minlen == 0 && maxlen == 0
             return new(columns, colindex)
-        elseif minlen != maxlen
+        elseif minlen != maxlen || minlen == maxlen == 1
             # recycle scalars
             for i in 1:length(columns)
                 typeof(columns[i]) <: AbstractArray && continue
@@ -101,12 +101,8 @@ type DataTable <: AbstractDataTable
         for (i,c) in enumerate(columns)
             if isa(c, Range)
                 columns[i] = collect(c)
-            elseif !isa(c, AbstractVector)
-                if isa(c, AbstractArray)
+            elseif !isa(c, AbstractVector) && isa(c, AbstractArray)
                     throw(DimensionMismatch("columns must be 1-dimensional"))
-                else
-                    columns[i] = [c]
-                end
             else
                 columns[i] = c
             end

From 6496acfd09c8769913ea8a11f19e36b37c13dff0 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Fri, 17 Mar 2017 23:06:56 -0700
Subject: [PATCH 29/43] update vcat error

---
 src/abstractdatatable/abstractdatatable.jl | 22 ++++++++++++++++------
 test/cat.jl                                | 15 +++++++++++++++
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index 6f95073..ef66f35 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -747,13 +747,23 @@ function Base.vcat(dts::AbstractDataTable...)
     uniqueheaders = unique(allheaders[notempty])
     if length(uniqueheaders) == 0
         return DataTable()
-    elseif length(uniqueheaders) > 1
-        estring = Vector{String}(length(uniqueheaders))
-        for (i,u) in enumerate(uniqueheaders)
-            indices = string.(find(x -> x == u, allheaders))
-            estring[i] = "columns ($(join(u, ", "))) of input(s) ($(join(indices, ", ")))"
+    end
+    coldiff = setdiff(union(uniqueheaders...), intersect(uniqueheaders...))
+    if length(uniqueheaders) > 1
+        if !isempty(coldiff)
+            headerlengths = length.(uniqueheaders)
+            minheaderloci = find(headerlengths .== minimum(headerlengths))
+            minheaders = uniqueheaders[minheaderloci[1]]
+            throw(ArgumentError("column(s) ($(join(string.(coldiff), ", "))) are missing from argument(s) ($(join(string.(minheaderloci), ", ")))"))
+        else
+            estrings = Vector{String}(length(uniqueheaders))
+            for (i, u) in enumerate(uniqueheaders)
+                indices = find(a -> a == u, allheaders)
+                indices = join(string.(indices), ", ")
+                estrings[i] = "column order of argument(s) ($indices)"
+            end
+            throw(ArgumentError(join(estrings, " != ")))
         end
-        throw(ArgumentError(join(estring, " != ")))
     else
         header = uniqueheaders[1]
         dts_to_vcat = dts[notempty]
diff --git a/test/cat.jl b/test/cat.jl
index a5b41b5..6303db1 100644
--- a/test/cat.jl
+++ b/test/cat.jl
@@ -125,5 +125,20 @@ module TestCat
         @test_throws ArgumentError vcat(dt1, dt2)
         dt2 = DataTable(A = 1:3, C = 1:3)
         @test_throws ArgumentError vcat(dt1, dt2)
+        dt1 = DataTable(A = 1, B = 1)
+        dt2 = DataTable(B = 1, A = 1)
+        @test_throws ArgumentError vcat(dt1, dt2)
+        @test_throws ArgumentError vcat(dt1, dt1, dt1, dt1, dt2, dt2, dt2, dt2)
+        dt3 = DataTable(A = 1, B = 1, C = 1)
+        @test_throws ArgumentError vcat(dt1, dt3)
+        @test_throws ArgumentError vcat(dt2, dt3)
+        dt4 = DataTable(A = 1, B = 1, C = 1, D = 1)
+        @test_throws ArgumentError vcat(dt1, dt4)
+        @test_throws ArgumentError vcat(dt2, dt4)
+        @test_throws ArgumentError vcat(dt3, dt4)
+        dt5 = hcat(dt4, dt4, dt4, dt4)
+        @test_throws ArgumentError vcat(dt3, dt5)
+        dt5r = names!(copy(dt5), reverse(names(dt5)))
+        @test_throws ArgumentError vcat(dt5, dt5r)
     end
 end

From f47810fcf70087932a4029b41fd18d02f3dd5dc0 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Fri, 17 Mar 2017 23:30:34 -0700
Subject: [PATCH 30/43] unused function, another test, remove unused variable

---
 src/abstractdatatable/abstractdatatable.jl | 14 +-------------
 test/cat.jl                                |  1 +
 2 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index ef66f35..bc40a3c 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -663,17 +663,6 @@ unique!(dt)  # modifies dt
 """
 (unique, unique!)
 
-function nonuniquekey(dt::AbstractDataTable)
-    # Here's another (probably a lot faster) way to do `nonunique`
-    # by grouping on all columns. It will fail if columns cannot be
-    # made into CategoricalVector's.
-    gd = groupby(dt, _names(dt))
-    idx = [1:length(gd.idx)][gd.idx][gd.starts]
-    res = fill(true, nrow(dt))
-    res[idx] = false
-    res
-end
-
 # Count the number of missing values in every column of an AbstractDataTable.
 function colmissing(dt::AbstractDataTable) # -> Vector{Int}
     nrows, ncols = size(dt)
@@ -751,9 +740,8 @@ function Base.vcat(dts::AbstractDataTable...)
     coldiff = setdiff(union(uniqueheaders...), intersect(uniqueheaders...))
     if length(uniqueheaders) > 1
         if !isempty(coldiff)
-            headerlengths = length.(uniqueheaders)
+            headerlengths = length.(allheaders)
             minheaderloci = find(headerlengths .== minimum(headerlengths))
-            minheaders = uniqueheaders[minheaderloci[1]]
             throw(ArgumentError("column(s) ($(join(string.(coldiff), ", "))) are missing from argument(s) ($(join(string.(minheaderloci), ", ")))"))
         else
             estrings = Vector{String}(length(uniqueheaders))
diff --git a/test/cat.jl b/test/cat.jl
index 6303db1..9b35649 100644
--- a/test/cat.jl
+++ b/test/cat.jl
@@ -131,6 +131,7 @@ module TestCat
         @test_throws ArgumentError vcat(dt1, dt1, dt1, dt1, dt2, dt2, dt2, dt2)
         dt3 = DataTable(A = 1, B = 1, C = 1)
         @test_throws ArgumentError vcat(dt1, dt3)
+        @test_throws ArgumentError vcat(dt1, dt1, dt3, dt3)
         @test_throws ArgumentError vcat(dt2, dt3)
         dt4 = DataTable(A = 1, B = 1, C = 1, D = 1)
         @test_throws ArgumentError vcat(dt1, dt4)

From 259ceef7e36cecc80a9fbf7b1bd32f6a77162f99 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Sat, 18 Mar 2017 00:05:39 -0700
Subject: [PATCH 31/43] revert function removal to appease new code failures?

---
 src/abstractdatatable/abstractdatatable.jl | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index bc40a3c..ffe4213 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -663,6 +663,17 @@ unique!(dt)  # modifies dt
 """
 (unique, unique!)
 
+function nonuniquekey(dt::AbstractDataTable)		
+    # Here's another (probably a lot faster) way to do `nonunique`		
+    # by grouping on all columns. It will fail if columns cannot be		
+    # made into CategoricalVector's.		
+    gd = groupby(dt, _names(dt))		
+    idx = [1:length(gd.idx)][gd.idx][gd.starts]		
+    res = fill(true, nrow(dt))		
+    res[idx] = false		
+    res		
+end
+
 # Count the number of missing values in every column of an AbstractDataTable.
 function colmissing(dt::AbstractDataTable) # -> Vector{Int}
     nrows, ncols = size(dt)

From 26e87ac2c76bfef3e6f42baa1a960b3ee950fedb Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Sat, 18 Mar 2017 00:21:01 -0700
Subject: [PATCH 32/43] fix v0.5 issue

---
 src/abstractdatatable/abstractdatatable.jl | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index ffe4213..8254bb8 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -663,15 +663,15 @@ unique!(dt)  # modifies dt
 """
 (unique, unique!)
 
-function nonuniquekey(dt::AbstractDataTable)		
-    # Here's another (probably a lot faster) way to do `nonunique`		
-    # by grouping on all columns. It will fail if columns cannot be		
-    # made into CategoricalVector's.		
-    gd = groupby(dt, _names(dt))		
-    idx = [1:length(gd.idx)][gd.idx][gd.starts]		
-    res = fill(true, nrow(dt))		
-    res[idx] = false		
-    res		
+function nonuniquekey(dt::AbstractDataTable)
+    # Here's another (probably a lot faster) way to do `nonunique`
+    # by grouping on all columns. It will fail if columns cannot be
+    # made into CategoricalVector's.
+    gd = groupby(dt, _names(dt))
+    idx = [1:length(gd.idx)][gd.idx][gd.starts]
+    res = fill(true, nrow(dt))
+    res[idx] = false
+    res
 end
 
 # Count the number of missing values in every column of an AbstractDataTable.
@@ -752,7 +752,8 @@ function Base.vcat(dts::AbstractDataTable...)
     if length(uniqueheaders) > 1
         if !isempty(coldiff)
             headerlengths = length.(allheaders)
-            minheaderloci = find(headerlengths .== minimum(headerlengths))
+            m = minimum(headerlengths)
+            minheaderloci = find(h -> h == m, headerlengths)
             throw(ArgumentError("column(s) ($(join(string.(coldiff), ", "))) are missing from argument(s) ($(join(string.(minheaderloci), ", ")))"))
         else
             estrings = Vector{String}(length(uniqueheaders))

From e0f7982dc9c3647b029ecf8f07db96d90ac7e09d Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Sat, 18 Mar 2017 11:46:25 -0700
Subject: [PATCH 33/43] update vcat testing and change similar_nullable
 constructor call

---
 src/abstractdatatable/abstractdatatable.jl | 45 ++++++++----
 src/abstractdatatable/reshape.jl           |  5 +-
 test/cat.jl                                | 81 +++++++++++++++++-----
 3 files changed, 96 insertions(+), 35 deletions(-)

diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index ffe4213..195b496 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -663,15 +663,15 @@ unique!(dt)  # modifies dt
 """
 (unique, unique!)
 
-function nonuniquekey(dt::AbstractDataTable)		
-    # Here's another (probably a lot faster) way to do `nonunique`		
-    # by grouping on all columns. It will fail if columns cannot be		
-    # made into CategoricalVector's.		
-    gd = groupby(dt, _names(dt))		
-    idx = [1:length(gd.idx)][gd.idx][gd.starts]		
-    res = fill(true, nrow(dt))		
-    res[idx] = false		
-    res		
+function nonuniquekey(dt::AbstractDataTable)
+    # Here's another (probably a lot faster) way to do `nonunique`
+    # by grouping on all columns. It will fail if columns cannot be
+    # made into CategoricalVector's.
+    gd = groupby(dt, _names(dt))
+    idx = [1:length(gd.idx)][gd.idx][gd.starts]
+    res = fill(true, nrow(dt))
+    res[idx] = false
+    res
 end
 
 # Count the number of missing values in every column of an AbstractDataTable.
@@ -748,18 +748,33 @@ function Base.vcat(dts::AbstractDataTable...)
     if length(uniqueheaders) == 0
         return DataTable()
     end
-    coldiff = setdiff(union(uniqueheaders...), intersect(uniqueheaders...))
     if length(uniqueheaders) > 1
+        unionunique = union(uniqueheaders...)
+        coldiff = setdiff(unionunique, intersect(uniqueheaders...))
         if !isempty(coldiff)
-            headerlengths = length.(allheaders)
-            minheaderloci = find(headerlengths .== minimum(headerlengths))
-            throw(ArgumentError("column(s) ($(join(string.(coldiff), ", "))) are missing from argument(s) ($(join(string.(minheaderloci), ", ")))"))
+            # if any datatables are a full superset of names, skip them
+            filter!(u -> Set(u) != Set(unionunique), uniqueheaders)
+            estrings = Vector{String}(length(uniqueheaders))
+            for (i, u) in enumerate(uniqueheaders)
+                matchingloci = find(h -> u == h, allheaders)
+                headerdiff = filter(x -> !in(x, u), coldiff)
+                headerdiff = length(headerdiff) > 1 ?
+                                join(string.(headerdiff[1:end-1]), ", ") * " and " * string(headerdiff[end]) :
+                                string(headerdiff[end])
+                matchingloci = length(matchingloci) > 1 ?
+                                    join(string.(matchingloci[1:end-1]), ", ") * " and " * string(matchingloci[end]) :
+                                    string(matchingloci[end])
+                estrings[i] = "column(s) $headerdiff are missing from argument(s) $matchingloci"
+            end
+            throw(ArgumentError(join(estrings, ", and ")))
         else
             estrings = Vector{String}(length(uniqueheaders))
             for (i, u) in enumerate(uniqueheaders)
                 indices = find(a -> a == u, allheaders)
-                indices = join(string.(indices), ", ")
-                estrings[i] = "column order of argument(s) ($indices)"
+                indices = length(indices) > 1 ?
+                            join(string.(indices[1:end-1]), ", ") * " and " * string(indices[end]) :
+                            string(indices[end])
+                estrings[i] = "column order of argument(s) $indices"
             end
             throw(ArgumentError(join(estrings, " != ")))
         end
diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl
index df4be00..3368d81 100644
--- a/src/abstractdatatable/reshape.jl
+++ b/src/abstractdatatable/reshape.jl
@@ -214,8 +214,9 @@ function unstack(dt::AbstractDataTable, rowkey::Int, colkey::Int, value::Int)
             payload[j][i]  = valuecol[k]
         end
     end
-    col = typeof(similar_nullable(dt[rowkey], 1))(levels(refkeycol))
-    insert!(payload, 1, col, _names(dt)[rowkey])
+    levs = levels(refkeycol)
+    col = similar_nullable(dt[rowkey], length(levs))
+    insert!(payload, 1, copy!(col, levs), _names(dt)[rowkey])
 end
 unstack(dt::AbstractDataTable, rowkey, colkey, value) =
     unstack(dt, index(dt)[rowkey], index(dt)[colkey], index(dt)[value])
diff --git a/test/cat.jl b/test/cat.jl
index 9b35649..5f9ac7c 100644
--- a/test/cat.jl
+++ b/test/cat.jl
@@ -122,24 +122,69 @@ module TestCat
     @testset "vcat errors" begin
         dt1 = DataTable(A = 1:3, B = 1:3)
         dt2 = DataTable(A = 1:3)
-        @test_throws ArgumentError vcat(dt1, dt2)
-        dt2 = DataTable(A = 1:3, C = 1:3)
-        @test_throws ArgumentError vcat(dt1, dt2)
+        # right missing 1 column
+        err = @test_throws ArgumentError vcat(dt1, dt2)
+        @test err.value.msg == "column(s) B are missing from argument(s) 2"
+        # left missing 1 column
+        err = @test_throws ArgumentError vcat(dt2, dt1)
+        @test err.value.msg == "column(s) B are missing from argument(s) 1"
+        # multiple missing 1 column
+        err = @test_throws ArgumentError vcat(dt1, dt2, dt2, dt2, dt2, dt2)
+        @test err.value.msg == "column(s) B are missing from argument(s) 2, 3, 4, 5 and 6"
+        # argument missing >1columns
+        dt1 = DataTable(A = 1:3, B = 1:3, C = 1:3, D = 1:3, E = 1:3)
+        err = @test_throws ArgumentError vcat(dt1, dt2)
+        @test err.value.msg == "column(s) B, C, D and E are missing from argument(s) 2"
+        # >1 arguments missing >1 columns
+        err = @test_throws ArgumentError vcat(dt1, dt2, dt2, dt2, dt2)
+        @test err.value.msg == "column(s) B, C, D and E are missing from argument(s) 2, 3, 4 and 5"
+        # out of order
+        dt2 = dt1[reverse(names(dt1))]
+        err = @test_throws ArgumentError vcat(dt1, dt2)
+        @test err.value.msg == "column order of argument(s) 1 != column order of argument(s) 2"
+        # left >1
+        err = @test_throws ArgumentError vcat(dt1, dt1, dt2)
+        @test err.value.msg == "column order of argument(s) 1 and 2 != column order of argument(s) 3"
+        # right >1
+        err = @test_throws ArgumentError vcat(dt1, dt2, dt2)
+        @test err.value.msg == "column order of argument(s) 1 != column order of argument(s) 2 and 3"
+        # left and right >1
+        err = @test_throws ArgumentError vcat(dt1, dt1, dt1, dt2, dt2, dt2)
+        @test err.value.msg == "column order of argument(s) 1, 2 and 3 != column order of argument(s) 4, 5 and 6"
+        # >2 groups out of order
+        srand(1)
+        dt3 = dt1[shuffle(names(dt1))]
+        err = @test_throws ArgumentError vcat(dt1, dt1, dt1, dt2, dt2, dt2, dt3, dt3, dt3, dt3)
+        @test err.value.msg == "column order of argument(s) 1, 2 and 3 != column order of argument(s) 4, 5 and 6 != column order of argument(s) 7, 8, 9 and 10"
+        # missing columns throws error before out of order columns
         dt1 = DataTable(A = 1, B = 1)
-        dt2 = DataTable(B = 1, A = 1)
-        @test_throws ArgumentError vcat(dt1, dt2)
-        @test_throws ArgumentError vcat(dt1, dt1, dt1, dt1, dt2, dt2, dt2, dt2)
-        dt3 = DataTable(A = 1, B = 1, C = 1)
-        @test_throws ArgumentError vcat(dt1, dt3)
-        @test_throws ArgumentError vcat(dt1, dt1, dt3, dt3)
-        @test_throws ArgumentError vcat(dt2, dt3)
-        dt4 = DataTable(A = 1, B = 1, C = 1, D = 1)
-        @test_throws ArgumentError vcat(dt1, dt4)
-        @test_throws ArgumentError vcat(dt2, dt4)
-        @test_throws ArgumentError vcat(dt3, dt4)
-        dt5 = hcat(dt4, dt4, dt4, dt4)
-        @test_throws ArgumentError vcat(dt3, dt5)
-        dt5r = names!(copy(dt5), reverse(names(dt5)))
-        @test_throws ArgumentError vcat(dt5, dt5r)
+        dt2 = DataTable(A = 1)
+        dt3 = DataTable(B = 1, A = 1)
+        err = @test_throws ArgumentError vcat(dt1, dt2, dt3)
+        @test err.value.msg == "column(s) B are missing from argument(s) 2"
+        # unique columns for both sides
+        dt1 = DataTable(A = 1, B = 1, C = 1, D = 1)
+        dt2 = DataTable(A = 1, C = 1, D = 1, E = 1, F = 1)
+        err = @test_throws ArgumentError vcat(dt1, dt2)
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, and column(s) B are missing from argument(s) 2"
+        err = @test_throws ArgumentError vcat(dt1, dt1, dt2, dt2)
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1 and 2, and column(s) B are missing from argument(s) 3 and 4"
+        dt3 = DataTable(A = 1, B = 1, C = 1, D = 1, E = 1)
+        err = @test_throws ArgumentError vcat(dt1, dt2, dt3)
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, and column(s) B are missing from argument(s) 2, and column(s) F are missing from argument(s) 3"
+        err = @test_throws ArgumentError vcat(dt1, dt1, dt2, dt2, dt3, dt3)
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1 and 2, and column(s) B are missing from argument(s) 3 and 4, and column(s) F are missing from argument(s) 5 and 6"
+        err = @test_throws ArgumentError vcat(dt1, dt1, dt1, dt2, dt2, dt2, dt3, dt3, dt3)
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, 2 and 3, and column(s) B are missing from argument(s) 4, 5 and 6, and column(s) F are missing from argument(s) 7, 8 and 9"
+        # dt4 is a superset of names found in all other datatables and won't be shown in error
+        dt4 = DataTable(A = 1, B = 1, C = 1, D = 1, E = 1, F = 1)
+        err = @test_throws ArgumentError vcat(dt1, dt2, dt3, dt4)
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, and column(s) B are missing from argument(s) 2, and column(s) F are missing from argument(s) 3"
+        err = @test_throws ArgumentError vcat(dt1, dt1, dt2, dt2, dt3, dt3, dt4, dt4)
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1 and 2, and column(s) B are missing from argument(s) 3 and 4, and column(s) F are missing from argument(s) 5 and 6"
+        err = @test_throws ArgumentError vcat(dt1, dt1, dt1, dt2, dt2, dt2, dt3, dt3, dt3, dt4, dt4, dt4)
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, 2 and 3, and column(s) B are missing from argument(s) 4, 5 and 6, and column(s) F are missing from argument(s) 7, 8 and 9"
+        err = @test_throws ArgumentError vcat(dt1, dt2, dt3, dt4, dt1, dt2, dt3, dt4, dt1, dt2, dt3, dt4)
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, 5 and 9, and column(s) B are missing from argument(s) 2, 6 and 10, and column(s) F are missing from argument(s) 3, 7 and 11"
     end
 end

From b0c29b4a1ddd14752846d664298fefa86ce149d4 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Sat, 18 Mar 2017 11:48:58 -0700
Subject: [PATCH 34/43] remove old error message from docstring

---
 src/abstractdatatable/abstractdatatable.jl | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index 195b496..6efeb31 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -721,8 +721,6 @@ julia> dt1 = DataTable(A=1:3, B=1:3);
 
 julia> dt2 = DataTable(A=4:6, B=4:6);
 
-julia> dt3 = DataTable(A=7:9, B=7:9, C=7:9);
-
 julia> vcat(dt1, dt2)
 6×2 DataTables.DataTable
 │ Row │ A │ B │
@@ -733,9 +731,6 @@ julia> vcat(dt1, dt2)
 │ 4   │ 4 │ 4 │
 │ 5   │ 5 │ 5 │
 │ 6   │ 6 │ 6 │
-
-julia> vcat(dt1, dt2, dt3)
-ERROR: ArgumentError: columns (A, B) of input(s) (1, 2) != columns (A, B, C) of input(s) (3)
 ```
 """
 Base.vcat(dt::AbstractDataTable) = dt

From 95a6f314ef1917ebd8e9a6c57959a4d236263a6e Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Sat, 18 Mar 2017 11:49:22 -0700
Subject: [PATCH 35/43] and change docstring to doctest

---
 src/abstractdatatable/abstractdatatable.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index 6efeb31..88fce9f 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -716,7 +716,7 @@ Base.hcat(dt1::AbstractDataTable, dt2::AbstractDataTable, dtn::AbstractDataTable
 Vertically concatenate `AbstractDataTables` that have the same column names in
 the same order.
 
-```julia
+```jldoctest
 julia> dt1 = DataTable(A=1:3, B=1:3);
 
 julia> dt2 = DataTable(A=4:6, B=4:6);

From 7df712f08cced61ced0ff1c20690db92e0d5292a Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Sat, 18 Mar 2017 12:05:38 -0700
Subject: [PATCH 36/43] change similar_nullable back and fix unrelated copy
 paste space removal

---
 src/abstractdatatable/join.jl    | 16 ++++++++--------
 src/abstractdatatable/reshape.jl |  3 +++
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/abstractdatatable/join.jl b/src/abstractdatatable/join.jl
index cbc32cc..15f5c2e 100644
--- a/src/abstractdatatable/join.jl
+++ b/src/abstractdatatable/join.jl
@@ -3,17 +3,17 @@
 ##
 
 # Like similar, but returns a nullable array
-similar_nullable{T}(dv::AbstractVector{T}, dims::Union{Int, Tuple{Vararg{Int}}}) =
-    NullableVector{T}(dims)
+similar_nullable{T}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Vararg{Int}}}) =
+    NullableArray(T, dims)
 
-similar_nullable{T<:Nullable}(dv::AbstractVector{T}, dims::Union{Int, Tuple{Vararg{Int}}}) =
-    NullableVector{eltype(T)}(dims)
+similar_nullable{T<:Nullable}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Vararg{Int}}}) =
+    NullableArray(eltype(T), dims)
 
-similar_nullable{T,R}(dv::CategoricalVector{T,R}, dims::Union{Int, Tuple{Vararg{Int}}}) =
-    NullableCategoricalVector{T}(dims)
+similar_nullable{T,R}(dv::CategoricalArray{T,R}, dims::Union{Int, Tuple{Vararg{Int}}}) =
+    NullableCategoricalArray(T, dims)
 
-similar_nullable{T,R}(dv::NullableCategoricalVector{T,R}, dims::Union{Int, Tuple{Vararg{Int}}}) =
-    NullableCategoricalVector{T}(dims)
+similar_nullable{T,R}(dv::NullableCategoricalArray{T,R}, dims::Union{Int, Tuple{Vararg{Int}}}) =
+    NullableCategoricalArray(T, dims)
 
 # helper structure for DataTables joining
 immutable DataTableJoiner{DT1<:AbstractDataTable, DT2<:AbstractDataTable}
diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl
index 3368d81..2b0dadf 100644
--- a/src/abstractdatatable/reshape.jl
+++ b/src/abstractdatatable/reshape.jl
@@ -344,6 +344,7 @@ RepeatedVector(parent::AbstractVector, inner::Int, outer::Int)
 * `inner` : the numer of times each element is repeated
 * `outer` : the numer of times the whole vector is repeated after
   expanded by `inner`
+
 `inner` and `outer` have the same meaning as similarly named arguments
 to `repeat`.
 
@@ -398,6 +399,7 @@ end
 
 """
 A stacked view of a DataTable (long format)
+
 Like `stack` and `melt`, but a view is returned rather than data
 copies.
 
@@ -432,6 +434,7 @@ The result is a view because the columns are special AbstractVectors
 that return indexed views into the original DataTable.
 
 ### Examples
+
 ```julia
 d1 = DataTable(a = repeat([1:3;], inner = [4]),
                b = repeat([1:4;], inner = [3]),

From 27da644350ca13619088c698d6afbacc2fd9ac52 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Sat, 18 Mar 2017 12:39:57 -0700
Subject: [PATCH 37/43] add missing rightperm reordering and properly unify
 hcat! functions

---
 src/abstractdatatable/join.jl | 2 +-
 src/datatable/datatable.jl    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/abstractdatatable/join.jl b/src/abstractdatatable/join.jl
index 15f5c2e..a239927 100644
--- a/src/abstractdatatable/join.jl
+++ b/src/abstractdatatable/join.jl
@@ -86,7 +86,7 @@ function compose_joined_table(joiner::DataTableJoiner, kind::Symbol,
                                    copy!(similar_nullable(col, nrow), col[all_orig_left_ixs])
     end
     for (i, col) in enumerate(columns(dtr_noon))
-        cols[i+ncleft] = kind == :inner ? col[all_orig_right_ixs] :
+        cols[i+ncleft] = kind == :inner ? col[all_orig_right_ixs][right_perm] :
                                           copy!(similar_nullable(col, nrow), col[all_orig_right_ixs])[right_perm]
     end
     res = DataTable(cols, vcat(names(joiner.dtl), names(dtr_noon)))
diff --git a/src/datatable/datatable.jl b/src/datatable/datatable.jl
index 06673d3..91772a6 100644
--- a/src/datatable/datatable.jl
+++ b/src/datatable/datatable.jl
@@ -739,7 +739,7 @@ function hcat!(dt1::DataTable, dt2::AbstractDataTable)
 
     return dt1
 end
-hcat!(dt::DataTable, x::AbstractVector) = hcat!(dt, DataTable(Any[x]))
+hcat!(dt::DataTable, x) = hcat!(dt, DataTable(Any[x]))
 
 # hcat! for 1-n arguments
 hcat!(dt::DataTable) = dt

From 5fa8fa031fb98873ff4d907c3f55be5185de63d7 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Sat, 18 Mar 2017 12:47:45 -0700
Subject: [PATCH 38/43] accidental spacing changes

---
 src/abstractdatatable/reshape.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl
index 2b0dadf..c9f1f82 100644
--- a/src/abstractdatatable/reshape.jl
+++ b/src/abstractdatatable/reshape.jl
@@ -140,7 +140,6 @@ end
 melt(dt::AbstractDataTable; variable_name::Symbol=:variable, value_name::Symbol=:value) =
     stack(dt; variable_name=variable_name, value_name=value_name)
 
-
 ##############################################################################
 ##
 ## unstack()

From a1d58f93480fa382141cb37c6b419b7540124190 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Sat, 18 Mar 2017 13:01:52 -0700
Subject: [PATCH 39/43] forgot one spacing change

---
 src/abstractdatatable/reshape.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl
index c9f1f82..f7f774a 100644
--- a/src/abstractdatatable/reshape.jl
+++ b/src/abstractdatatable/reshape.jl
@@ -334,7 +334,6 @@ NOTE: Not exported.
 
 ```julia
 RepeatedVector(parent::AbstractVector, inner::Int, outer::Int)
-
 ```
 
 ### Arguments

From db87443120e0ed493807b0399eb313818ecf9d67 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Sat, 18 Mar 2017 13:44:48 -0700
Subject: [PATCH 40/43] change deprecations

---
 src/deprecated.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/deprecated.jl b/src/deprecated.jl
index 83912d7..6f176a8 100644
--- a/src/deprecated.jl
+++ b/src/deprecated.jl
@@ -18,5 +18,5 @@ import Base: keys, values, insert!
 
 @deprecate sub(dt::AbstractDataTable, rows) view(dt, rows)
 
-@deprecate stackdf stack
-@deprecate meltdf melt
+@deprecate stackdf stackdt
+@deprecate meltdf meltdt

From 9c66a1e1edfe686f1092c04c718e6b972bf7958f Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Sat, 18 Mar 2017 13:49:38 -0700
Subject: [PATCH 41/43] add back extra spaces

---
 docs/src/man/reshaping_and_pivoting.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/src/man/reshaping_and_pivoting.md b/docs/src/man/reshaping_and_pivoting.md
index 9be632a..1b936e1 100644
--- a/docs/src/man/reshaping_and_pivoting.md
+++ b/docs/src/man/reshaping_and_pivoting.md
@@ -61,13 +61,13 @@ d = stackdt(iris)
 
 This saves memory. To create the view, several AbstractVectors are defined:
 
-`:variable` column -- `EachRepeatedVector`
+`:variable` column -- `EachRepeatedVector`  
 This repeats the variables N times where N is the number of rows of the original AbstractDataTable.
 
-`:value` column -- `StackedVector`
+`:value` column -- `StackedVector`  
 This is provides a view of the original columns stacked together.
 
-Id columns -- `RepeatedVector`
+Id columns -- `RepeatedVector`  
 This repeats the original columns N times where N is the number of columns stacked.
 
 For more details on the storage representation, see:

From 887346ba5293c845b39f6f7746c3164ec34f620f Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Mon, 20 Mar 2017 11:28:58 -0700
Subject: [PATCH 42/43] bump catarrays version, remove manual resetting of
 levels in unstack

and adjust tests accordingly
---
 REQUIRE                          |  2 +-
 src/abstractdatatable/reshape.jl |  5 +----
 test/datatable.jl                | 12 +++++-------
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/REQUIRE b/REQUIRE
index 7bb9ed3..b18bc91 100644
--- a/REQUIRE
+++ b/REQUIRE
@@ -1,6 +1,6 @@
 julia 0.5
 NullableArrays 0.1.0
-CategoricalArrays 0.1.2
+CategoricalArrays 0.1.3
 StatsBase 0.11.0
 SortingAlgorithms
 Reexport
diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl
index f7f774a..ed26cd4 100644
--- a/src/abstractdatatable/reshape.jl
+++ b/src/abstractdatatable/reshape.jl
@@ -194,10 +194,8 @@ function unstack(dt::AbstractDataTable, rowkey::Int, colkey::Int, value::Int)
     # `colkey` integer indicating which column to place along column headers
     # `value` integer indicating which column has values
     refkeycol = NullableCategoricalArray(dt[rowkey])
-    levels!(refkeycol, unique(dt[rowkey]))
     valuecol = dt[value]
     keycol = NullableCategoricalArray(dt[colkey])
-    levels!(keycol, unique(dt[colkey]))
     Nrow = length(refkeycol.pool)
     Ncol = length(keycol.pool)
     payload = DataTable(Any[similar_nullable(valuecol, Nrow) for i in 1:Ncol], map(Symbol, levels(keycol)))
@@ -226,14 +224,13 @@ unstack(dt::AbstractDataTable, colkey, value) =
 
 function unstack(dt::AbstractDataTable, colkey::Int, value::Int)
     # group on anything not a key or value:
-    g = groupby(dt, setdiff(_names(dt), _names(dt)[[colkey, value]]))
+    g = groupby(dt, setdiff(_names(dt), _names(dt)[[colkey, value]]), sort=true)
     groupidxs = [g.idx[g.starts[i]:g.ends[i]] for i in 1:length(g.starts)]
     rowkey = zeros(Int, size(dt, 1))
     for i in 1:length(groupidxs)
         rowkey[groupidxs[i]] = i
     end
     keycol = NullableCategoricalArray(dt[colkey])
-    levels!(keycol, unique(dt[colkey]))
     valuecol = dt[value]
     dt1 = nullify!(dt[g.idx[g.starts], g.cols])
     Nrow = length(g)
diff --git a/test/datatable.jl b/test/datatable.jl
index 4adaf24..b2ee0aa 100644
--- a/test/datatable.jl
+++ b/test/datatable.jl
@@ -292,25 +292,23 @@ module TestDataTable
                         Value = ["12 g", "Red", "18 g", "Grey"])
         # Check that reordering levels does not confuse unstack
         levels!(dtA[1], ["XXX", "Bob", "Batman"])
-        # should all be the same, just different column types
+        # should all return the same output, just different column types
         dt2A = unstack(dtA, :Fish, :Key, :Value)
         dt3A = unstack(dtA, :Key, :Value)
-        #The expected output
         dt4A = DataTable(Fish = NullableCategoricalArray(["Bob", "Batman"]),
-                         Mass = NullableArray(["12 g", "18 g"]),
-                         Color = NullableArray(["Red", "Grey"]))
-        @test dt2A == dt3A == dt4A
+                         Color = NullableArray(["Red", "Grey"]),
+                         Mass = NullableArray(["12 g", "18 g"]))
+        @test dt2A[[2, 3], :] == dt3A == dt4A
 
         dtB = DataTable(Fish = CategoricalArray(["Bob", "Bob", "Batman", "Batman"]),
                         Key = CategoricalArray(["Mass", "Color", "Mass", "Color"]),
                         Value = CategoricalArray(["12 g", "Red", "18 g", "Grey"]))
         dt2B = unstack(dtB, :Fish, :Key, :Value)
         dt3B = unstack(dtB, :Key, :Value)
-        # fixme, these are all being reordered by NullableCategoricalArray constructor
         dt4B = DataTable(Fish = NullableCategoricalArray(["Batman", "Bob"]),
                          Color = NullableCategoricalArray(["Grey", "Red"]),
                          Mass = NullableCategoricalArray(["18 g", "12 g"]))
-        @test dt2B == dt3B[[2,1], :] == dt4B
+        @test dt2B == dt3B == dt4B
 
         # test multiple entries in unstack error
         dt = DataTable(id=[1, 2, 1, 2], variable=["a", "b", "a", "b"], value=[3, 4, 5, 6])

From 020c88ed0b116f293f21970ed998ef9556c4ee11 Mon Sep 17 00:00:00 2001
From: Cameron Prybol <cameron.prybol@gmail.com>
Date: Fri, 24 Mar 2017 12:36:13 -0700
Subject: [PATCH 43/43] only use "and" before the last error string in vcat message

---
 src/abstractdatatable/abstractdatatable.jl |  3 ++-
 test/cat.jl                                | 14 +++++++-------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl
index 88fce9f..7879014 100644
--- a/src/abstractdatatable/abstractdatatable.jl
+++ b/src/abstractdatatable/abstractdatatable.jl
@@ -761,7 +761,8 @@ function Base.vcat(dts::AbstractDataTable...)
                                     string(matchingloci[end])
                 estrings[i] = "column(s) $headerdiff are missing from argument(s) $matchingloci"
             end
-            throw(ArgumentError(join(estrings, ", and ")))
+            length(estrings) == 1 ? throw(ArgumentError(estrings[1])) :
+                throw(ArgumentError(join(estrings[1:end-1], ", ") * ", and " * estrings[end]))
         else
             estrings = Vector{String}(length(uniqueheaders))
             for (i, u) in enumerate(uniqueheaders)
diff --git a/test/cat.jl b/test/cat.jl
index 5f9ac7c..ba44d0a 100644
--- a/test/cat.jl
+++ b/test/cat.jl
@@ -171,20 +171,20 @@ module TestCat
         @test err.value.msg == "column(s) E and F are missing from argument(s) 1 and 2, and column(s) B are missing from argument(s) 3 and 4"
         dt3 = DataTable(A = 1, B = 1, C = 1, D = 1, E = 1)
         err = @test_throws ArgumentError vcat(dt1, dt2, dt3)
-        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, and column(s) B are missing from argument(s) 2, and column(s) F are missing from argument(s) 3"
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, column(s) B are missing from argument(s) 2, and column(s) F are missing from argument(s) 3"
         err = @test_throws ArgumentError vcat(dt1, dt1, dt2, dt2, dt3, dt3)
-        @test err.value.msg == "column(s) E and F are missing from argument(s) 1 and 2, and column(s) B are missing from argument(s) 3 and 4, and column(s) F are missing from argument(s) 5 and 6"
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1 and 2, column(s) B are missing from argument(s) 3 and 4, and column(s) F are missing from argument(s) 5 and 6"
         err = @test_throws ArgumentError vcat(dt1, dt1, dt1, dt2, dt2, dt2, dt3, dt3, dt3)
-        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, 2 and 3, and column(s) B are missing from argument(s) 4, 5 and 6, and column(s) F are missing from argument(s) 7, 8 and 9"
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, 2 and 3, column(s) B are missing from argument(s) 4, 5 and 6, and column(s) F are missing from argument(s) 7, 8 and 9"
         # dt4 is a superset of names found in all other datatables and won't be shown in error
         dt4 = DataTable(A = 1, B = 1, C = 1, D = 1, E = 1, F = 1)
         err = @test_throws ArgumentError vcat(dt1, dt2, dt3, dt4)
-        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, and column(s) B are missing from argument(s) 2, and column(s) F are missing from argument(s) 3"
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, column(s) B are missing from argument(s) 2, and column(s) F are missing from argument(s) 3"
         err = @test_throws ArgumentError vcat(dt1, dt1, dt2, dt2, dt3, dt3, dt4, dt4)
-        @test err.value.msg == "column(s) E and F are missing from argument(s) 1 and 2, and column(s) B are missing from argument(s) 3 and 4, and column(s) F are missing from argument(s) 5 and 6"
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1 and 2, column(s) B are missing from argument(s) 3 and 4, and column(s) F are missing from argument(s) 5 and 6"
         err = @test_throws ArgumentError vcat(dt1, dt1, dt1, dt2, dt2, dt2, dt3, dt3, dt3, dt4, dt4, dt4)
-        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, 2 and 3, and column(s) B are missing from argument(s) 4, 5 and 6, and column(s) F are missing from argument(s) 7, 8 and 9"
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, 2 and 3, column(s) B are missing from argument(s) 4, 5 and 6, and column(s) F are missing from argument(s) 7, 8 and 9"
         err = @test_throws ArgumentError vcat(dt1, dt2, dt3, dt4, dt1, dt2, dt3, dt4, dt1, dt2, dt3, dt4)
-        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, 5 and 9, and column(s) B are missing from argument(s) 2, 6 and 10, and column(s) F are missing from argument(s) 3, 7 and 11"
+        @test err.value.msg == "column(s) E and F are missing from argument(s) 1, 5 and 9, column(s) B are missing from argument(s) 2, 6 and 10, and column(s) F are missing from argument(s) 3, 7 and 11"
     end
 end