diff --git a/src/dataframe.jl b/src/dataframe.jl index fd48509907..290bea948e 100644 --- a/src/dataframe.jl +++ b/src/dataframe.jl @@ -1730,10 +1730,6 @@ function isfinite(df::DataFrame) return DataFrame(res_columns, colnames(df)) end -function sortby(df::DataFrame, colname::String) - return df[order(df[colname]), :] -end - # TODO: Use cor_pearson and cov_pearson for DataMatrix to do this function cor_pearson(df::DataFrame) numeric_cols = find(map(t -> t <: Number, coltypes(df))) @@ -1759,6 +1755,95 @@ function flipud!(df::DataFrame) return end + +############################################################################## +## Sorting +############################################################################## + +import Sort.sort, Sort.sortby, Sort.By, + Sort.sort!, Sort.sortby!, + Sort.Algorithm, Sort.Ordering, + Sort.lt, Sort.Perm, Sort.Forward + +typealias ColIndexVec Union(AbstractVector{Integer}, AbstractVector{ASCIIString}, AbstractVector{UTF8String}, AbstractVector{Symbol}) + +const DF_STABLE_SORT = Sort.TimSort() + +# Permute indices according to the ordering of the given dataframe columns +type DFPerm{O<:Ordering,DF<:AbstractDataFrame} <: Ordering + ords::AbstractVector{O} + df::DF +end + +function DFPerm{O<:Ordering,DF<:AbstractDataFrame}(o::AbstractVector{Ordering}, df::DF) + o_cols = length(o) + df_cols = ncols(df) + if o_cols > df_cols + error("DFPerm: number of column orderings is greater than the number of columns") + end + if o_cols < df_cols + o = cat(1, o, fill(Sort.Forward(), df_cols-o_cols)) + end + DFPerm{O,DF}(o, df[cols]) +end + +DFPerm{O<:Ordering,DF<:AbstractDataFrame}(o::O, df::DF) = DFPerm{O,DF}(fill(o,ncol(df)), df) +DFPerm{ DF<:AbstractDataFrame}( df::DF) = DFPerm(Sort.Forward(), df) + +function lt(o::DFPerm, a, b) + for i = 1:ncol(o.df) + if lt(o.ords[i], o.df[a,i], o.df[b,i]) + return true + end + if lt(o.ords[i], o.df[b,i], o.df[a,i]) + return false + end + end + false +end + +# TODO: move [1:nrow(df)] first if/when julia pull #2179 is applied +sortperm(df::AbstractDataFrame, a::Algorithm, o::Union(Perm,DFPerm)) = sort!(a, o, [1:nrow(df)]) +sortperm(df::AbstractDataFrame, a::Algorithm, o::Ordering) = sortperm(df, a, DFPerm(o,df)) +sort (df::AbstractDataFrame, a::Algorithm, o::Ordering) = df[sortperm(df, a, o),:] + +function sort!(df::AbstractDataFrame, a::Algorithm, o::Ordering) + p = sortperm(df, a, o) + pp = similar(p) + for col in df.columns + copy!(pp,p) + permute!!(col, pp) + end + df +end + +for s in {:sort!, :sort, :sortperm} + @eval begin + $s{O<:Ordering}(df::AbstractDataFrame, ::Type{O}) = $s(df, DF_STABLE_SORT, O()) + $s (df::AbstractDataFrame, o::Ordering) = $s(df, DF_STABLE_SORT, o) + $s (df::AbstractDataFrame ) = $s(df, Sort.Forward()) + end +end + +for (sb,s) in {(:sortby!, :sort!), (:sortby, :sort)} + @eval begin + $sb(df::AbstractDataFrame, by::Function) = $s(df,By(by)) + + $sb{O<:Ordering}(df::AbstractDataFrame, col::ColumnIndex, ::Type{O}) = $s(df,Perm(O(),df[col])) + $sb (df::AbstractDataFrame, col::ColumnIndex, o::Ordering) = $s(df,Perm(o,df[col])) + $sb (df::AbstractDataFrame, col::ColumnIndex) = $sb(df,col,Sort.Forward()) + + $sb{O<:Ordering}(df::AbstractDataFrame, cols::ColIndexVec, ::Type{O}) = $s(df,DFPerm(O(),df[cols])) + $sb (df::AbstractDataFrame, cols::ColIndexVec, o::Ordering) = $s(df,DFPerm(o, df[cols])) + $sb (df::AbstractDataFrame, cols::ColIndexVec) = $sb(df,cols,Sort.Forward()) + + $sb{O<:Ordering}(df::AbstractDataFrame, cols::ColIndexVec, o::AbstractArray{O}) = $s(df,DFPerm(o, df[cols])) + $sb (df::AbstractDataFrame, cols::ColIndexVec, o::AbstractArray{CompositeKind}) = $s(df,DFPerm(Ordering[O() for O in o], df[cols])) + $sb (df::AbstractDataFrame, cols::ColIndexVec, o::AbstractArray) = $sb(df,cols,CompositeKind[ot for ot in o]) + $sb (df::AbstractDataFrame, col_ord::AbstractArray{Tuple}) = ((cols,o) = zip(col_ord...); $sb(df, [cols...], [o...])) + end +end + ############################################################################## ## ## Iteration: EachRow, EachCol