diff --git a/.travis.yml b/.travis.yml index b3d6bd2..fc3caa3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,6 @@ os: - linux julia: - - 0.6 - 1.0 - nightly diff --git a/appveyor.yml b/appveyor.yml index c1edd1a..1016f07 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,7 +1,5 @@ environment: matrix: - - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe" - - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe" - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.7/julia-0.7-latest-win32.exe" - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.7/julia-0.7-latest-win64.exe" - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" diff --git a/src/Feather.jl b/src/Feather.jl index e918f19..43ab123 100644 --- a/src/Feather.jl +++ b/src/Feather.jl @@ -1,26 +1,16 @@ -VERSION < v"0.7.0-beta2.199" && __precompile__() module Feather -using Arrow, Compat, Compat.Mmap +using Arrow, Mmap using FlatBuffers, CategoricalArrays, DataStreams, DataFrames -using Compat.Sys: iswindows - - -if Base.VERSION < v"0.7.0-DEV.2575" - const Dates = Base.Dates - using Missings - using Compat: @warn -else - import Dates -end +import Dates const FEATHER_VERSION = 2 # wesm/feather/cpp/src/common.h const FEATHER_MAGIC_BYTES = Vector{UInt8}(codeunits("FEA1")) const MIN_FILE_LENGTH = 12 -const SHOULD_USE_MMAP = !iswindows() +const SHOULD_USE_MMAP = !Sys.iswindows() include("metadata.jl") # flatbuffer defintions diff --git a/src/sink.jl b/src/sink.jl index 2fc6144..a245930 100644 --- a/src/sink.jl +++ b/src/sink.jl @@ -40,11 +40,31 @@ Base.size(sink::Sink, i::Integer) = size(sink.schema, i) """ - write(filename::AbstractString, df::DataFrame) + write(filename::AbstractString, df::DataFrame; overwrite::Bool=false) Write the dataframe `df` to the feather formatted file `filename`. 
+ +If the file `filename` already exists, an error will be thrown, unless `overwrite=true` in +which case the file will be deleted before writing. """ -function write(filename::AbstractString, df::AbstractDataFrame) +function write(filename::AbstractString, df::AbstractDataFrame; overwrite::Bool=false) + if isfile(filename) + if !overwrite + throw(ArgumentError("File $filename already exists. Pass `overwrite=true` to overwrite.")) + else + if Sys.iswindows() + try + rm(filename) + catch e + @error(string("Unable to delete file $filename. It's possible that it's ", + "already open and being used by this or another process.")) + rethrow() + end + else + rm(filename) + end + end + end sink = Feather.Sink(filename, df) Data.stream!(df, sink) Data.close!(sink) @@ -58,7 +78,7 @@ end # NOTE: the below is very inefficient, but we are forced to do it by the Feather format function Data.streamto!(sink::Sink, ::Type{Data.Column}, val::AbstractVector{Union{T,Missing}}, row, col) where T - hasmissing = Compat.findfirst(ismissing, val) + hasmissing = findfirst(ismissing, val) sink.columns[col] = arrowformat(hasmissing == nothing ? convert(AbstractVector{T}, val) : val) end