Skip to content

Commit

Permalink
end of day commit
Browse files Browse the repository at this point in the history
missed a spot

little refactor

pycall and conda to extras

little refactoring

squash
  • Loading branch information
sa- committed Apr 18, 2021
1 parent 005c946 commit 130de79
Show file tree
Hide file tree
Showing 7 changed files with 179 additions and 69 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ Manifest.toml
*.jl.mem

test/_scrap.jl
.DS_STORE
.vscode
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StructTypes = "856f2bd8-1eba-4b0a-8007-ebc267875bd4"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"

[targets]
test = ["Test", "Random", "JSON3", "StructTypes", "CategoricalArrays"]
test = ["Test", "Random", "JSON3", "StructTypes", "CategoricalArrays", "PyCall", "Conda"]
2 changes: 1 addition & 1 deletion src/Arrow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ include("arraytypes/arraytypes.jl")
include("eltypes.jl")
include("table.jl")
include("write.jl")
include("cinterface.jl")
include("CDataInterface/CDataInterface.jl")

const LZ4_FRAME_COMPRESSOR = LZ4FrameCompressor[]
const ZSTD_COMPRESSOR = ZstdCompressor[]
Expand Down
37 changes: 37 additions & 0 deletions src/CDataInterface/CDataInterface.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
module CDataInterface

include("c_definitions.jl")
include("jl_definitions.jl")
include("format_string_to_types.jl")

export ArrowSchema, ArrowArray, get_schema, get_array, getschema, getarray

"""
    get_schema(f) -> ArrowSchema

Allocate an empty `CArrowSchema`, call `f(ptr)` with a `Ptr{CArrowSchema}` to it so the
callee can fill it in, and return the result converted to an `ArrowSchema`.

A finalizer is attached to the returned object: when it is garbage collected, the
schema's `release` callback is invoked (if it is not `C_NULL`) with a pointer to the
C struct that was filled in by `f`.
"""
function get_schema(f)
    schema_ref = Ref{CArrowSchema}()
    # The raw pointer is only valid while `schema_ref` is rooted.
    GC.@preserve schema_ref begin
        f(Base.unsafe_convert(Ptr{CArrowSchema}, schema_ref))
    end
    sch = ArrowSchema(schema_ref)
    finalizer(sch) do x
        # `getfield` bypasses the custom `getproperty` defined on `CArrowSchema`.
        r = getfield(x.c_arrow_schema[], :release)
        if r != C_NULL
            ccall(r, Cvoid, (Ptr{CArrowSchema},), x.c_arrow_schema)
        end
    end
    return sch
end

"""
    get_array(f) -> ArrowArray

Allocate an empty `CArrowArray`, call `f(ptr)` with a `Ptr{CArrowArray}` to it so the
callee can fill it in, and return the result converted to an `ArrowArray`.

A finalizer is attached to the returned object: when it is garbage collected, the
array's `release` callback is invoked (if it is not `C_NULL`) with a pointer to the
C struct that was filled in by `f`.
"""
function get_array(f)
    arr_ref = Ref{CArrowArray}()
    # The raw pointer is only valid while `arr_ref` is rooted.
    GC.@preserve arr_ref begin
        f(Base.unsafe_convert(Ptr{CArrowArray}, arr_ref))
    end
    arr = ArrowArray(arr_ref)
    finalizer(arr) do x
        # `getfield` bypasses the custom `getproperty` defined on `CArrowArray`.
        r = getfield(x.c_arrow_array[], :release)
        if r != C_NULL
            ccall(r, Cvoid, (Ptr{CArrowArray},), x.c_arrow_array)
        end
    end
    return arr
end

# The module exports `getschema`/`getarray`; keep those names bound so the export list
# resolves to the functions defined above.
const getschema = get_schema
const getarray = get_array

end # module
69 changes: 3 additions & 66 deletions src/cinterface.jl → src/CDataInterface/c_definitions.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
module CData

export ArrowSchema, ArrowArray, getschema, getarray

const ARROW_FLAG_DICTIONARY_ORDERED = 1
const ARROW_FLAG_NULLABLE = 2
const ARROW_FLAG_MAP_KEYS_SORTED = 4
Expand All @@ -22,7 +18,7 @@ CArrowSchema() = CArrowSchema(C_NULL, C_NULL, C_NULL, 0, 0, C_NULL, C_NULL, _CNU

# Property names advertised for a `CArrowSchema`. Other struct fields (e.g. `release`)
# are not listed here; code in this file reads them with `getfield` instead.
Base.propertynames(::CArrowSchema) = (:format, :name, :metadata, :flags, :n_children, :children, :dictionary)

function readmetadata(ptr::Ptr{UInt8})
function read_c_arrow_schema_metadata(ptr::Ptr{UInt8})
pos = 1
meta = Dict{String, String}()
if ptr != C_NULL
Expand All @@ -49,7 +45,7 @@ function Base.getproperty(x::CArrowSchema, nm::Symbol)
elseif nm === :name
return unsafe_string(getfield(x, :name))
elseif nm === :metadata
return readmetadata(getfield(x, :metadata))
return read_c_arrow_schema_metadata(getfield(x, :metadata))
elseif nm === :flags
return getfield(x, :flags)
elseif nm === :n_children
Expand All @@ -64,34 +60,6 @@ function Base.getproperty(x::CArrowSchema, nm::Symbol)
error("unknown property requested: $nm")
end

"""
    ArrowSchema

Julia-side view of a `CArrowSchema`. The constructors in this file fill the first seven
fields from the same-named properties of the C struct, converting children and the
dictionary recursively. `carrowschema` keeps the `Ref{CArrowSchema}` the object was built
from; `getschema` uses it to call the schema's `release` callback from a finalizer.
"""
mutable struct ArrowSchema
format::String
name::String
metadata::Dict{String, String}
flags::Int64
n_children::Int64
children::Vector{ArrowSchema}
dictionary::Union{Nothing, ArrowSchema}
carrowschema::Ref{CArrowSchema}
end

# Convert a referenced C schema; the reference itself is stored in the last field.
function ArrowSchema(s::Ref{CArrowSchema})
    format = s[].format
    name = s[].name
    metadata = s[].metadata
    flags = s[].flags
    n_children = s[].n_children
    children = map(ArrowSchema, s[].children)
    cdict = s[].dictionary
    dictionary = cdict === nothing ? nothing : ArrowSchema(cdict)
    return ArrowSchema(format, name, metadata, flags, n_children, children, dictionary, s)
end

# Convert a C schema held by value; the last field gets a fresh, unassigned reference.
function ArrowSchema(s::CArrowSchema)
    format = s.format
    name = s.name
    metadata = s.metadata
    flags = s.flags
    n_children = s.n_children
    children = map(ArrowSchema, s.children)
    cdict = s.dictionary
    dictionary = cdict === nothing ? nothing : ArrowSchema(cdict)
    return ArrowSchema(
        format, name, metadata, flags, n_children, children, dictionary,
        Ref{CArrowSchema}(),
    )
end

"""
    getschema(f)

Create an empty `Ref{CArrowSchema}`, hand a raw pointer to it to `f`, and wrap the
result in an `ArrowSchema`. The returned object carries a finalizer that calls the
schema's `release` callback when that callback is not `C_NULL`.
"""
function getschema(f)
    cschema = Ref{CArrowSchema}()
    f(Base.unsafe_convert(Ptr{CArrowSchema}, cschema))
    schema = ArrowSchema(cschema)
    finalizer(schema) do owner
        # `getfield` reads the raw struct field rather than going through `getproperty`.
        callback = getfield(owner.carrowschema[], :release)
        callback != C_NULL && ccall(callback, Cvoid, (Ptr{CArrowSchema},), owner.carrowschema)
    end
    return schema
end

struct CArrowArray
length::Int64
null_count::Int64
Expand Down Expand Up @@ -131,35 +99,4 @@ function Base.getproperty(x::CArrowArray, nm::Symbol)
return d == C_NULL ? nothing : unsafe_load(d)
end
error("unknown property requested: $nm")
end

"""
    ArrowArray

Julia-side view of a `CArrowArray`. The constructors in this file fill the first eight
fields from the same-named properties of the C struct, converting children and the
dictionary recursively. `carrowarray` keeps the `Ref{CArrowArray}` the object was built
from; `getarray` uses it to call the array's `release` callback from a finalizer.
"""
mutable struct ArrowArray
length::Int64
null_count::Int64
offset::Int64
n_buffers::Int64
n_children::Int64
buffers::Vector{Ptr{UInt8}}
children::Vector{ArrowArray}
dictionary::Union{Nothing, ArrowArray}
carrowarray::Ref{CArrowArray}
end

# Convert a referenced C array; the reference itself is stored in the last field.
function ArrowArray(a::Ref{CArrowArray})
    len = a[].length
    null_count = a[].null_count
    offset = a[].offset
    n_buffers = a[].n_buffers
    n_children = a[].n_children
    buffers = a[].buffers
    children = map(ArrowArray, a[].children)
    cdict = a[].dictionary
    dictionary = cdict === nothing ? nothing : ArrowArray(cdict)
    return ArrowArray(
        len, null_count, offset, n_buffers, n_children, buffers, children, dictionary, a,
    )
end

# Convert a C array held by value; the last field gets a fresh, unassigned reference.
function ArrowArray(a::CArrowArray)
    len = a.length
    null_count = a.null_count
    offset = a.offset
    n_buffers = a.n_buffers
    n_children = a.n_children
    buffers = a.buffers
    children = map(ArrowArray, a.children)
    cdict = a.dictionary
    dictionary = cdict === nothing ? nothing : ArrowArray(cdict)
    return ArrowArray(
        len, null_count, offset, n_buffers, n_children, buffers, children, dictionary,
        Ref{CArrowArray}(),
    )
end

"""
    getarray(f)

Create an empty `Ref{CArrowArray}`, hand a raw pointer to it to `f`, and wrap the
result in an `ArrowArray`. The returned object carries a finalizer that calls the
array's `release` callback when that callback is not `C_NULL`.
"""
function getarray(f)
    carray = Ref{CArrowArray}()
    f(Base.unsafe_convert(Ptr{CArrowArray}, carray))
    array = ArrowArray(carray)
    finalizer(array) do owner
        # `getfield` reads the raw struct field rather than going through `getproperty`.
        callback = getfield(owner.carrowarray[], :release)
        callback != C_NULL && ccall(callback, Cvoid, (Ptr{CArrowArray},), owner.carrowarray)
    end
    return array
end

end # module
end
68 changes: 68 additions & 0 deletions src/CDataInterface/format_string_to_types.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# https://arrow.apache.org/docs/format/CDataInterface.html#data-type-description-format-strings
module FormatStrings

# This file is included from `CDataInterface`, which is itself included from `Arrow`,
# so the grandparent of this module is `Arrow`. A nested module does not see its
# ancestors' names automatically; the lookup below is what brings `Arrow` into reach.
# NOTE(review): assumes the Arrow -> CDataInterface -> FormatStrings nesting is kept.
const ARROW = parentmodule(parentmodule(@__MODULE__))

# Format strings that map one-to-one onto a Julia type.
const PRIMITIVE_TYPES = Dict{String,DataType}(
    "n" => Nothing,
    "b" => Bool,
    "c" => Int8,
    "C" => UInt8,
    "s" => Int16,
    "S" => UInt16,
    "i" => Int32,
    "I" => UInt32,
    "l" => Int64,
    "L" => UInt64,
    "e" => Float16,
    "f" => Float32,
    "g" => Float64,
    "z" => Vector{UInt8},  # binary
    "Z" => Vector{UInt8},  # large binary
    "u" => String,         # utf-8 string
    "U" => String,         # large utf-8 string
)

# Parse the comma-separated integers that follow `prefix` in `format_string`.
function parse_int_fields(format_string::AbstractString, prefix::AbstractString)
    body = SubString(format_string, ncodeunits(prefix) + 1)
    values = Int[]
    for field in split(body, ',')
        value = tryparse(Int, field)
        value === nothing && throw(
            ArgumentError("malformed Arrow format string: $(repr(format_string))"),
        )
        push!(values, value)
    end
    return values
end

# Parse `d:precision,scale[,bitwidth]`; the bit width defaults to 128 when omitted.
function parse_decimal_parameters(format_string::AbstractString)
    values = parse_int_fields(format_string, "d:")
    length(values) in (2, 3) || throw(
        ArgumentError("malformed decimal format string: $(repr(format_string))"),
    )
    bitwidth = length(values) == 3 ? values[3] : 128
    return (precision = values[1], scale = values[2], bitwidth = bitwidth)
end

"""
    get_type_from_format_string(format_string) -> Type

Return the Julia type that corresponds to an Arrow C data interface format string.

Primitive, binary and string formats map to Base types. The nested formats (`+l`, `+L`,
`+w:N`, `+s`, `+m`, `+ud:...`, `+us:...`) map to the matching `Arrow` array types, which
are parametric, hence the `Type` (rather than `DataType`) return annotation.

# Throws
- `ArgumentError` if `format_string` is empty, malformed, unknown, or belongs to a
  family that has no mapping yet (decimal `d:`, fixed-width binary `w:`, temporal `t`).
"""
function get_type_from_format_string(format_string::AbstractString)::Type
    isempty(format_string) &&
        throw(ArgumentError("Arrow format string must not be empty"))

    primitive = get(PRIMITIVE_TYPES, format_string, nothing)
    primitive === nothing || return primitive

    if format_string == "+l" || format_string == "+L"
        return ARROW.List
    elseif startswith(format_string, "+w:")
        # The element count is validated but not used yet.
        # TODO carry the fixed size into the returned type
        length(parse_int_fields(format_string, "+w:")) == 1 || throw(
            ArgumentError("malformed Arrow format string: $(repr(format_string))"),
        )
        return ARROW.FixedSizeList
    elseif format_string == "+s"
        return ARROW.Struct
    elseif format_string == "+m"
        return ARROW.Map
    elseif startswith(format_string, "+ud:")
        # TODO use the type ids listed after the colon
        return ARROW.DenseUnion
    elseif startswith(format_string, "+us:")
        # TODO use the type ids listed after the colon
        return ARROW.SparseUnion
    elseif startswith(format_string, "d:")
        # Validate the parameters so malformed input is reported as such.
        params = parse_decimal_parameters(format_string)
        # TODO map (precision, scale, bitwidth) onto a decimal type
        throw(ArgumentError(
            "decimal format strings are not supported yet: $(repr(format_string)) " *
            "(precision=$(params.precision), scale=$(params.scale), " *
            "bitwidth=$(params.bitwidth))",
        ))
    elseif startswith(format_string, "w:")
        # TODO figure out fixed width binary
        throw(ArgumentError(
            "fixed-width binary format strings are not supported yet: " *
            repr(format_string),
        ))
    elseif startswith(format_string, "t")
        # TODO map the date/time/timestamp/duration/interval formats
        throw(ArgumentError(
            "temporal format strings are not supported yet: $(repr(format_string))",
        ))
    end
    throw(ArgumentError("unknown Arrow format string: $(repr(format_string))"))
end

end # module
66 changes: 66 additions & 0 deletions src/CDataInterface/jl_definitions.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""
    ArrowSchema

Julia-side view of a `CArrowSchema`. The constructors in this file fill the first seven
fields from the same-named properties of the C struct, converting children and the
dictionary recursively. `c_arrow_schema` holds the `Ref{CArrowSchema}` the object was
built from, or a fresh unassigned reference when it was built from a value.
"""
mutable struct ArrowSchema
format::String
name::String
metadata::Dict{String, String}
flags::Int64
n_children::Int64
children::Vector{ArrowSchema}
dictionary::Union{Nothing, ArrowSchema}
c_arrow_schema::Ref{CArrowSchema}
end

# Convert a referenced C schema into an `ArrowSchema`. Children and the dictionary are
# converted recursively; the reference `s` itself is stored in `c_arrow_schema`.
function ArrowSchema(s::Ref{CArrowSchema})
    format = s[].format
    name = s[].name
    metadata = s[].metadata
    flags = s[].flags
    n_children = s[].n_children
    children = map(ArrowSchema, s[].children)
    cdict = s[].dictionary
    dictionary = cdict === nothing ? nothing : ArrowSchema(cdict)
    return ArrowSchema(format, name, metadata, flags, n_children, children, dictionary, s)
end

# Convert a C schema held by value into an `ArrowSchema`. Children and the dictionary
# are converted recursively; `c_arrow_schema` gets a fresh reference that is never
# assigned a value here.
function ArrowSchema(s::CArrowSchema)
    format = s.format
    name = s.name
    metadata = s.metadata
    flags = s.flags
    n_children = s.n_children
    children = map(ArrowSchema, s.children)
    cdict = s.dictionary
    dictionary = cdict === nothing ? nothing : ArrowSchema(cdict)
    return ArrowSchema(
        format, name, metadata, flags, n_children, children, dictionary,
        Ref{CArrowSchema}(),
    )
end

"""
    ArrowArray

Julia-side view of a `CArrowArray`. The constructors in this file fill the first eight
fields from the same-named properties of the C struct, converting children and the
dictionary recursively. `c_arrow_array` holds the `Ref{CArrowArray}` the object was
built from, or a fresh unassigned reference when it was built from a value.
"""
mutable struct ArrowArray
length::Int64
null_count::Int64
offset::Int64
n_buffers::Int64
n_children::Int64
buffers::Vector{Ptr{UInt8}}
children::Vector{ArrowArray}
dictionary::Union{Nothing, ArrowArray}
c_arrow_array::Ref{CArrowArray}
end

# Convert a referenced C array into an `ArrowArray`. Children and the dictionary are
# converted recursively; the reference `a` itself is stored in `c_arrow_array`.
function ArrowArray(a::Ref{CArrowArray})
    len = a[].length
    null_count = a[].null_count
    offset = a[].offset
    n_buffers = a[].n_buffers
    n_children = a[].n_children
    buffers = a[].buffers
    children = map(ArrowArray, a[].children)
    cdict = a[].dictionary
    dictionary = cdict === nothing ? nothing : ArrowArray(cdict)
    return ArrowArray(
        len, null_count, offset, n_buffers, n_children, buffers, children, dictionary, a,
    )
end

# Convert a C array held by value into an `ArrowArray`. Children and the dictionary are
# converted recursively; `c_arrow_array` gets a fresh reference that is never assigned
# a value here.
function ArrowArray(a::CArrowArray)
    len = a.length
    null_count = a.null_count
    offset = a.offset
    n_buffers = a.n_buffers
    n_children = a.n_children
    buffers = a.buffers
    children = map(ArrowArray, a.children)
    cdict = a.dictionary
    dictionary = cdict === nothing ? nothing : ArrowArray(cdict)
    return ArrowArray(
        len, null_count, offset, n_buffers, n_children, buffers, children, dictionary,
        Ref{CArrowArray}(),
    )
end

0 comments on commit 130de79

Please sign in to comment.