Skip to content

Commit

Permalink
Buffer + Improvements (#12)
Browse files Browse the repository at this point in the history
* buffer

* add elem in buffer

* fix buffer types in meth dec

* do not use gc

* wip, minor changes & timers

* optimize mem alloc & op in dynsparse matrix constructor

* error

* error

* improve

* various improvements

* fix

* allow creation of empty matrix

* copy vectors in matrix constructor

* fix

* tests

* rm apply_combine
  • Loading branch information
guimarqu authored Dec 28, 2020
1 parent 4c40bc1 commit 47d0d2f
Show file tree
Hide file tree
Showing 11 changed files with 257 additions and 57 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DynamicSparseArrays"
uuid = "8086fd22-9a0c-46a5-a6c8-6e24676501fe"
authors = ["Guillaume Marques <guillaume.marques@protonmail.com>"]
version = "0.2.4"
version = "0.3.0"

[compat]
julia = "1"
Expand Down
6 changes: 5 additions & 1 deletion src/DynamicSparseArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ export PackedMemoryArray,
nbpartitions,
deletepartition!,
deletecolumn!,
deleterow!
deleterow!,
addrow!,
closefillmode!

const Elements{K,T} = Vector{Union{Nothing,Tuple{K,T}}}

Expand All @@ -22,6 +24,8 @@ include("pcsr.jl")

include("views.jl")

include("buffer.jl")

include("matrix.jl")

end# module
55 changes: 55 additions & 0 deletions src/buffer.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Write buffer that accumulates matrix entries row by row before the compressed
# storage is built (its content is flattened by `get_rowids_colids_vals`).
# Type parameters: `L` is the column id type, `K` the row id type and `T` the
# value type (note the `{L,K,T}` order).
mutable struct Buffer{L,K,T}
# Maps a row id to the pair (column ids, values) recorded for that row.
rowmajor_coo::Dict{K, Tuple{Vector{L}, Vector{T}}}
# Total number of entries stored over all rows; incremented by `addrow!` and
# `addelem!` and used to size the vectors returned by `get_rowids_colids_vals`.
length::Int
end

"""
    buffer(K, L, T)

Create an empty `Buffer` for rows identified by keys of type `K`, columns
identified by keys of type `L` and values of type `T`.

The returned buffer holds no rows and its entry counter is `0`.
"""
function buffer(::Type{K}, ::Type{L}, ::Type{T}) where {K,L,T}
    rows = Dict{K, Tuple{Vector{L}, Vector{T}}}()
    return Buffer{L,K,T}(rows, 0)
end

"""
    addrow!(buffer, rowid, colids, vals)

Record a whole row in `buffer`.

`colids[i]` is the column id of the entry whose value is `vals[i]`. The entries
are stored sorted by column id; the input vectors are not modified.

# Throws
- `ErrorException` if a row with id `rowid` is already stored in the buffer.
- `DimensionMismatch` if `colids` and `vals` do not have the same length.
"""
function addrow!(
    buffer::Buffer{L,K,T}, rowid::K, colids::Vector{L}, vals::Vector{T}
) where {K,L,T}
    haskey(buffer.rowmajor_coo, rowid) && error("Row with id $rowid already written in dynamic sparse matrix buffer.")
    # Without this check a longer `vals` would be silently truncated by `vals[p]`
    # while the entry counter would still be incremented by `length(vals)`.
    if length(colids) != length(vals)
        throw(DimensionMismatch(
            "colids has $(length(colids)) entries but vals has $(length(vals))."
        ))
    end
    p = sortperm(colids)
    # Indexing with the permutation creates sorted copies of both vectors.
    buffer.rowmajor_coo[rowid] = (colids[p], vals[p])
    buffer.length += length(p)
    return
end

"""
    addelem!(buffer, rowid, colid, val)

Append the single entry `(rowid, colid) => val` to `buffer`.

The row is created on first use. Entries are appended in insertion order: they
are neither sorted by column id nor checked for duplicates here.
"""
function addelem!(
    buffer::Buffer{L,K,T}, rowid::K, colid::L, val::T
) where {K,L,T}
    # Column ids are of type `L` (row ids are of type `K`), so a new row must
    # start with a `Vector{L}` to match the value type of `rowmajor_coo`.
    colids, vals = get!(buffer.rowmajor_coo, rowid) do
        (Vector{L}(), Vector{T}())
    end
    push!(colids, colid)
    push!(vals, val)
    buffer.length += 1
    return
end

"""
    get_rowids_colids_vals(buffer)

Flatten the content of `buffer` into three vectors `(rowids, colids, vals)` of
length `buffer.length`, where the `i`-th entries of the three vectors describe
one stored element.

Rows are visited in the iteration order of the underlying `Dict`.
"""
function get_rowids_colids_vals(buffer::Buffer{L,K,T}) where {K,L,T}
    nb_entries = buffer.length
    rowids = Vector{K}(undef, nb_entries)
    colids = Vector{L}(undef, nb_entries)
    vals = Vector{T}(undef, nb_entries)

    pos = 0
    for (rowid, (row_colids, row_vals)) in buffer.rowmajor_coo
        for k in eachindex(row_vals)
            pos += 1
            rowids[pos] = rowid
            colids[pos] = row_colids[k]
            vals[pos] = row_vals[k]
        end
    end
    return rowids, colids, vals
end

"""
    getindex(buffer, row, :)

Return the entries recorded for `row` as a `PackedMemoryArray` built from the
column ids and the values stored for that row.

Indexing the underlying `Dict` throws a `KeyError` when `row` is not stored.
"""
function Base.getindex(buffer::Buffer{L,K,T}, row::K, ::Colon) where {L,K,T}
    colids, vals = buffer.rowmajor_coo[row]
    return PackedMemoryArray(colids, vals)
end
60 changes: 49 additions & 11 deletions src/matrix.jl
Original file line number Diff line number Diff line change
@@ -1,37 +1,51 @@
struct DynamicSparseMatrix{K,L,T}
colmajor::MappedPackedCSC{K,L,T}
rowmajor::MappedPackedCSC{L,K,T}
# Dynamic sparse matrix with row ids of type `K`, column ids of type `L` and
# values of type `T`. It is either in fill mode (entries go to a write buffer)
# or backed by two compressed representations.
mutable struct DynamicSparseMatrix{K,L,T}
# `true` while entries are written to `buffer`; set to `false` by `closefillmode!`.
fillmode::Bool
# Write buffer used in fill mode; `nothing` once fill mode is closed or when the
# matrix is built directly from its storage.
buffer::Union{Buffer{L,K,T}, Nothing}
# Storage indexed as `colmajor[row, col]`; `nothing` while in fill mode.
colmajor::Union{MappedPackedCSC{K,L,T}, Nothing}
# Storage indexed as `rowmajor[col, row]`; `nothing` while in fill mode.
rowmajor::Union{MappedPackedCSC{L,K,T}, Nothing}
end

function dynamicsparse(I::Vector{K}, J::Vector{L}, V::Vector{T}) where {K,L,T}
return DynamicSparseMatrix(
dynamicsparsecolmajor(I,J,V), dynamicsparsecolmajor(J,I,V)
false, nothing, dynamicsparsecolmajor(I,J,V), dynamicsparsecolmajor(J,I,V)
)
end

# TODO remove
function dynamicsparse(::Type{K}, ::Type{L}, ::Type{T}) where {K,L,T}
return DynamicSparseMatrix(
dynamicsparsecolmajor(K,L,T), dynamicsparsecolmajor(L,K,T)
)
"""
    dynamicsparse(K, L, T; fill_mode = true)

Create an empty dynamic sparse matrix with row ids of type `K`, column ids of
type `L` and values of type `T`.

# Keywords
- `fill_mode`: when `true` (default) the matrix starts in fill mode, backed only
  by a write buffer; call `closefillmode!` to build the storage. When `false`
  the matrix is created directly with empty column-major and row-major storage.
"""
function dynamicsparse(::Type{K}, ::Type{L}, ::Type{T}; fill_mode = true) where {K,L,T}
    if fill_mode
        return DynamicSparseMatrix{K,L,T}(true, buffer(K,L,T), nothing, nothing)
    end
    # The row-major storage swaps the roles of the key types, hence (L,K,T):
    # the `rowmajor` field is a `MappedPackedCSC{L,K,T}`.
    return DynamicSparseMatrix{K,L,T}(
        false, nothing, dynamicsparsecolmajor(K,L,T), dynamicsparsecolmajor(L,K,T)
    )
end

function Base.setindex!(m::DynamicSparseMatrix{K,L,T}, val, row::K, col::L) where {K,L,T}
m.colmajor[row, col] = val
m.rowmajor[col, row] = val
if m.fillmode
addelem!(m.buffer, row, col, val)
else
m.colmajor[row, col] = val
m.rowmajor[col, row] = val
end
return m
end

"""
    getindex(m::DynamicSparseMatrix, row, col)

Look up `m[row, col]`. In fill mode the lookup is forwarded to the write
buffer, otherwise to the column-major storage.
"""
function Base.getindex(m::DynamicSparseMatrix, row, col)
    if m.fillmode
        # NOTE(review): the only `Buffer` getindex method visible here takes
        # `(row, :)`; confirm a scalar `col` is supported in fill mode.
        return m.buffer[row, col]
    end
    # TODO : check number of rows & cols
    return m.colmajor[row, col]
end

"""
    view(m::DynamicSparseMatrix, row, :)

Return a view on row `row` of `m`, taken from the row-major storage.
Throws an `ErrorException` when the matrix is in fill mode.
"""
function Base.view(m::DynamicSparseMatrix{K,L,T}, row::K, ::Colon) where {K,L,T}
    if m.fillmode
        error("View of a row not available in fill mode (Open an issue at https://github.com/atoptima/DynamicSparseArrays.jl if you need it).")
    end
    # Rows of the matrix are stored as columns of the row-major representation.
    return view(m.rowmajor, :, row)
end

"""
    view(m::DynamicSparseMatrix, :, col)

Return a view on column `col` of `m`, taken from the column-major storage.
Throws an `ErrorException` when the matrix is in fill mode.
"""
function Base.view(m::DynamicSparseMatrix{K,L,T}, ::Colon, col::L) where {K,L,T}
    if m.fillmode
        error("View of a column not available in fill mode.")
    end
    return view(m.colmajor, :, col)
end

Expand All @@ -40,6 +54,7 @@ Base.length(m::DynamicSparseMatrix) = length(m.rowmajor)
"""
    size(m::DynamicSparseMatrix)

Return the pair `(nbpartitions(m.rowmajor), nbpartitions(m.colmajor))`.
"""
function Base.size(m::DynamicSparseMatrix)
    nb_in_rowmajor = nbpartitions(m.rowmajor)
    nb_in_colmajor = nbpartitions(m.colmajor)
    return (nb_in_rowmajor, nb_in_colmajor)
end

function deletecolumn!(matrix::DynamicSparseMatrix{K,L,T}, col::L) where {K,L,T}
matrix.fillmode && error("Cannot delete a column in fill mode")
for (row, val) in @view matrix[:, col]
matrix.rowmajor[col, row] = zero(T)
end
Expand All @@ -48,9 +63,32 @@ function deletecolumn!(matrix::DynamicSparseMatrix{K,L,T}, col::L) where {K,L,T}
end

"""
    deleterow!(matrix, row)

Delete row `row` from `matrix`: every entry of the row is set to `zero(T)` in
the column-major storage, then the row is removed from the row-major storage.

Returns `true`. Throws an `ErrorException` when the matrix is in fill mode.
"""
function deleterow!(matrix::DynamicSparseMatrix{K,L,T}, row::K) where {K,L,T}
    if matrix.fillmode
        error("Cannot delete a row in fill mode")
    end
    # Only the column ids of the row are needed; the stored values are ignored.
    for (col, _) in view(matrix, row, :)
        matrix.colmajor[row, col] = zero(T)
    end
    # A row of the matrix is a column of the row-major representation.
    deletecolumn!(matrix.rowmajor, row)
    return true
end

"""
    addrow!(matrix, row, colids, vals)

Add the entries `(row, colids[j]) => vals[j]` to `matrix`.

In fill mode the whole row is handed to the write buffer; otherwise each entry
is written through `setindex!`. Returns `true`.

# Throws
- `DimensionMismatch` (outside fill mode) if `colids` and `vals` do not have
  the same length.
"""
function addrow!(
    matrix::DynamicSparseMatrix{K,L,T}, row::K, colids::Vector{L}, vals::Vector{T}
) where {K,L,T}
    # Rows are keyed by `K` and columns by `L`, matching `setindex!` and the
    # `Buffer{L,K,T}` stored in the matrix.
    if matrix.fillmode
        addrow!(matrix.buffer, row, colids, vals)
    else
        for j in eachindex(colids, vals)
            # Equivalent to setindex!(matrix, vals[j], row, colids[j]): the value
            # comes first, then the row and the column.
            matrix[row, colids[j]] = vals[j]
        end
    end
    return true
end

"""
    closefillmode!(matrix)

Leave fill mode: build the column-major and row-major storage of `matrix` from
the content of its write buffer, then drop the buffer. Returns `true`.

# Throws
- `ErrorException` if `matrix` is not in fill mode.
- Any exception raised by `dynamicsparsecolmajor`; in that case `matrix` is
  left untouched and still in fill mode.
"""
function closefillmode!(matrix::DynamicSparseMatrix{K,L,T}) where {K,L,T}
    matrix.fillmode || error("Cannot close fill mode: the matrix is not in fill mode.")
    I, J, V = get_rowids_colids_vals(matrix.buffer)
    # Build both representations before touching the matrix so that a failure
    # does not leave it without buffer and without storage.
    colmajor = dynamicsparsecolmajor(I,J,V)
    rowmajor = dynamicsparsecolmajor(J,I,V)
    matrix.colmajor = colmajor
    matrix.rowmajor = rowmajor
    matrix.buffer = nothing
    matrix.fillmode = false
    return true
end
100 changes: 59 additions & 41 deletions src/pcsr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,38 @@ semaphore_key(::Type{K}) where {K<:Integer} = zero(K)

function PackedCSC(
row_keys::Vector{Vector{L}}, values::Vector{Vector{T}},
combine::Function = +
combine::Function = +;
) where {L,T <: Real}
nb_semaphores = length(row_keys)
nb_values = sum(length(values[i]) for i in 1:nb_semaphores)
@assert nb_semaphores == length(values)
applicable(semaphore_key, L) || error("method `semaphore_key` not implemented for type $(L).")
pcsc_keys = Vector{L}()
pcsc_values = Vector{T}()
pcsc_keys = Vector{L}(undef, nb_values + nb_semaphores)
pcsc_values = Vector{T}(undef, nb_values + nb_semaphores)
i = 1
for semaphore_id in 1:nb_semaphores
# Insert the semaphore
push!(pcsc_keys, semaphore_key(L))
push!(pcsc_values, T(semaphore_id)) # This is why T <: Real
@inbounds pcsc_keys[i] = semaphore_key(L)
@inbounds pcsc_values[i] = T(semaphore_id) # This is why T <: Real
i += 1
# Create the column
nkeys = Vector(row_keys[semaphore_id])
nvalues = Vector(values[semaphore_id])
@inbounds nkeys = Vector(row_keys[semaphore_id])
@inbounds nvalues = Vector(values[semaphore_id])
_prepare_keys_vals!(nkeys, nvalues, combine)
push!(pcsc_keys, nkeys...)
push!(pcsc_values, nvalues...)
for j in 1:length(nkeys)
@inbounds pcsc_keys[i] = nkeys[j]
@inbounds pcsc_values[i] = nvalues[j]
i += 1
end
end
resize!(pcsc_keys, i - 1)
resize!(pcsc_values, i - 1)
pma = PackedMemoryArray(pcsc_keys, pcsc_values, sort = false)
semaphores = Vector{Union{Int, Nothing}}(zeros(Int, nb_semaphores))
semaphores = Vector{Union{Int, Nothing}}(undef, nb_semaphores)
for (pos, pair) in enumerate(pma.array)
if pair != nothing && pair[1] == semaphore_key(L)
if pair !== nothing && pair[1] == semaphore_key(L)
id = Int(pair[2])
semaphores[id] = pos
@inbounds semaphores[id] = pos
end
end
return PackedCSC(nb_semaphores, semaphores, pma)
Expand All @@ -68,10 +76,9 @@ function MappedPackedCSC(
return MappedPackedCSC(col_keys, pcsc)
end

# TODO : remove
function MappedPackedCSC(::Type{K}, ::Type{L}, ::Type{T}) where {K,L,T}
pcsc = PackedCSC(K, T)
col_keys = Vector{Union{Nothing, L}}()
col_keys = Vector{Union{Nothing, L}}(undef, 0)
return MappedPackedCSC(col_keys, pcsc)
end

Expand Down Expand Up @@ -273,7 +280,7 @@ function Base.setindex!(pcsc::PackedCSC{K,T}, value, key::K, partition::Int) whe
_add_partitions!(pcsc, partition)
end
from = pcsc.semaphores[partition]
from == nothing && error("The partition has been deleted.")
from === nothing && error("The partition has been deleted.")
to = _pos_of_partition_end(pcsc, partition)
if value != zero(T)
_insert!(pcsc, value, key, from, to)
Expand Down Expand Up @@ -324,34 +331,43 @@ function Base.setindex!(mpcsc::MappedPackedCSC{L,K,T}, value, row::L, col::K) wh
return setindex!(mpcsc, T(value), row, col)
end


## Dynamic sparse matrix builder (exported)
## Dynamic sparse matrix builder
function _dynamicsparse(
I::Vector{K}, J::Vector{L}, V::Vector{T}, combine, always_use_map
) where {K,L,T}
!always_use_map && error("TODO issue #2.")

p = sortperm(collect(zip(J,I))) # Columns first
permute!(I, p)
permute!(J, p)
permute!(V, p)
p = sortperm(collect(zip(J,I)), alg=QuickSort) # Columns first
@inbounds I = I[p]
@inbounds J = J[p]
@inbounds V = V[p]

nb_cols = 1
nb_rows_in_col = Int[]
push!(nb_rows_in_col, 1)

write_pos = 1
read_pos = 1
prev_i = I[read_pos]
prev_j = J[read_pos]
while read_pos < length(I)
read_pos += 1
cur_i = I[read_pos]
cur_j = J[read_pos]
@inbounds cur_i = I[read_pos]
@inbounds cur_j = J[read_pos]
if prev_i == cur_i && prev_j == cur_j
V[write_pos] = combine(V[write_pos], V[read_pos])
@inbounds V[write_pos] = combine(V[write_pos], V[read_pos])
else
write_pos += 1
if write_pos < read_pos
I[write_pos] = cur_i
J[write_pos] = cur_j
V[write_pos] = V[read_pos]
@inbounds I[write_pos] = cur_i
@inbounds J[write_pos] = cur_j
@inbounds V[write_pos] = V[read_pos]
end
if cur_j != prev_j
nb_cols += 1
push!(nb_rows_in_col, 1)
elseif cur_i != prev_i
nb_rows_in_col[end] += 1
end
prev_i = cur_i
prev_j = cur_j
Expand All @@ -361,21 +377,26 @@ function _dynamicsparse(
resize!(J, write_pos)
resize!(V, write_pos)

col_keys = Vector{L}()
row_keys = Vector{Vector{K}}()
values = Vector{Vector{T}}()
col_keys = Vector{L}(undef, nb_cols)
row_keys = Vector{Vector{K}}(undef, nb_cols)
values = Vector{Vector{T}}(undef, nb_cols)
i = 1
prev_col = J[1]
col_pos = 0
row_pos = 0
while i <= length(I)
cur_col = J[i]
@inbounds cur_col = J[i]
if prev_col != cur_col || i == 1
push!(col_keys, cur_col)
push!(row_keys, Vector{K}())
push!(values, Vector{K}())
col_pos += 1
row_pos = 1
@inbounds col_keys[col_pos] = cur_col
@inbounds row_keys[col_pos] = Vector{K}(undef, nb_rows_in_col[col_pos])
@inbounds values[col_pos] = Vector{T}(undef, nb_rows_in_col[col_pos])
end
push!(row_keys[end], I[i])
push!(values[end], V[i])
@inbounds row_keys[col_pos][row_pos] = I[i]
@inbounds values[col_pos][row_pos] = V[i]
prev_col = cur_col
row_pos += 1
i += 1
end

Expand All @@ -385,7 +406,7 @@ function _dynamicsparse(
# TODO : Check that we use integer keys for columns, otherwise we have to use a map
# Add empty columns in the rows_keys vector
# We can put all those things in a
return PackedCSC(rows_keys, values)
return PackedCSC(rows_keys, values, combine)
end
end

Expand All @@ -401,12 +422,9 @@ function dynamicsparsecolmajor(
throw(ArgumentError("vectors cannot be empty."))
applicable(<, L, L) ||
throw(ArgumentError("set of keys must be totally ordered (define method Base.:< for type $L)."))
return _dynamicsparse(
Vector(I), Vector(J), Vector(V), combine, always_use_map
)
return _dynamicsparse(Vector(I), Vector(J), Vector(V), combine, always_use_map)
end

# TODO remove
"""
    dynamicsparsecolmajor(K, L, T)

Return `MappedPackedCSC(K, L, T)`, i.e. a column-major storage created from the
three types only, without any entry.
"""
dynamicsparsecolmajor(::Type{K}, ::Type{L}, ::Type{T}) where {K,L,T} =
    MappedPackedCSC(K, L, T)
Expand Down
2 changes: 1 addition & 1 deletion src/pma.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ function _array(keys::Vector{K}, values::Vector{T}, capacity) where {K,T}
array = Elements{K,T}(nothing, capacity)
nb_elements = length(values)
for i in 1:nb_elements
array[i] = (keys[i], values[i])
@inbounds array[i] = (keys[i], values[i])
end
return array
end
Expand Down
2 changes: 1 addition & 1 deletion src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,4 @@ function _nbcells(array::Elements, from::Int, to::Int)
end
end
return nbcells
end
end
4 changes: 4 additions & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[deps]
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Loading

2 comments on commit 47d0d2f

@guimarqu
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/27023

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.0 -m "<description of version>" 47d0d2f6ed0e53e0e98c4bdc626e6091c96dd134
git push origin v0.3.0

Please sign in to comment.