Skip to content

Commit

Permalink
Merge pull request #667 from chriscoey/alphafailures
Browse files Browse the repository at this point in the history
fix some slow oracles and failing instances
  • Loading branch information
chriscoey authored Apr 1, 2021
2 parents 49e0cdb + 355a749 commit 1203a52
Show file tree
Hide file tree
Showing 14 changed files with 324 additions and 429 deletions.
17 changes: 7 additions & 10 deletions examples/matrixcompletion/JuMP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,17 @@ function build(inst::MatrixCompletionJuMP{T}) where {T <: Float64}
@assert size_ratio >= 1
num_cols = size_ratio * num_rows

(rows, cols, Avals) = findnz(sprand(num_rows, num_cols, 0.1))
mat_to_vec_idx(i::Int, j::Int) = (j - 1) * num_rows + i
is_unknown = fill(true, num_rows * num_cols)
for (i, j) in zip(rows, cols)
is_unknown[mat_to_vec_idx(i, j)] = false
end
(rows, cols, Avals) = findnz(sprandn(num_rows, num_cols, 0.8))
is_known = vec(Matrix(sparse(rows, cols, trues(length(Avals)), num_rows, num_cols)))

model = JuMP.Model()
JuMP.@variable(model, X[1:num_rows, 1:num_cols])
JuMP.@variable(model, X[1:length(is_known)])
JuMP.@variable(model, t)
JuMP.@objective(model, Min, t)
JuMP.@constraint(model, vcat(t, vec(X)) in MOI.NormSpectralCone(num_rows, num_cols))
JuMP.@constraint(model, vcat(1, X[is_unknown]) in MOI.GeometricMeanCone(1 + sum(is_unknown)))
JuMP.@constraint(model, X[.!is_unknown] .== Avals)
JuMP.@constraint(model, vcat(t, X) in MOI.NormSpectralCone(num_rows, num_cols))
X_unknown = X[.!is_known]
JuMP.@constraint(model, vcat(1, X_unknown) in MOI.GeometricMeanCone(1 + length(X_unknown)))
JuMP.@constraint(model, X[is_known] .== Avals)

return model
end
8 changes: 3 additions & 5 deletions examples/matrixcompletion/native.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,16 @@ function build(inst::MatrixCompletionNative{T}) where {T <: Real}
mn = m * n
rt2 = sqrt(T(2))

num_known = round(Int, mn * 0.1)
num_known = round(Int, mn * 0.8)
known_rows = rand(1:m, num_known)
known_cols = rand(1:n, num_known)
known_vals = rand(T, num_known) .- T(0.5)

mat_to_vec_idx(i::Int, j::Int) = (j - 1) * m + i
known_vals = 2 * rand(T, num_known) .- 1

is_known = fill(false, mn)
# h for the rows that X (the matrix and not epigraph variable) participates in
h_norm_x = zeros(T, m * n)
for (k, (i, j)) in enumerate(zip(known_rows, known_cols))
known_idx = mat_to_vec_idx(i, j)
known_idx = (j - 1) * m + i
# if not using the epinorminf cone, indices relate to X'
h_norm_x[known_idx] = known_vals[k]
is_known[known_idx] = true
Expand Down
210 changes: 54 additions & 156 deletions src/Cones/Cones.jl
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,14 @@ vec_copy_to!(v1::AbstractVecOrMat{Complex{T}}, v2::AbstractVecOrMat{T}) where {T
# utilities for hessians for cones with PSD parts

# TODO parallelize
function symm_kron(H::AbstractMatrix{T}, mat::AbstractMatrix{T}, rt2::T; upper_only::Bool = true) where {T <: Real}
function symm_kron(
H::AbstractMatrix{T},
mat::AbstractMatrix{T},
rt2::T;
upper_only::Bool = true,
) where {T <: Real}
side = size(mat, 1)

col_idx = 1
@inbounds for l in 1:side
for k in 1:(l - 1)
Expand All @@ -364,6 +370,7 @@ function symm_kron(H::AbstractMatrix{T}, mat::AbstractMatrix{T}, rt2::T; upper_o
end
col_idx += 1
end

row_idx = 1
for j in 1:side
upper_only && row_idx > col_idx && continue
Expand All @@ -376,12 +383,18 @@ function symm_kron(H::AbstractMatrix{T}, mat::AbstractMatrix{T}, rt2::T; upper_o
end
col_idx += 1
end

return H
end

# TODO test output for non-Hermitian mat, the result may need transposing
function symm_kron(H::AbstractMatrix{T}, mat::AbstractMatrix{Complex{T}}, rt2::T) where {T <: Real}
function symm_kron(
H::AbstractMatrix{T},
mat::AbstractMatrix{Complex{T}},
rt2::T,
) where {T <: Real}
side = size(mat, 1)

col_idx = 1
for i in 1:side, j in 1:i
row_idx = 1
Expand Down Expand Up @@ -428,15 +441,28 @@ function symm_kron(H::AbstractMatrix{T}, mat::AbstractMatrix{Complex{T}}, rt2::T
col_idx += 2
end
end

return H
end

function hess_element(H::Matrix{T}, r_idx::Int, c_idx::Int, term1::T, term2::T) where {T <: Real}
function hess_element(
H::Matrix{T},
r_idx::Int,
c_idx::Int,
term1::T,
term2::T,
) where {T <: Real}
@inbounds H[r_idx, c_idx] = term1 + term2
return
end

function hess_element(H::Matrix{T}, r_idx::Int, c_idx::Int, term1::Complex{T}, term2::Complex{T}) where {T <: Real}
function hess_element(
H::Matrix{T},
r_idx::Int,
c_idx::Int,
term1::Complex{T},
term2::Complex{T},
) where {T <: Real}
@inbounds begin
H[r_idx, c_idx] = real(term1) + real(term2)
H[r_idx + 1, c_idx] = imag(term2) - imag(term1)
Expand All @@ -446,165 +472,28 @@ function hess_element(H::Matrix{T}, r_idx::Int, c_idx::Int, term1::Complex{T}, t
return
end

# set up sparse arrow matrix data structure
# TODO remove this in favor of new hess_nz_count etc functions that directly use uu, uw, ww etc
function sparse_arrow(T::Type{<:Real}, w_dim::Int)
dim = w_dim + 1
nnz_tri = 2 * dim - 1
I = Vector{Int}(undef, nnz_tri)
J = Vector{Int}(undef, nnz_tri)
idxs1 = 1:dim
@views I[idxs1] .= 1
@views J[idxs1] .= idxs1
idxs2 = (dim + 1):(2 * dim - 1)
@views I[idxs2] .= 2:dim
@views J[idxs2] .= 2:dim
V = ones(T, nnz_tri)
return sparse(I, J, V, dim, dim)
end

# 2x2 block case
function sparse_arrow_block2(T::Type{<:Real}, w_dim::Int)
dim = 2 * w_dim + 1
nnz_tri = 2 * dim - 1 + w_dim
I = Vector{Int}(undef, nnz_tri)
J = Vector{Int}(undef, nnz_tri)
idxs1 = 1:dim
@views I[idxs1] .= 1
@views J[idxs1] .= idxs1
idxs2 = (dim + 1):(2 * dim - 1)
@views I[idxs2] .= 2:dim
@views J[idxs2] .= 2:dim
idxs3 = (2 * dim):nnz_tri
@views I[idxs3] .= 2:2:dim
@views J[idxs3] .= 3:2:dim
V = ones(T, nnz_tri)
return sparse(I, J, V, dim, dim)
end

# lmul with arrow matrix
function arrow_prod(prod::AbstractVecOrMat{T}, arr::AbstractVecOrMat{T}, uu::T, uw::Vector{T}, ww::Vector{T}) where {T <: Real}
@inbounds @views begin
arru = arr[1, :]
arrw = arr[2:end, :]
produ = prod[1, :]
prodw = prod[2:end, :]
copyto!(produ, arru)
mul!(produ, arrw', uw, true, uu)
mul!(prodw, uw, arru')
@. prodw += ww * arrw
end
return prod
end

# 2x2 block case
function arrow_prod(prod::AbstractVecOrMat{T}, arr::AbstractVecOrMat{T}, uu::T, uv::Vector{T}, uw::Vector{T}, vv::Vector{T}, vw::Vector{T}, ww::Vector{T}) where {T <: Real}
@inbounds @views begin
arru = arr[1, :]
arrv = arr[2:2:end, :]
arrw = arr[3:2:end, :]
produ = prod[1, :]
prodv = prod[2:2:end, :]
prodw = prod[3:2:end, :]
@. produ = uu * arru
mul!(produ, arrv', uv, true, true)
mul!(produ, arrw', uw, true, true)
mul!(prodv, uv, arru')
mul!(prodw, uw, arru')
@. prodv += vv * arrv + vw * arrw
@. prodw += ww * arrw + vw * arrv
end
return prod
end

# factorize arrow matrix
function arrow_sqrt(uu::T, uw::Vector{T}, ww::Vector{T}, rtuw::Vector{T}, rtww::Vector{T}) where {T <: Real}
tol = sqrt(eps(T))
any(<(tol), ww) && return zero(T)
@. rtww = sqrt(ww)
@. rtuw = uw / rtww
diff = uu - sum(abs2, rtuw)
(diff < tol) && return zero(T)
return sqrt(diff)
end

# 2x2 block case
function arrow_sqrt(uu::T, uv::Vector{T}, uw::Vector{T}, vv::Vector{T}, vw::Vector{T}, ww::Vector{T}, rtuv::Vector{T}, rtuw::Vector{T}, rtvv::Vector{T}, rtvw::Vector{T}, rtww::Vector{T}) where {T <: Real}
tol = sqrt(eps(T))
any(<(tol), ww) && return zero(T)
@. rtww = sqrt(ww)
@. rtvw = vw / rtww
@. rtuw = uw / rtww
@. rtuv = vv - abs2(rtvw)
any(<(tol), rtuv) && return zero(T)
@. rtvv = sqrt(rtuv)
@. rtuv = (uv - rtvw * rtuw) / rtvv
diff = uu - sum(abs2, rtuv) - sum(abs2, rtuw)
(diff < tol) && return zero(T)
return sqrt(diff)
end

# lmul with lower Cholesky factor of arrow matrix
function arrow_sqrt_prod(prod::AbstractVecOrMat{T}, arr::AbstractVecOrMat{T}, rtuu::T, rtuw::Vector{T}, rtww::Vector{T}) where {T <: Real}
@inbounds @views begin
arr1 = arr[1, :]
@. prod[1, :] = rtuu * arr1
@. prod[2:end, :] = rtuw * arr1' + rtww * arr[2:end, :]
end
return prod
end

# 2x2 block case
function arrow_sqrt_prod(prod::AbstractVecOrMat{T}, arr::AbstractVecOrMat{T}, rtuu::T, rtuv::Vector{T}, rtuw::Vector{T}, rtvv::Vector{T}, rtvw::Vector{T}, rtww::Vector{T}) where {T <: Real}
@inbounds @views begin
arr1 = arr[1, :]
arrv = arr[2:2:end, :]
@. prod[1, :] = rtuu * arr1
@. prod[2:2:end, :] = rtuv * arr1' + rtvv * arrv
@. prod[3:2:end, :] = rtuw * arr1' + rtvw * arrv + rtww * arr[3:2:end, :]
end
return prod
end

# ldiv with upper Cholesky factor of arrow matrix
function inv_arrow_sqrt_prod(prod::AbstractVecOrMat{T}, arr::AbstractVecOrMat{T}, rtuu::T, rtuw::Vector{T}, rtww::Vector{T}) where {T <: Real}
@inbounds @. @views prod[2:end, :] = arr[2:end, :] / rtww
@inbounds @views for j in 1:size(arr, 2)
prod[1, j] = (arr[1, j] - dot(prod[2:end, j], rtuw)) / rtuu
end
return prod
end

# 2x2 block case
function inv_arrow_sqrt_prod(prod::AbstractVecOrMat{T}, arr::AbstractVecOrMat{T}, rtuu::T, rtuv::Vector{T}, rtuw::Vector{T}, rtvv::Vector{T}, rtvw::Vector{T}, rtww::Vector{T}) where {T <: Real}
@inbounds @views begin
prodw = prod[3:2:end, :]
@. prodw = arr[3:2:end, :] / rtww
@. prod[2:2:end, :] = (arr[2:2:end, :] - rtvw * prodw) / rtvv
end
@inbounds @views for j in 1:size(arr, 2)
prod[1, j] = (arr[1, j] - dot(prod[2:2:end, j], rtuv) - dot(prod[3:2:end, j], rtuw)) / rtuu
end
return prod
end

function grad_logm!(
mat::Matrix{T},
vecs::Matrix{T},
tempmat1::Matrix{T},
tempmat2::Matrix{T},
tempvec::Vector{T},
diff_mat::AbstractMatrix{T},
rt2::T,
) where T
mat::Matrix{T},
vecs::Matrix{T},
tempmat1::Matrix{T},
tempmat2::Matrix{T},
tempvec::Vector{T},
diff_mat::AbstractMatrix{T},
rt2::T,
) where {T <: Real}
veckron = symm_kron(tempmat1, vecs, rt2, upper_only = false)
smat_to_svec!(tempvec, diff_mat, one(T))
mul!(tempmat2, veckron, Diagonal(tempvec))
return mul!(mat, tempmat2, veckron')
end

function diff_mat!(mat::Matrix{T}, vals::Vector{T}, log_vals::Vector{T}) where T
function diff_mat!(
mat::Matrix{T},
vals::Vector{T},
log_vals::Vector{T},
) where {T <: Real}
rteps = sqrt(eps(T))

@inbounds for j in eachindex(vals)
(vj, lvj) = (vals[j], log_vals[j])
for i in 1:(j - 1)
Expand All @@ -613,14 +502,21 @@ function diff_mat!(mat::Matrix{T}, vals::Vector{T}, log_vals::Vector{T}) where T
end
mat[j, j] = inv(vj)
end

return mat
end

function diff_tensor!(diff_tensor::Array{T, 3}, diff_mat::AbstractMatrix{T}, vals::Vector{T}) where T
function diff_tensor!(
diff_tensor::Array{T, 3},
diff_mat::AbstractMatrix{T},
vals::Vector{T},
) where {T <: Real}
rteps = sqrt(eps(T))
d = size(diff_mat, 1)

@inbounds for k in 1:d, j in 1:k, i in 1:j
(vi, vj, vk) = (vals[i], vals[j], vals[k])

if abs(vj - vk) < rteps
if abs(vi - vj) < rteps
vijk = (vi + vj + vk) / 3
Expand All @@ -632,9 +528,11 @@ function diff_tensor!(diff_tensor::Array{T, 3}, diff_mat::AbstractMatrix{T}, val
else
t = (diff_mat[i, j] - diff_mat[i, k]) / (vj - vk)
end

diff_tensor[i, j, k] = diff_tensor[i, k, j] = diff_tensor[j, i, k] =
diff_tensor[j, k, i] = diff_tensor[k, i, j] = diff_tensor[k, j, i] = t
end

return diff_tensor
end

Expand Down
Loading

0 comments on commit 1203a52

Please sign in to comment.