-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
/
bunchkaufman.jl
1601 lines (1457 loc) · 58.9 KB
/
bunchkaufman.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# This file is a part of Julia. License is MIT: https://julialang.org/license
## Create an extractor that extracts the modified original matrix, e.g.
## LD for BunchKaufman, UL for CholeskyDense, LU for LUDense and
## define size methods for Factorization types using it.
##----------- Type utilities for generic Bunch-Kaufman implementation ------------
# Generic real type. Any real number type should able to approximate
# real numbers, and thus be closed under arithmetic operations.
# Therefore so Int, Complex{Int}, etc. are excluded.
ClosedReal = T where T <: Union{AbstractFloat, Rational}
# Similarly, we also use a closed scalar type
ClosedScalar = Union{T, Complex{T}} where T <: ClosedReal
##--------------------------------------------------------------------------------
"""
BunchKaufman <: Factorization
Matrix factorization type of the Bunch-Kaufman factorization of a symmetric or
Hermitian matrix `A` as `P'UDU'P` or `P'LDL'P`, depending on whether the upper
(the default) or the lower triangle is stored in `A`. If `A` is complex symmetric
then `U'` and `L'` denote the unconjugated transposes, i.e. `transpose(U)` and
`transpose(L)`, respectively. This is the return type of [`bunchkaufman`](@ref),
the corresponding matrix factorization function.
If `S::BunchKaufman` is the factorization object, the components can be obtained
via `S.D`, `S.U` or `S.L` as appropriate given `S.uplo`, and `S.p`.
Iterating the decomposition produces the components `S.D`, `S.U` or `S.L`
as appropriate given `S.uplo`, and `S.p`.
# Examples
```jldoctest
julia> A = Float64.([1 2; 2 3])
2×2 Matrix{Float64}:
1.0 2.0
2.0 3.0
julia> S = bunchkaufman(A) # A gets wrapped internally by Symmetric(A)
BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
D factor:
2×2 Tridiagonal{Float64, Vector{Float64}}:
-0.333333 0.0
0.0 3.0
U factor:
2×2 UnitUpperTriangular{Float64, Matrix{Float64}}:
1.0 0.666667
⋅ 1.0
permutation:
2-element Vector{Int64}:
1
2
julia> d, u, p = S; # destructuring via iteration
julia> d == S.D && u == S.U && p == S.p
true
julia> S = bunchkaufman(Symmetric(A, :L))
BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
D factor:
2×2 Tridiagonal{Float64, Vector{Float64}}:
3.0 0.0
0.0 -0.333333
L factor:
2×2 UnitLowerTriangular{Float64, Matrix{Float64}}:
1.0 ⋅
0.666667 1.0
permutation:
2-element Vector{Int64}:
2
1
```
"""
struct BunchKaufman{T,S<:AbstractMatrix,P<:AbstractVector{<:Integer}} <: Factorization{T}
LD::S
ipiv::P
uplo::Char
symmetric::Bool
rook::Bool
info::BlasInt
function BunchKaufman{T,S,P}(LD, ipiv, uplo, symmetric, rook, info) where {T,S<:AbstractMatrix,P<:AbstractVector}
require_one_based_indexing(LD)
new{T,S,P}(LD, ipiv, uplo, symmetric, rook, info)
end
end
BunchKaufman(A::AbstractMatrix{T}, ipiv::AbstractVector{<:Integer}, uplo::AbstractChar,
symmetric::Bool, rook::Bool, info::BlasInt) where {T} =
BunchKaufman{T,typeof(A),typeof(ipiv)}(A, ipiv, uplo, symmetric, rook, info)
# backwards-compatible constructors (remove with Julia 2.0)
@deprecate(BunchKaufman{T,S}(LD, ipiv, uplo, symmetric, rook, info) where {T,S},
BunchKaufman{T,S,typeof(ipiv)}(LD, ipiv, uplo, symmetric, rook, info), false)
# iteration for destructuring into components
Base.iterate(S::BunchKaufman) = (S.D, Val(:UL))
Base.iterate(S::BunchKaufman, ::Val{:UL}) = (S.uplo == 'L' ? S.L : S.U, Val(:p))
Base.iterate(S::BunchKaufman, ::Val{:p}) = (S.p, Val(:done))
Base.iterate(S::BunchKaufman, ::Val{:done}) = nothing
copy(S::BunchKaufman) = BunchKaufman(copy(S.LD), copy(S.ipiv), S.uplo, S.symmetric, S.rook, S.info)
"""
bunchkaufman!(A, rook::Bool=false; check = true) -> BunchKaufman
`bunchkaufman!` is the same as [`bunchkaufman`](@ref), but saves space by overwriting the
input `A`, instead of creating a copy.
"""
function bunchkaufman!(A::RealHermSymComplexSym{<:BlasReal,<:StridedMatrix},
rook::Bool = false; check::Bool = true)
LD, ipiv, info = rook ? LAPACK.sytrf_rook!(A.uplo, A.data) : LAPACK.sytrf!(A.uplo, A.data)
check && checknonsingular(info)
BunchKaufman(LD, ipiv, A.uplo, true, rook, info)
end
function bunchkaufman!(A::Hermitian{<:BlasComplex,<:StridedMatrix},
rook::Bool = false; check::Bool = true)
LD, ipiv, info = rook ? LAPACK.hetrf_rook!(A.uplo, A.data) : LAPACK.hetrf!(A.uplo, A.data)
check && checknonsingular(info)
BunchKaufman(LD, ipiv, A.uplo, false, rook, info)
end
function bunchkaufman!(A::StridedMatrix{<:BlasFloat}, rook::Bool = false; check::Bool = true)
if ishermitian(A)
return bunchkaufman!(Hermitian(A), rook; check = check)
elseif issymmetric(A)
return bunchkaufman!(Symmetric(A), rook; check = check)
else
throw(ArgumentError("Bunch-Kaufman decomposition is only valid for symmetric or Hermitian matrices"))
end
end
bkcopy_oftype(A, S) = eigencopy_oftype(A, S)
bkcopy_oftype(A::Symmetric{<:Complex}, S) = Symmetric(copytrito!(similar(parent(A), S, size(A)), A.data, A.uplo), sym_uplo(A.uplo))
"""
bunchkaufman(A, rook::Bool=false; check = true) -> S::BunchKaufman
Compute the Bunch-Kaufman [^Bunch1977] factorization of a symmetric or
Hermitian matrix `A` as `P'*U*D*U'*P` or `P'*L*D*L'*P`, depending on
which triangle is stored in `A`, and return a [`BunchKaufman`](@ref) object.
Note that if `A` is complex symmetric then `U'` and `L'` denote
the unconjugated transposes, i.e. `transpose(U)` and `transpose(L)`.
Iterating the decomposition produces the components `S.D`, `S.U` or `S.L`
as appropriate given `S.uplo`, and `S.p`.
If `rook` is `true`, rook pivoting is used. If `rook` is false,
rook pivoting is not used.
When `check = true`, an error is thrown if the decomposition fails.
When `check = false`, responsibility for checking the decomposition's
validity (via [`issuccess`](@ref)) lies with the user.
The following functions are available for `BunchKaufman` objects:
[`size`](@ref), `\\`, [`inv`](@ref), [`issymmetric`](@ref),
[`ishermitian`](@ref), [`getindex`](@ref).
[^Bunch1977]: J R Bunch and L Kaufman, Some stable methods for calculating inertia and solving symmetric linear systems, Mathematics of Computation 31:137 (1977), 163-179. [url](https://www.ams.org/journals/mcom/1977-31-137/S0025-5718-1977-0428694-0/).
# Examples
```jldoctest
julia> A = Float64.([1 2; 2 3])
2×2 Matrix{Float64}:
1.0 2.0
2.0 3.0
julia> S = bunchkaufman(A) # A gets wrapped internally by Symmetric(A)
BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
D factor:
2×2 Tridiagonal{Float64, Vector{Float64}}:
-0.333333 0.0
0.0 3.0
U factor:
2×2 UnitUpperTriangular{Float64, Matrix{Float64}}:
1.0 0.666667
⋅ 1.0
permutation:
2-element Vector{Int64}:
1
2
julia> d, u, p = S; # destructuring via iteration
julia> d == S.D && u == S.U && p == S.p
true
julia> S.U*S.D*S.U' - S.P*A*S.P'
2×2 Matrix{Float64}:
0.0 0.0
0.0 0.0
julia> S = bunchkaufman(Symmetric(A, :L))
BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
D factor:
2×2 Tridiagonal{Float64, Vector{Float64}}:
3.0 0.0
0.0 -0.333333
L factor:
2×2 UnitLowerTriangular{Float64, Matrix{Float64}}:
1.0 ⋅
0.666667 1.0
permutation:
2-element Vector{Int64}:
2
1
julia> S.L*S.D*S.L' - A[S.p, S.p]
2×2 Matrix{Float64}:
0.0 0.0
0.0 0.0
```
"""
bunchkaufman(A::AbstractMatrix{T}, rook::Bool=false; check::Bool = true) where {T} =
bunchkaufman!(bkcopy_oftype(A, typeof(sqrt(oneunit(T)))), rook; check = check)
BunchKaufman{T}(B::BunchKaufman) where {T} =
BunchKaufman(convert(Matrix{T}, B.LD), B.ipiv, B.uplo, B.symmetric, B.rook, B.info)
Factorization{T}(B::BunchKaufman) where {T} = BunchKaufman{T}(B)
size(B::BunchKaufman) = size(getfield(B, :LD))
size(B::BunchKaufman, d::Integer) = size(getfield(B, :LD), d)
issymmetric(B::BunchKaufman) = B.symmetric
ishermitian(B::BunchKaufman{T}) where T = T<:Real || !B.symmetric
function _ipiv2perm_bk(v::AbstractVector{T}, maxi::Integer, uplo::AbstractChar, rook::Bool) where T
require_one_based_indexing(v)
p = T[1:maxi;]
uploL = uplo == 'L'
i = uploL ? 1 : maxi
# if uplo == 'U' we construct the permutation backwards
@inbounds while 1 <= i <= length(v)
vi = v[i]
if vi > 0 # the 1x1 blocks
p[i], p[vi] = p[vi], p[i]
i += uploL ? 1 : -1
else # the 2x2 blocks
if rook
p[i], p[-vi] = p[-vi], p[i]
end
if uploL
vp = rook ? -v[i+1] : -vi
p[i + 1], p[vp] = p[vp], p[i + 1]
i += 2
else # 'U'
vp = rook ? -v[i-1] : -vi
p[i - 1], p[vp] = p[vp], p[i - 1]
i -= 2
end
end
end
return p
end
function getproperty(B::BunchKaufman{TS},
d::Symbol) where TS <: ClosedScalar{TR} where TR <: ClosedReal
n = size(B, 1)
if d === :p
return _ipiv2perm_bk(getfield(B, :ipiv), n, getfield(B, :uplo), B.rook)
elseif d === :P
return Matrix{TS}(I, n, n)[:,invperm(B.p)]
elseif d === :L || d === :U || d === :D
if d === :D
_, od, md = generic_syconv(B, false)
elseif typeof(B) <: BunchKaufman{T,<:StridedMatrix} where {T<:BlasFloat}
# We use LAPACK whenever we can
if getfield(B, :rook)
LUD, _ = LAPACK.syconvf_rook!(getfield(B, :uplo), 'C',
copy(getfield(B, :LD)), getfield(B, :ipiv))
else
LUD, _ = LAPACK.syconv!(getfield(B, :uplo), copy(getfield(B, :LD)),
getfield(B, :ipiv))
end
else
LUD, _ = generic_syconv(B)
end
if d === :D
if getfield(B, :uplo) == 'L'
odl = od[1:n - 1]
return Tridiagonal(odl, md, getfield(B, :symmetric) ? odl : conj.(odl))
else # 'U'
odu = od[2:n]
return Tridiagonal(getfield(B, :symmetric) ? odu : conj.(odu), md, odu)
end
elseif d === :L
if getfield(B, :uplo) == 'L'
return UnitLowerTriangular(LUD)
else
throw(ArgumentError("factorization is U*D*U' but you requested L"))
end
else # :U
if B.uplo == 'U'
return UnitUpperTriangular(LUD)
else
throw(ArgumentError("factorization is L*D*L' but you requested U"))
end
end
else
getfield(B, d)
end
end
Base.propertynames(B::BunchKaufman, private::Bool=false) =
(:p, :P, :L, :U, :D, (private ? fieldnames(typeof(B)) : ())...)
function Base.:(==)(B1::BunchKaufman, B2::BunchKaufman)
# check for the equality between properties instead of fields
B1.p == B2.p || return false
if B1.uplo == 'L'
B1.L == B2.L || return false
else
B1.U == B2.U || return false
end
return (B1.D == B2.D)
end
function getproperties!(B::BunchKaufman{T,<:StridedMatrix}) where {T<:BlasFloat}
# NOTE: Unlike in the 'getproperty' function, in this function L/U and D are computed in place.
if B.rook
LUD, od = LAPACK.syconvf_rook!(B.uplo, 'C', B.LD, B.ipiv)
else
LUD, od = LAPACK.syconv!(B.uplo, B.LD, B.ipiv)
end
if B.uplo == 'U'
M = UnitUpperTriangular(LUD)
du = od[2:end]
# Avoid aliasing dl and du.
dl = B.symmetric ? du : conj.(du)
else
M = UnitLowerTriangular(LUD)
dl = od[1:end-1]
# Avoid aliasing dl and du.
du = B.symmetric ? dl : conj.(dl)
end
return (M, Tridiagonal(dl, diag(LUD), du), B.p)
end
issuccess(B::BunchKaufman) = B.info == 0
function adjoint(B::BunchKaufman)
if ishermitian(B)
return B
else
throw(ArgumentError("adjoint not implemented for complex symmetric matrices"))
end
end
function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, B::BunchKaufman)
if issuccess(B)
summary(io, B); println(io)
println(io, "D factor:")
show(io, mime, B.D)
println(io, "\n$(B.uplo) factor:")
show(io, mime, B.uplo == 'L' ? B.L : B.U)
println(io, "\npermutation:")
show(io, mime, B.p)
else
print(io, "Failed factorization of type $(typeof(B))")
end
end
function inv(B::BunchKaufman{<:BlasReal,<:StridedMatrix})
if B.rook
copytri!(LAPACK.sytri_rook!(B.uplo, copy(B.LD), B.ipiv), B.uplo, true)
else
copytri!(LAPACK.sytri!(B.uplo, copy(B.LD), B.ipiv), B.uplo, true)
end
end
function inv(B::BunchKaufman{<:BlasComplex,<:StridedMatrix})
if issymmetric(B)
if B.rook
copytri!(LAPACK.sytri_rook!(B.uplo, copy(B.LD), B.ipiv), B.uplo)
else
copytri!(LAPACK.sytri!(B.uplo, copy(B.LD), B.ipiv), B.uplo)
end
else
if B.rook
copytri!(LAPACK.hetri_rook!(B.uplo, copy(B.LD), B.ipiv), B.uplo, true)
else
copytri!(LAPACK.hetri!(B.uplo, copy(B.LD), B.ipiv), B.uplo, true)
end
end
end
function ldiv!(B::BunchKaufman{T,<:StridedMatrix}, R::StridedVecOrMat{T}) where {T<:BlasReal}
if B.rook
LAPACK.sytrs_rook!(B.uplo, B.LD, B.ipiv, R)
else
LAPACK.sytrs!(B.uplo, B.LD, B.ipiv, R)
end
end
function ldiv!(B::BunchKaufman{T,<:StridedMatrix}, R::StridedVecOrMat{T}) where {T<:BlasComplex}
if B.rook
if issymmetric(B)
LAPACK.sytrs_rook!(B.uplo, B.LD, B.ipiv, R)
else
LAPACK.hetrs_rook!(B.uplo, B.LD, B.ipiv, R)
end
else
if issymmetric(B)
LAPACK.sytrs!(B.uplo, B.LD, B.ipiv, R)
else
LAPACK.hetrs!(B.uplo, B.LD, B.ipiv, R)
end
end
end
function logabsdet(F::BunchKaufman)
M = F.LD
p = F.ipiv
n = size(F.LD, 1)
if !issuccess(F)
return eltype(F)(-Inf), zero(eltype(F))
end
s = one(real(eltype(F)))
i = 1
abs_det = zero(real(eltype(F)))
while i <= n
if p[i] > 0
elm = M[i,i]
s *= sign(elm)
abs_det += log(abs(elm))
i += 1
else
# 2x2 pivot case. Make sure not to square before the subtraction by scaling
# with the off-diagonal element. This is safe because the off diagonal is
# always large for 2x2 pivots.
if F.uplo == 'U'
elm = M[i, i + 1]*(M[i,i]/M[i, i + 1]*M[i + 1, i + 1] -
(issymmetric(F) ? M[i, i + 1] : conj(M[i, i + 1])))
s *= sign(elm)
abs_det += log(abs(elm))
else
elm = M[i + 1,i]*(M[i, i]/M[i + 1, i]*M[i + 1, i + 1] -
(issymmetric(F) ? M[i + 1, i] : conj(M[i + 1, i])))
s *= sign(elm)
abs_det += log(abs(elm))
end
i += 2
end
end
return abs_det, s
end
## reconstruct the original matrix
## TODO: understand the procedure described at
## https://www.nag.com/numeric/FL/nagdoc_fl22/pdf/F07/f07mdf.pdf
##--------------------------------------------------------------------------
##------------- Start of generic Bunch-Kaufman Implementation --------------
##--------------------------------------------------------------------------
export inertia
function arg_illegal(fun_name::AbstractString,
info::Integer,
waer::AbstractChar)
if waer == 'W'
@warn " ** On entry to '$(fun_name)' parameter number " *
"$(info) had an illegal value"
else
error(" ** On entry to '$(fun_name)' parameter number " *
"$(info) had an illegal value")
end
end
function cabs1(z::T) where T <: Complex
return abs(real(z)) + abs(imag(z))
end
function cabsr(z::T) where T <: Complex
return abs(real(z))
end
"""
generic_adr1!(uplo, alpha, x, y, A, syhe) -> nothing
`generic_adr1!` performs the following adjoint (symmetric or Hermitian)
rank 1 operation
`A[1:K,1:L] = alpha*x*y' + A[1:K,1:L]`
in-place, where `alpha` is a scalar, `x` is a K element vector, `y`
is an L element vector and `A` is an `NxM` matrix. Note that `y'` can
denote either the transpose, i.e. `transpose(y)` or the conjugate
transpose , i.e. `adjoint(y)`.
`uplo` is a character, either `'U'`, `'L'` or `'F'`, indicating whether
the matrix is stored in the upper triangular part (`uplo=='U'`), the
lower triangular part (`uplo=='L'`), or the full storage space is used
(`uplo=='F'`). If `uplo!='F'` then only the corresponding triangular
part is updated. The values `'U'` or `'L'` can only be used when A is
square (`N==M`).
`syhe` is a character, either `'S'` or `'H'`, indicating whether the
symmetric adjoint (`syhe=='S'`, and `y'==transpose(y)`) or the hermitian
adjoint (`syhe=='H'`, and `y'==adjoint(y)`) must be used.
"""
function generic_adr1!(uplo::AbstractChar,
alpha::ClosedScalar{TR},
x::AbstractVector{TS},
y::AbstractVector{TS},
A::AbstractMatrix{TS},
syhe::AbstractChar
) where TS <: ClosedScalar{TR} where TR <: ClosedReal
# Inputs must be 1-indexed; bounds may not be checked.
Base.require_one_based_indexing(x, A)
# Check argument validity
K = length(x)
L = length(y)
N, M = size(A)
info = 0::BlasInt
if (uplo != 'U' && uplo != 'L' && uplo != 'F') || (uplo != 'F' && N != M)
info = (-1)::BlasInt
elseif K > N
info = (-3)::BlasInt
elseif L > M
info = (-4)::BlasInt
elseif syhe != 'S' && syhe != 'H'
info = (-6)::BlasInt
end
if info < 0
arg_illegal("generic_sadr1!", -info, 'E')
end
# Load the requested adjoining operator
adj_op = syhe == 'S' ? identity : conj
# Define loop range function according to the type of storage
# TODO: can we adjust the range without anonymous functions,
# but without having to write the same code thrice?
i_range = uplo == 'F' ? _ -> (1:K) : uplo == 'U' ? j -> (1:min(j,K)) : j -> (j:K)
# Compute rank update of A
for j in 1:L; @inbounds begin
if y[j] != 0
temp = alpha * adj_op(y[j])
for i in i_range(j)
A[i,j] += x[i] * temp
end
end
end; end
return
end
"""
generic_mvpv!(trans, alpha, A, x, beta, y) -> nothing
`generic_mvpv!` performs the following matrix-vector operation:
`y[1:K] = alpha*A'*x[1:L] + beta*y[1:K]`
in-place, where `alpha` and `beta` are scalars, `x` is a vector with at
least L elements, `y` is a vector with at least K elements, and `A` is
an `NxM` matrix. `A'` can denote the transpose, i.e. `transpose(A)` or
the conjugate transpose, i.e. `adjoint(A)`, and then `M==K && N==L`.
`A'` can also denote no adjoining at all, i.e. `A'==A`, and then
`N==K && M==L`.
`trans` is a character, either `'T'`, `'C'` or `'N'`, indicating whether
`A'=transpose(A)`, `A'=adjoint(A)` or `A'=A`, respectively.
"""
function generic_mvpv!(trans::AbstractChar,
alpha::ClosedScalar{TR},
A::AbstractMatrix{TS},
x::AbstractVector{TS},
beta::ClosedScalar{TR},
y::AbstractVector{TS},
) where TS <: ClosedScalar{TR} where TR <: ClosedReal
# Inputs must be 1-indexed; bounds may not be checked.
Base.require_one_based_indexing(A, x, y)
# Check argument validity
M, N = size(A)
K = trans == 'N' ? M : N
L = trans == 'N' ? N : M
info = 0::BlasInt
if trans != 'T' && trans != 'C' && trans != 'N'
info = (-1)::BlasInt
elseif length(y) < K
info = (-3)::BlasInt
elseif length(x) < L
info = (-4)::BlasInt
end
if info < 0
arg_illegal("generic_sadr1!", -info, 'E')
end
# Quick return if possible.
if K == 0 || (alpha == 0 && beta == 1); return; end
# Start the operations. In this version the elements of A are
# accessed sequentially with one pass through A.
# First form y := beta*y.
@inbounds begin
if beta != 1
if beta == 0
# Way less allocations and way faster for BigFloat.
# For Float64 there is some (acceptable IMO) performance loss.
y[1:K] .= 0
else
for i in 1:K; y[i] *= beta; end
end
end
if alpha == 0 || L == 0; return; end
if trans == 'N'
# Form y := alpha*A*x + y.
for j in 1:L
# Faster than a loop
axpy!(alpha*x[j], view(A, 1:K, j), view(y, 1:K))
end
else
# Form y := alpha*A**T*x + y or y := alpha*A**H*x + y.
noconj = (trans == 'T')
for i = 1:K
temp = 0
if noconj
for j = 1:L
temp = temp + A[j,i]*x[j]
end
else
for j = 1:L
temp = temp + conj(A[j,i])*x[j]
end
end
y[i] += alpha*temp
end
end
end
return
end
"""
bk_rowcol_swap!(A, k, kp, kstep, upper, herm) -> did_swap::Bool
Performs the row and column interchange of the Bunch-Kaufman factorization.
If `upper==true` then the rows and columns `kp` of `A[1:k,1:k]` are
interchanged with either rows and columns `k` or `k-1` of `A[1:k,1:k]`,
depending on whether `kstep==1` or `kstep==2`, respectively. If
`upper==false` then the rows and columns `kp-k+1` of `A[k:N,k:N]` are
interchanged with either rows and columns `1` or `2` of `A[k:N,k:N]`,
depending on whether `kstep==1` or `kstep==2`, respectively. `herm=true`
then it is assumed that `A` is Hermitian, and conjugation is applied to
the appropriate entries of the interchanged rows and columns. If
`herm=false` no conjugation is performed.
This is an internal helper function for the main Bunch-Kaufman
factorization function, `generic_bunchkaufman!`. As such, validity of the
input values is not verified.
"""
function bk_rowcol_swap!(
A::AbstractMatrix{TS},
k::Integer,
kp::Integer,
kstep::Integer,
upper::Bool,
herm::Bool
) where TS <: ClosedScalar{TR} where TR <: ClosedReal
kk = upper ? k - kstep + 1 : k + kstep - 1
if kp != kk
if kp > 1
thisview = upper ? view(A, 1:(kp-1), :) : view(A, (kp+1):size(A,1), :)
Base.swapcols!(thisview, kp, kk)
end
thisrange = upper ? ((kp+1):(kk-1)) : ((kk+1):(kp-1))
if !herm
# Real/complex symmetric case
for j in thisrange
A[j,kk], A[kp,j] = A[kp,j], A[j,kk]
end
A[kk,kk], A[kp,kp] = A[kp,kp], A[kk,kk]
else
# Hermitian case
for j in thisrange
A[j,kk], A[kp,j] = conj(A[kp,j]), conj(A[j,kk])
end
A[kp,kk] = conj(A[kp,kk])
A[kk,kk], A[kp,kp] = real(A[kp,kp]), real(A[kk,kk])
end
if kstep == 2
if herm
# Force diagonal entry to be purely real
A[k,k] = real(A[k,k])
end
if upper
A[k-1,k], A[kp,k] = A[kp,k], A[k-1,k]
else
A[k+1,k], A[kp,k] = A[kp,k], A[k+1,k]
end
end
return true
else
return false
end
end
"""
generic_bunchkaufman!(uplo, A, syhe, rook::Bool=false) ->
LD<:AbstractMatrix, ipiv<:AbstractVector{Integer}, info::BlasInt
Computes the Bunch-Kaufman factorization of a symmetric or Hermitian
matrix `A` of size `NxN` as `P'*U*D*U'*P` or `P'*L*D*L'*P`, depending on
which triangle is stored in `A`. Note that if `A` is complex symmetric
then `U'` and `L'` denote the unconjugated transposes, i.e.
`transpose(U)` and `transpose(L)`. The resulting `U` or `L` and D are
stored in-place in `A`, LAPACK style. `LD` is just a reference to `A`
(that is, `LD===A`). `ipiv` stores the permutation information of the
algorithm in LAPACK format. `info` indicates whether the factorization
was successful and non-singular when `info==0`, or else `info` takes a
different value. The outputs `LD`, `ipiv`, `info` follow the format of
the LAPACK functions of the Bunch-Kaufman factorization (`dsytrf`,
`csytrf`, `chetrf`, etc.), so this function can (ideally) be used
interchangeably with its LAPACK counterparts `LAPACK.sytrf!`,
`LAPACK.sytrf_rook!`, etc.
`uplo` is a character, either `'U'` or `'L'`, indicating whether the
matrix is stored in the upper triangular part (`uplo=='U'`) or in the
lower triangular part (`uplo=='L'`).
`syhe` is a character, either `'S'` or `'H'`, indicating whether the
matrix is real/complex symmetric (`syhe=='S'`, and the symmetric
Bunch-Kaufman factorization is performed) or complex hermitian
(`syhe=='H'`, and the hermitian Bunch-Kaufman factorization is
performed).
If `rook` is `true`, rook pivoting is used (also called bounded
Bunch-Kaufman factorization). If `rook` is `false`, rook pivoting is
not used (standard Bunch-Kaufman factorization). Rook pivoting can
require up to `~N^3/6` extra comparisons in addition to the `~N^3/3`
additions and `~N^3/3` multiplications of the standard Bunch-Kaufman
factorization. However, rook pivoting guarantees that the entries of
`U` or `L` are bounded.
This function implements the factorization algorithm entirely in
native Julia, so it supports any number type representing real or
complex numbers.
"""
function generic_bunchkaufman!(
uplo::AbstractChar,
A::AbstractMatrix{TS},
syhe::AbstractChar,
rook::Bool=false
) where TS <: ClosedScalar{TR} where TR <: ClosedReal
# Inputs must be 1-indexed; bounds may not be checked.
Base.require_one_based_indexing(A)
# Initialize info integer as 0
info = 0::BlasInt
# Get size of matrix
N, N2 = size(A)
# Initialize permutation vector
ipiv = Vector{BlasInt}(undef, N)
# Check input correctness
if uplo != 'U' && uplo != 'L'
info = (-1)::BlasInt
elseif N != N2
info = (-2)::BlasInt
elseif syhe != 'S' && syhe != 'H'
info = (-3)::BlasInt
end
if info < 0
arg_illegal("generic_bunchkaufman!", -info, 'W')
return A, ipiv, info
end
# if rook
# error("Rook pivoting not implemented yet.")
# end
# Initialize `alpha` for use in choosing pivot block size.
# The exact value is
# (1 + sqrt(17)) / 8 ~= 0.6404
# For rational matrices we a the small denominator approximation:
# 16/25 = 0.64 ~= (1 + sqrt(17)) / 8
# in order to not increase the denominator size too much in computations.
# The error of this approximation is ≤0.1%, and it still guarantees that a
# 2x2 block in the D factor has a positive-negative eigenvalue pair, as long
# as the approximation lies in (0,1).
alpha = TR <: AbstractFloat ? (1 + sqrt(TR(17))) / 8 : TR(16//25)
# Use complex 1-norm for pivot selection, as in LAPACK
abs1_fun = TS <: Real ? abs : cabs1
# Check if the matrix is symmetric of hermitian
if syhe == 'S' || (syhe == 'H' && TS <: Real)
# Use symmetric variant if matrix is real, regardless of 'syhe' value
syhe = 'S'
diag_abs_fun = abs1_fun
else
diag_abs_fun = cabsr
end
# Compute machine safe minimum when working with floating point numbers.
# LAPACK doesn't use this for diagonal pivoting though...
if rook
if TR <: AbstractFloat
# eps(0) gives the smallest subnormal number, and eps(1) gives the floating
# point type epsilon. eps(0)/eps(1) gives the smallest normal number, plus
# possibly some rounding error.
sfmin = nextfloat(eps(TR(0)) / eps(TR(1)), 2)
small = 1 / prevfloat(typemax(TR), 2)
if small >= sfmin
# 1/sfmin may overflow, so use 'small' plus a bit as the safe minimum
sfmin = nextfloat(small * (1 + eps(TR(1))), 2)
end
else
# We're working with rationals in this case, so the all results are exact.
sfmin = TR(0)
end
end
# Run factorization depending on where the data is stored
upper = (uplo == 'U')
herm = (syhe == 'H')
# TODO: Is this gonna inline properly?
@inline k_cond = upper ? k -> k >= 1 : k -> k <= N
@inline irange = upper ? j -> (j:-1:1) : j -> (j:N)
@inline conj_op = herm ? conj : identity
@inline diagreal_op = herm ? (j -> A[j,j] = TS(real(A[j,j]))) : _ -> ()
k = upper ? N : 1
# Main loop, comments refer to the upper triangular version of the factorization.
# The lower triangular version is analogous.
while k_cond(k); @inbounds begin
kstep = 1
knext = upper ? k - 1 : k + 1
p = k
# Determine rows and columns to be interchanged and whether
# a 1-by-1 or 2-by-2 pivot block will be used
absakk = diag_abs_fun(A[k,k])
# IMAX is the row-index of the largest off-diagonal element in
# column K, and COLMAX is its absolute value.
# Determine both COLMAX and IMAX.
if upper && k > 1
colmax, imax = findmax(abs1_fun, view(A, 1:(k-1), k))
elseif (!upper) && k < N
colmax, imax = findmax(abs1_fun, view(A, (k+1):N, k))
imax += k
else
colmax = 0
end
if (max(absakk, colmax) == 0) || isnan(absakk)
# Column K is zero or underflow, or contains a NaN:
# set INFO and continue
if info == 0
info = k::BlasInt
end
kp = k
if herm
# Force diagonal entry to be purely real
A[k,k] = real(A[k,k])
end
else
if absakk >= alpha*colmax
# no interchange, use 1-by-1 pivot block
kp = k
elseif rook
# Loop until pivot found
while true
# Begin pivot search loop body
# JMAX is the column-index of the largest off-diagonal
# element in row IMAX, and ROWMAX is its absolute value.
# Determine both ROWMAX and JMAX.
if imax != k
thisview = upper ? view(A, imax, (imax+1):k) :
view(A, imax, k:(imax-1))
rowmax, jmax = findmax(abs1_fun, thisview)
jmax += upper ? imax : k - 1
else
# LAPACK makes rowmax=0 in this case, but I believe it's
# better to make rowmax=-1, so that we guarantee that jmax
# will be define in the next if-block.
# TODO: is this correct/safe?
rowmax = 0
end
if (upper && imax > 1) || ((!upper) && imax < N)
# Remember that we only have the upper triangular part
# of the matrix. We inspect the part of the row in the
# lower triangular part by traversing the corresponding
# part of the transpose column.
if upper
stemp, itemp = findmax(abs1_fun, view(A, 1:(imax-1), imax))
else
stemp, itemp = findmax(abs1_fun, view(A, (imax+1):N, imax))
itemp += imax
end
if stemp > rowmax
rowmax = stemp
jmax = itemp
end
end
# Equivalent to testing for (used to handle NaN and Inf)
# CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX
if !(diag_abs_fun(A[imax,imax]) < alpha*rowmax)
# interchange rows and columns K and IMAX,
# use 1-by-1 pivot block
kp = imax
break
# Equivalent to testing for ROWMAX .EQ. COLMAX,
# used to handle NaN and Inf
elseif (p == jmax || rowmax <= colmax)
# interchange rows and columns K+1 and IMAX,
# use 2-by-2 pivot block
kp = imax
kstep = 2
break
else
# Pivot NOT found, set variables and repeat
p = imax
colmax = rowmax
imax = jmax
end
# End pivot search loop body
end
else
# JMAX is the column-index of the largest off-diagonal
# element in row IMAX, and ROWMAX is its absolute value
# We don't really need JMAX, se we don't store it
thisview = upper ? view(A, imax, (imax+1):k) : view(A, imax, k:(imax-1))
rowmax = findmax(abs1_fun, thisview)[1]
if (upper && imax > 1) || ((!upper) && imax < N)
# Remember that we only have the upper triangular part
# of the matrix. We inspect the part of the row in the
# lower triangular part by traversing the corresponding
# part of the transpose column.
thisview = upper ? view(A, 1:(imax-1), imax) :
view(A, (imax+1):N, imax)
rowmax = max(rowmax, findmax(abs1_fun, thisview)[1])
end
if absakk >= alpha * colmax * (colmax/rowmax)
# no interchange, use 1-by-1 pivot block
kp = k
elseif diag_abs_fun(A[imax,imax]) >= alpha * rowmax
# interchange rows and columns K and IMAX, use 1-by-1
# pivot block
kp = imax
else
# interchange rows and columns K-1 and IMAX, use 2-by-2
# pivot block
kp = imax
p = imax
kstep = 2
end
end
# Swap TWO rows and TWO columns
# First swap
# The first swap only needs to be done when using rook pivoting
if rook && kstep == 2
# Interchange rows and column K and P in the leading
# submatrix A(1:k,1:k) if we have a 2-by-2 pivot
bk_rowcol_swap!(A, k, p, 1, upper, herm)
end
# Second swap
did_swap = bk_rowcol_swap!(A, k, kp, kstep, upper, herm)
if herm && (!did_swap)
# Force diagonal entries to be purely real
A[k,k] = real(A[k,k])
if kstep == 2
A[knext,knext] = real(A[knext,knext])
end
end
if kstep == 1
# 1-by-1 pivot block D(k): column k now holds
# W(k) = U(k)*D(k)
# where U(k) is the k-th column of U
# When rook=false, sfmin is not defined, but the short-circuit
# evaluation of the conditional avoids an error.
if (!rook) || absakk >= sfmin
# Perform a rank-1 update of A(1:k-1,1:k-1) as
# A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)'
# Compute 1/D(k)
r1 = !herm ? 1 / A[k,k] : 1 / real(A[k,k])
# Perform rank-1 update to store the Schur complement
# in a submatrix of A
x = upper ? view(A, 1:(k-1), k) : view(A, (k+1):N, k)
# if 'upper' this should assign by reference
thisview = upper ? A : view(A, (k+1):N, (k+1):N)
generic_adr1!(uplo, -r1, x, x, thisview, syhe)
# Store U(k) in column k
thisrange = upper ? (1:(k-1)) : ((k+1):N)
for i in thisrange
A[i,k] *= r1
end
else
# Compute D(k)
r1 = !herm ? A[k,k] : real(A[k,k])
# Store U(k) in column k
thisrange = upper ? (1:(k-1)) : ((k+1):N)
for i in thisrange
A[i,k] /= r1
end