Skip to content

Commit

Permalink
Rewrite [ds]hgeqz to use FMA with Householder reflectors
Browse files: browse the repository at this point in the history
  • Loading branch information
angsch committed Jul 6, 2022
1 parent 9f9295f commit 0a6cd43
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 66 deletions.
70 changes: 37 additions & 33 deletions SRC/dhgeqz.f
Original file line number Diff line number Diff line change
Expand Up @@ -337,9 +337,9 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
$ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
$ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
$ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1,
$ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L,
$ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR,
$ WR2
$ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1,
$ U12, U12L, U2, ULP, VS, W11, W12, W21, W22,
$ WABS, WI, WR, WR2
* ..
* .. Local Arrays ..
DOUBLE PRECISION V( 3 )
Expand Down Expand Up @@ -1132,25 +1132,27 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
H( J+2, J-1 ) = ZERO
END IF
*
T2 = TAU*V( 2 )
T3 = TAU*V( 3 )
DO 230 JC = J, ILASTM
TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
$ H( J+2, JC ) )
H( J, JC ) = H( J, JC ) - TEMP
H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 )
H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 )
TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
$ T( J+2, JC ) )
T( J, JC ) = T( J, JC ) - TEMP2
T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 )
T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 )
TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
$ H( J+2, JC )
H( J, JC ) = H( J, JC ) - TEMP*TAU
H( J+1, JC ) = H( J+1, JC ) - TEMP*T2
H( J+2, JC ) = H( J+2, JC ) - TEMP*T3
TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
$ T( J+2, JC )
T( J, JC ) = T( J, JC ) - TEMP2*TAU
T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2
T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3
230 CONTINUE
IF( ILQ ) THEN
DO 240 JR = 1, N
TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
$ Q( JR, J+2 ) )
Q( JR, J ) = Q( JR, J ) - TEMP
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 )
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 )
TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
$ Q( JR, J+2 )
Q( JR, J ) = Q( JR, J ) - TEMP*TAU
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3
240 CONTINUE
END IF
*
Expand Down Expand Up @@ -1238,27 +1240,29 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
*
* Apply transformations from the right.
*
T2 = TAU*V(2)
T3 = TAU*V(3)
DO 260 JR = IFRSTM, MIN( J+3, ILAST )
TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
$ H( JR, J+2 ) )
H( JR, J ) = H( JR, J ) - TEMP
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 )
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 )
TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
$ H( JR, J+2 )
H( JR, J ) = H( JR, J ) - TEMP*TAU
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3
260 CONTINUE
DO 270 JR = IFRSTM, J + 2
TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
$ T( JR, J+2 ) )
T( JR, J ) = T( JR, J ) - TEMP
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 )
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 )
TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
$ T( JR, J+2 )
T( JR, J ) = T( JR, J ) - TEMP*TAU
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3
270 CONTINUE
IF( ILZ ) THEN
DO 280 JR = 1, N
TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
$ Z( JR, J+2 ) )
Z( JR, J ) = Z( JR, J ) - TEMP
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 )
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 )
TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
$ Z( JR, J+2 )
Z( JR, J ) = Z( JR, J ) - TEMP*TAU
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3
280 CONTINUE
END IF
T( J+1, J ) = ZERO
Expand Down
70 changes: 37 additions & 33 deletions SRC/shgeqz.f
Original file line number Diff line number Diff line change
Expand Up @@ -337,9 +337,9 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
$ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
$ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
$ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1,
$ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L,
$ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR,
$ WR2
$ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1,
$ U12, U12L, U2, ULP, VS, W11, W12, W21, W22,
$ WABS, WI, WR, WR2
* ..
* .. Local Arrays ..
REAL V( 3 )
Expand Down Expand Up @@ -1132,25 +1132,27 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
H( J+2, J-1 ) = ZERO
END IF
*
T2 = TAU * V( 2 )
T3 = TAU * V( 3 )
DO 230 JC = J, ILASTM
TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
$ H( J+2, JC ) )
H( J, JC ) = H( J, JC ) - TEMP
H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 )
H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 )
TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
$ T( J+2, JC ) )
T( J, JC ) = T( J, JC ) - TEMP2
T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 )
T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 )
TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
$ H( J+2, JC )
H( J, JC ) = H( J, JC ) - TEMP*TAU
H( J+1, JC ) = H( J+1, JC ) - TEMP*T2
H( J+2, JC ) = H( J+2, JC ) - TEMP*T3
TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
$ T( J+2, JC )
T( J, JC ) = T( J, JC ) - TEMP2*TAU
T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2
T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3
230 CONTINUE
IF( ILQ ) THEN
DO 240 JR = 1, N
TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
$ Q( JR, J+2 ) )
Q( JR, J ) = Q( JR, J ) - TEMP
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 )
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 )
TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
$ Q( JR, J+2 )
Q( JR, J ) = Q( JR, J ) - TEMP*TAU
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3
240 CONTINUE
END IF
*
Expand Down Expand Up @@ -1238,27 +1240,29 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
*
* Apply transformations from the right.
*
T2 = TAU*V( 2 )
T3 = TAU*V( 3 )
DO 260 JR = IFRSTM, MIN( J+3, ILAST )
TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
$ H( JR, J+2 ) )
H( JR, J ) = H( JR, J ) - TEMP
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 )
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 )
TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
$ H( JR, J+2 )
H( JR, J ) = H( JR, J ) - TEMP*TAU
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3
260 CONTINUE
DO 270 JR = IFRSTM, J + 2
TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
$ T( JR, J+2 ) )
T( JR, J ) = T( JR, J ) - TEMP
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 )
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 )
TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
$ T( JR, J+2 )
T( JR, J ) = T( JR, J ) - TEMP*TAU
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3
270 CONTINUE
IF( ILZ ) THEN
DO 280 JR = 1, N
TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
$ Z( JR, J+2 ) )
Z( JR, J ) = Z( JR, J ) - TEMP
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 )
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 )
TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
$ Z( JR, J+2 )
Z( JR, J ) = Z( JR, J ) - TEMP*TAU
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3
280 CONTINUE
END IF
T( J+1, J ) = ZERO
Expand Down

0 comments on commit 0a6cd43

Please sign in to comment.