| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
| 3 | SLEPc - Scalable Library for Eigenvalue Problem Computations | ||
| 4 | Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain | ||
| 5 | |||
| 6 | This file is part of SLEPc. | ||
| 7 | SLEPc is distributed under a 2-clause BSD license (see LICENSE). | ||
| 8 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
| 9 | */ | ||
| 10 | /* | ||
| 11 | BDC - Block-divide and conquer (see description in README file) | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <slepc/private/dsimpl.h> | ||
| 15 | #include <slepcblaslapack.h> | ||
| 16 | |||
| 17 | 105 | PetscErrorCode BDC_dsrtdf_(PetscBLASInt *k,PetscBLASInt n,PetscBLASInt n1, | |
| 18 | PetscReal *d,PetscReal *q,PetscBLASInt ldq,PetscBLASInt *indxq, | ||
| 19 | PetscReal *rho,PetscReal *z,PetscReal *dlamda,PetscReal *w, | ||
| 20 | PetscReal *q2,PetscBLASInt *indx,PetscBLASInt *indxc,PetscBLASInt *indxp, | ||
| 21 | PetscBLASInt *coltyp,PetscReal reltol,PetscBLASInt *dz,PetscBLASInt *de, | ||
| 22 | PetscBLASInt *info) | ||
| 23 | { | ||
| 24 | /* -- Routine written in LAPACK Version 3.0 style -- */ | ||
| 25 | /* *************************************************** */ | ||
| 26 | /* Written by */ | ||
| 27 | /* Michael Moldaschl and Wilfried Gansterer */ | ||
| 28 | /* University of Vienna */ | ||
| 29 | /* last modification: March 16, 2014 */ | ||
| 30 | |||
| 31 | /* Small adaptations of original code written by */ | ||
| 32 | /* Wilfried Gansterer and Bob Ward, */ | ||
| 33 | /* Department of Computer Science, University of Tennessee */ | ||
| 34 | /* see https://doi.org/10.1137/S1064827501399432 */ | ||
| 35 | /* *************************************************** */ | ||
| 36 | |||
| 37 | /* Purpose */ | ||
| 38 | /* ======= */ | ||
| 39 | |||
| 40 | /* DSRTDF merges the two sets of eigenvalues of a rank-one modified */ | ||
| 41 | /* diagonal matrix D+ZZ^T together into a single sorted set. Then it tries */ | ||
| 42 | /* to deflate the size of the problem. */ | ||
| 43 | /* There are two ways in which deflation can occur: when two or more */ | ||
| 44 | /* eigenvalues of D are close together or if there is a tiny entry in the */ | ||
| 45 | /* Z vector. For each such occurrence the order of the related secular */ | ||
| 46 | /* equation problem is reduced by one. */ | ||
| 47 | |||
| 48 | /* Arguments */ | ||
| 49 | /* ========= */ | ||
| 50 | |||
| 51 | /* K (output) INTEGER */ | ||
| 52 | /* The number of non-deflated eigenvalues, and the order of the */ | ||
| 53 | /* related secular equation. 0 <= K <=N. */ | ||
| 54 | |||
| 55 | /* N (input) INTEGER */ | ||
| 56 | /* The dimension of the diagonal matrix. N >= 0. */ | ||
| 57 | |||
| 58 | /* N1 (input) INTEGER */ | ||
| 59 | /* The location of the last eigenvalue in the leading sub-matrix. */ | ||
| 60 | /* min(1,N) <= N1 <= max(1,N). */ | ||
| 61 | |||
| 62 | /* D (input/output) DOUBLE PRECISION array, dimension (N) */ | ||
| 63 | /* On entry, D contains the eigenvalues of the two submatrices to */ | ||
| 64 | /* be combined. */ | ||
| 65 | /* On exit, D contains the trailing (N-K) updated eigenvalues */ | ||
| 66 | /* (those which were deflated) sorted into increasing order. */ | ||
| 67 | |||
| 68 | /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */ | ||
| 69 | /* On entry, Q contains the eigenvectors of two submatrices in */ | ||
| 70 | /* the two square blocks with corners at (1,1), (N1,N1) */ | ||
| 71 | /* and (N1+1, N1+1), (N,N). */ | ||
| 72 | /* On exit, Q contains the trailing (N-K) updated eigenvectors */ | ||
| 73 | /* (those which were deflated) in its last N-K columns. */ | ||
| 74 | |||
| 75 | /* LDQ (input) INTEGER */ | ||
| 76 | /* The leading dimension of the array Q. LDQ >= max(1,N). */ | ||
| 77 | |||
| 78 | /* INDXQ (input/output) INTEGER array, dimension (N) */ | ||
| 79 | /* The permutation which separately sorts the two sub-problems */ | ||
| 80 | /* in D into ascending order. Note that elements in the second */ | ||
| 81 | /* half of this permutation must first have N1 added to their */ | ||
| 82 | /* values. Destroyed on exit. */ | ||
| 83 | |||
| 84 | /* RHO (input/output) DOUBLE PRECISION */ | ||
| 85 | /* On entry, the off-diagonal element associated with the rank-1 */ | ||
| 86 | /* cut which originally split the two submatrices which are now */ | ||
| 87 | /* being recombined. */ | ||
| 88 | /* On exit, RHO has been modified to the value required by */ | ||
| 89 | /* DLAED3M (made positive and multiplied by two, such that */ | ||
| 90 | /* the modification vector has norm one). */ | ||
| 91 | |||
| 92 | /* Z (input/output) DOUBLE PRECISION array, dimension (N) */ | ||
| 93 | /* On entry, Z contains the updating vector. */ | ||
| 94 | /* On exit, the contents of Z has been destroyed by the updating */ | ||
| 95 | /* process. */ | ||
| 96 | |||
| 97 | /* DLAMDA (output) DOUBLE PRECISION array, dimension (N) */ | ||
| 98 | /* A copy of the first K non-deflated eigenvalues which */ | ||
| 99 | /* will be used by DLAED3M to form the secular equation. */ | ||
| 100 | |||
| 101 | /* W (output) DOUBLE PRECISION array, dimension (N) */ | ||
| 102 | /* The first K values of the final deflation-altered z-vector */ | ||
| 103 | /* which will be passed to DLAED3M. */ | ||
| 104 | |||
| 105 | /* Q2 (output) DOUBLE PRECISION array, dimension */ | ||
| 106 | /* (N*N) (if everything is deflated) or */ | ||
| 107 | /* (N1*(COLTYP(1)+COLTYP(2)) + (N-N1)*(COLTYP(2)+COLTYP(3))) */ | ||
| 108 | /* (if not everything is deflated) */ | ||
| 109 | /* If everything is deflated, then N*N intermediate workspace */ | ||
| 110 | /* is needed in Q2. */ | ||
| 111 | /* If not everything is deflated, Q2 contains on exit a copy of the */ | ||
| 112 | /* first K non-deflated eigenvectors which will be used by */ | ||
| 113 | /* DLAED3M in a matrix multiply (DGEMM) to accumulate the new */ | ||
| 114 | /* eigenvectors, followed by the N-K deflated eigenvectors. */ | ||
| 115 | |||
| 116 | /* INDX (workspace) INTEGER array, dimension (N) */ | ||
| 117 | /* The permutation used to sort the contents of DLAMDA into */ | ||
| 118 | /* ascending order. */ | ||
| 119 | |||
| 120 | /* INDXC (output) INTEGER array, dimension (N) */ | ||
| 121 | /* The permutation used to arrange the columns of the deflated */ | ||
| 122 | /* Q matrix into three groups: columns in the first group contain */ | ||
| 123 | /* non-zero elements only at and above N1 (type 1), columns in the */ | ||
| 124 | /* second group are dense (type 2), and columns in the third group */ | ||
| 125 | /* contain non-zero elements only below N1 (type 3). */ | ||
| 126 | |||
| 127 | /* INDXP (workspace) INTEGER array, dimension (N) */ | ||
| 128 | /* The permutation used to place deflated values of D at the end */ | ||
| 129 | /* of the array. INDXP(1:K) points to the nondeflated D-values */ | ||
| 130 | /* and INDXP(K+1:N) points to the deflated eigenvalues. */ | ||
| 131 | |||
| 132 | /* COLTYP (workspace/output) INTEGER array, dimension (N) */ | ||
| 133 | /* During execution, a label which will indicate which of the */ | ||
| 134 | /* following types a column in the Q2 matrix is: */ | ||
| 135 | /* 1 : non-zero in the upper half only; */ | ||
| 136 | /* 2 : dense; */ | ||
| 137 | /* 3 : non-zero in the lower half only; */ | ||
| 138 | /* 4 : deflated. */ | ||
| 139 | /* On exit, COLTYP(i) is the number of columns of type i, */ | ||
| 140 | /* for i=1 to 4 only. */ | ||
| 141 | |||
| 142 | /* RELTOL (input) DOUBLE PRECISION */ | ||
| 143 | /* User specified deflation tolerance. If RELTOL.LT.20*EPS, then */ | ||
| 144 | /* the standard value used in the original LAPACK routines is used. */ | ||
| 145 | |||
| 146 | /* DZ (output) INTEGER, DZ.GE.0 */ | ||
| 147 | /* counts the deflation due to a small component in the modification */ | ||
| 148 | /* vector. */ | ||
| 149 | |||
| 150 | /* DE (output) INTEGER, DE.GE.0 */ | ||
| 151 | /* counts the deflation due to close eigenvalues. */ | ||
| 152 | |||
| 153 | /* INFO (output) INTEGER */ | ||
| 154 | /* = 0: successful exit. */ | ||
| 155 | /* < 0: if INFO = -i, the i-th argument had an illegal value. */ | ||
| 156 | |||
| 157 | /* Further Details */ | ||
| 158 | /* =============== */ | ||
| 159 | |||
| 160 | /* Based on code written by */ | ||
| 161 | /* Wilfried Gansterer and Bob Ward, */ | ||
| 162 | /* Department of Computer Science, University of Tennessee */ | ||
| 163 | |||
| 164 | /* Based on the design of the LAPACK code DLAED2 with modifications */ | ||
| 165 | /* to allow a block divide and conquer scheme */ | ||
| 166 | |||
| 167 | /* DLAED2 was written by Jeff Rutter, Computer Science Division, University */ | ||
| 168 | /* of California at Berkeley, USA, and modified by Francoise Tisseur, */ | ||
| 169 | /* University of Tennessee. */ | ||
| 170 | |||
| 171 | /* ===================================================================== */ | ||
| 172 | |||
| 173 | 105 | PetscReal c, s, t, eps, tau, tol, dmax, dmone = -1.; | |
| 174 | 105 | PetscBLASInt i, j, i1, k2, n2, ct, nj, pj=0, js, iq1, iq2; | |
| 175 | 105 | PetscBLASInt psm[4], imax, jmax, ctot[4], factmp=1, one=1; | |
| 176 | |||
| 177 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
105 | PetscFunctionBegin; |
| 178 | 105 | *info = 0; | |
| 179 | |||
| 180 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
105 | if (n < 0) *info = -2; |
| 181 |
2/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
105 | else if (n1 < PetscMin(1,n) || n1 > PetscMax(1,n)) *info = -3; |
| 182 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
105 | else if (ldq < PetscMax(1,n)) *info = -6; |
| 183 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
105 | PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DSRTDF",-(*info)); |
| 184 | |||
| 185 | /* Initialize deflation counters */ | ||
| 186 | |||
| 187 | 105 | *dz = 0; | |
| 188 | 105 | *de = 0; | |
| 189 | |||
| 190 | /* **************************************************************************** */ | ||
| 191 | |||
| 192 | /* Quick return if possible */ | ||
| 193 | |||
| 194 |
2/14✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
|
105 | if (n == 0) PetscFunctionReturn(PETSC_SUCCESS); |
| 195 | |||
| 196 | /* **************************************************************************** */ | ||
| 197 | |||
| 198 | 105 | n2 = n - n1; | |
| 199 | |||
| 200 | /* Modify Z so that RHO is positive. */ | ||
| 201 | |||
| 202 |
1/22✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
|
105 | if (*rho < 0.) PetscCallBLAS("BLASscal",BLASscal_(&n2, &dmone, &z[n1], &one)); |
| 203 | |||
| 204 | /* Normalize z so that norm2(z) = 1. Since z is the concatenation of */ | ||
| 205 | /* two normalized vectors, norm2(z) = sqrt(2). (NOTE that this holds also */ | ||
| 206 | /* here in the approximate block-tridiagonal D&C because the two vectors are */ | ||
| 207 | /* singular vectors, but it would not necessarily hold in a D&C for a */ | ||
| 208 | /* general banded matrix !) */ | ||
| 209 | |||
| 210 | 105 | t = 1. / PETSC_SQRT2; | |
| 211 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
105 | PetscCallBLAS("BLASscal",BLASscal_(&n, &t, z, &one)); |
| 212 | |||
| 213 | /* NOTE: at this point the value of RHO is modified in order to */ | ||
| 214 | /* normalize Z: RHO = ABS( norm2(z)**2 * RHO */ | ||
| 215 | /* in our case: norm2(z) = sqrt(2), and therefore: */ | ||
| 216 | |||
| 217 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
105 | *rho = PetscAbs(*rho * 2.); |
| 218 | |||
| 219 | /* Sort the eigenvalues into increasing order */ | ||
| 220 | |||
| 221 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
285 | for (i = n1; i < n; ++i) indxq[i] += n1; |
| 222 | |||
| 223 | /* re-integrate the deflated parts from the last pass */ | ||
| 224 | |||
| 225 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1185 | for (i = 0; i < n; ++i) dlamda[i] = d[indxq[i]-1]; |
| 226 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
105 | PetscCallBLAS("LAPACKlamrg",LAPACKlamrg_(&n1, &n2, dlamda, &one, &one, indxc)); |
| 227 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1185 | for (i = 0; i < n; ++i) indx[i] = indxq[indxc[i]-1]; |
| 228 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1185 | for (i = 0; i < n; ++i) indxq[i] = 0; |
| 229 | |||
| 230 | /* Calculate the allowable deflation tolerance */ | ||
| 231 | |||
| 232 | /* imax = BLASamax_(&n, z, &one); */ | ||
| 233 | 105 | imax = 1; | |
| 234 | 105 | dmax = PetscAbsReal(z[0]); | |
| 235 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1080 | for (i=1;i<n;i++) { |
| 236 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
975 | if (PetscAbsReal(z[i])>dmax) { |
| 237 | 222 | imax = i+1; | |
| 238 | 222 | dmax = PetscAbsReal(z[i]); | |
| 239 | } | ||
| 240 | } | ||
| 241 | /* jmax = BLASamax_(&n, d, &one); */ | ||
| 242 | 105 | jmax = 1; | |
| 243 | 105 | dmax = PetscAbsReal(d[0]); | |
| 244 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1080 | for (i=1;i<n;i++) { |
| 245 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
975 | if (PetscAbsReal(d[i])>dmax) { |
| 246 | 384 | jmax = i+1; | |
| 247 | 384 | dmax = PetscAbsReal(d[i]); | |
| 248 | } | ||
| 249 | } | ||
| 250 | |||
| 251 | 105 | eps = LAPACKlamch_("Epsilon"); | |
| 252 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
105 | if (reltol < eps * 20) { |
| 253 | /* use the standard deflation tolerance from the original LAPACK code */ | ||
| 254 |
6/6✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
✓ Branch 4 taken 5 times.
✓ Branch 5 taken 5 times.
|
190 | tol = eps * 8. * PetscMax(PetscAbs(d[jmax-1]),PetscAbs(z[imax-1])); |
| 255 | } else { | ||
| 256 | /* otherwise set TOL to the input parameter RELTOL ! */ | ||
| 257 | ✗ | tol = reltol * PetscMax(PetscAbs(d[jmax-1]),PetscAbs(z[imax-1])); | |
| 258 | } | ||
| 259 | |||
| 260 | /* If the rank-1 modifier is small enough, no more needs to be done */ | ||
| 261 | /* except to reorganize Q so that its columns correspond with the */ | ||
| 262 | /* elements in D. */ | ||
| 263 | |||
| 264 |
3/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
105 | if (*rho * PetscAbs(z[imax-1]) <= tol) { |
| 265 | |||
| 266 | /* complete deflation because of small rank-one modifier */ | ||
| 267 | /* NOTE: in this case we need N*N space in the array Q2 ! */ | ||
| 268 | |||
| 269 | ✗ | *dz = n; *k = 0; | |
| 270 | ✗ | iq2 = 1; | |
| 271 | ✗ | for (j = 0; j < n; ++j) { | |
| 272 | ✗ | i = indx[j]; indxq[j] = i; | |
| 273 | ✗ | PetscCallBLAS("BLAScopy",BLAScopy_(&n, &q[(i-1)*ldq], &one, &q2[iq2-1], &one)); | |
| 274 | ✗ | dlamda[j] = d[i-1]; | |
| 275 | ✗ | iq2 += n; | |
| 276 | } | ||
| 277 | ✗ | for (j=0;j<n;j++) for (i=0;i<n;i++) q[i+j*ldq] = q2[i+j*n]; | |
| 278 | ✗ | PetscCallBLAS("BLAScopy",BLAScopy_(&n, dlamda, &one, d, &one)); | |
| 279 | ✗ | PetscFunctionReturn(PETSC_SUCCESS); | |
| 280 | } | ||
| 281 | |||
| 282 | /* If there are multiple eigenvalues then the problem deflates. Here */ | ||
| 283 | /* the number of equal eigenvalues is found. As each equal */ | ||
| 284 | /* eigenvalue is found, an elementary reflector is computed to rotate */ | ||
| 285 | /* the corresponding eigensubspace so that the corresponding */ | ||
| 286 | /* components of Z are zero in this new basis. */ | ||
| 287 | |||
| 288 | /* initialize the column types */ | ||
| 289 | |||
| 290 | /* first N1 columns are initially (before deflation) of type 1 */ | ||
| 291 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1005 | for (i = 0; i < n1; ++i) coltyp[i] = 1; |
| 292 | /* columns N1+1 to N are initially (before deflation) of type 3 */ | ||
| 293 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
285 | for (i = n1; i < n; ++i) coltyp[i] = 3; |
| 294 | |||
| 295 | 105 | *k = 0; | |
| 296 | 105 | k2 = n + 1; | |
| 297 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
105 | for (j = 0; j < n; ++j) { |
| 298 | 105 | nj = indx[j]-1; | |
| 299 |
3/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
105 | if (*rho * PetscAbs(z[nj]) <= tol) { |
| 300 | |||
| 301 | /* Deflate due to small z component. */ | ||
| 302 | ✗ | ++(*dz); | |
| 303 | ✗ | --k2; | |
| 304 | |||
| 305 | /* deflated eigenpair, therefore column type 4 */ | ||
| 306 | ✗ | coltyp[nj] = 4; | |
| 307 | ✗ | indxp[k2-1] = nj+1; | |
| 308 | ✗ | indxq[k2-1] = nj+1; | |
| 309 | ✗ | if (j+1 == n) goto L100; | |
| 310 | } else { | ||
| 311 | |||
| 312 | /* not deflated */ | ||
| 313 | 105 | pj = nj; | |
| 314 | 105 | goto L80; | |
| 315 | } | ||
| 316 | } | ||
| 317 | factmp = 1; | ||
| 318 | L80: | ||
| 319 | 1080 | ++j; | |
| 320 | 1080 | nj = indx[j]-1; | |
| 321 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1080 | if (j+1 > n) goto L100; |
| 322 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
975 | if (*rho * PetscAbs(z[nj]) <= tol) { |
| 323 | |||
| 324 | /* Deflate due to small z component. */ | ||
| 325 | 186 | ++(*dz); | |
| 326 | 186 | --k2; | |
| 327 | 186 | coltyp[nj] = 4; | |
| 328 | 186 | indxp[k2-1] = nj+1; | |
| 329 | 186 | indxq[k2-1] = nj+1; | |
| 330 | } else { | ||
| 331 | |||
| 332 | /* Check if eigenvalues are close enough to allow deflation. */ | ||
| 333 | 789 | s = z[pj]; | |
| 334 | 789 | c = z[nj]; | |
| 335 | |||
| 336 | /* Find sqrt(a**2+b**2) without overflow or */ | ||
| 337 | /* destructive underflow. */ | ||
| 338 | |||
| 339 | 789 | tau = SlepcAbs(c, s); | |
| 340 | 789 | t = d[nj] - d[pj]; | |
| 341 | 789 | c /= tau; | |
| 342 | 789 | s = -s / tau; | |
| 343 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
789 | if (PetscAbs(t * c * s) <= tol) { |
| 344 | |||
| 345 | /* Deflate due to close eigenvalues. */ | ||
| 346 | 94 | ++(*de); | |
| 347 | 94 | z[nj] = tau; | |
| 348 | 94 | z[pj] = 0.; | |
| 349 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 2 times.
|
94 | if (coltyp[nj] != coltyp[pj]) coltyp[nj] = 2; |
| 350 | |||
| 351 | /* if deflation happens across the two eigenvector blocks */ | ||
| 352 | /* (eigenvalues corresponding to different blocks), then */ | ||
| 353 | /* on column in the eigenvector matrix fills up completely */ | ||
| 354 | /* (changes from type 1 or 3 to type 2) */ | ||
| 355 | |||
| 356 | /* eigenpair PJ is deflated, therefore the column type changes */ | ||
| 357 | /* to 4 */ | ||
| 358 | |||
| 359 | 94 | coltyp[pj] = 4; | |
| 360 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
94 | PetscCallBLAS("BLASrot",BLASrot_(&n, &q[pj*ldq], &one, &q[nj*ldq], &one, &c, &s)); |
| 361 | 94 | t = d[pj] * (c * c) + d[nj] * (s * s); | |
| 362 | 94 | d[nj] = d[pj] * (s * s) + d[nj] * (c * c); | |
| 363 | 94 | d[pj] = t; | |
| 364 | 94 | --k2; | |
| 365 | 94 | i = 1; | |
| 366 | 94 | L90: | |
| 367 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
94 | if (k2 + i <= n) { |
| 368 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
75 | if (d[pj] < d[indxp[k2 + i-1]-1]) { |
| 369 | ✗ | indxp[k2 + i - 2] = indxp[k2 + i - 1]; | |
| 370 | ✗ | indxq[k2 + i - 2] = indxq[k2 + i - 1]; | |
| 371 | ✗ | indxp[k2 + i - 1] = pj + 1; | |
| 372 | ✗ | indxq[k2 + i - 2] = pj + 1; | |
| 373 | ✗ | ++i; | |
| 374 | ✗ | goto L90; | |
| 375 | } else { | ||
| 376 | 75 | indxp[k2 + i - 2] = pj + 1; | |
| 377 | 75 | indxq[k2 + i - 2] = pj + 1; | |
| 378 | } | ||
| 379 | } else { | ||
| 380 | 19 | indxp[k2 + i - 2] = pj + 1; | |
| 381 | 19 | indxq[k2 + i - 2] = pj + 1; | |
| 382 | } | ||
| 383 | pj = nj; | ||
| 384 | factmp = -1; | ||
| 385 | } else { | ||
| 386 | |||
| 387 | /* do not deflate */ | ||
| 388 | 695 | ++(*k); | |
| 389 | 695 | dlamda[*k-1] = d[pj]; | |
| 390 | 695 | w[*k-1] = z[pj]; | |
| 391 | 695 | indxp[*k-1] = pj + 1; | |
| 392 | 695 | indxq[*k-1] = pj + 1; | |
| 393 | 695 | factmp = 1; | |
| 394 | 695 | pj = nj; | |
| 395 | } | ||
| 396 | } | ||
| 397 | 975 | goto L80; | |
| 398 | 105 | L100: | |
| 399 | |||
| 400 | /* Record the last eigenvalue. */ | ||
| 401 | 105 | ++(*k); | |
| 402 | 105 | dlamda[*k-1] = d[pj]; | |
| 403 | 105 | w[*k-1] = z[pj]; | |
| 404 | 105 | indxp[*k-1] = pj+1; | |
| 405 | 105 | indxq[*k-1] = (pj+1) * factmp; | |
| 406 | |||
| 407 | /* Count up the total number of the various types of columns, then */ | ||
| 408 | /* form a permutation which positions the four column types into */ | ||
| 409 | /* four uniform groups (although one or more of these groups may be */ | ||
| 410 | /* empty). */ | ||
| 411 | |||
| 412 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
525 | for (j = 0; j < 4; ++j) ctot[j] = 0; |
| 413 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1185 | for (j = 0; j < n; ++j) { |
| 414 | 1080 | ct = coltyp[j]; | |
| 415 | 1080 | ++ctot[ct-1]; | |
| 416 | } | ||
| 417 | |||
| 418 | /* PSM(*) = Position in SubMatrix (of types 1 through 4) */ | ||
| 419 | 105 | psm[0] = 1; | |
| 420 | 105 | psm[1] = ctot[0] + 1; | |
| 421 | 105 | psm[2] = psm[1] + ctot[1]; | |
| 422 | 105 | psm[3] = psm[2] + ctot[2]; | |
| 423 | 105 | *k = n - ctot[3]; | |
| 424 | |||
| 425 | /* Fill out the INDXC array so that the permutation which it induces */ | ||
| 426 | /* will place all type-1 columns first, all type-2 columns next, */ | ||
| 427 | /* then all type-3's, and finally all type-4's. */ | ||
| 428 | |||
| 429 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1185 | for (j = 0; j < n; ++j) { |
| 430 | 1080 | js = indxp[j]; | |
| 431 | 1080 | ct = coltyp[js-1]; | |
| 432 | 1080 | indx[psm[ct - 1]-1] = js; | |
| 433 | 1080 | indxc[psm[ct - 1]-1] = j+1; | |
| 434 | 1080 | ++psm[ct - 1]; | |
| 435 | } | ||
| 436 | |||
| 437 | /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */ | ||
| 438 | /* and Q2 respectively. The eigenvalues/vectors which were not */ | ||
| 439 | /* deflated go into the first K slots of DLAMDA and Q2 respectively, */ | ||
| 440 | /* while those which were deflated go into the last N - K slots. */ | ||
| 441 | |||
| 442 | 105 | i = 0; | |
| 443 | 105 | iq1 = 0; | |
| 444 | 105 | iq2 = (ctot[0] + ctot[1]) * n1; | |
| 445 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
725 | for (j = 0; j < ctot[0]; ++j) { |
| 446 | 620 | js = indx[i]; | |
| 447 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
620 | PetscCallBLAS("BLAScopy",BLAScopy_(&n1, &q[(js-1)*ldq], &one, &q2[iq1], &one)); |
| 448 | 620 | z[i] = d[js-1]; | |
| 449 | 620 | ++i; | |
| 450 | 620 | iq1 += n1; | |
| 451 | } | ||
| 452 | |||
| 453 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
195 | for (j = 0; j < ctot[1]; ++j) { |
| 454 | 90 | js = indx[i]; | |
| 455 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
90 | PetscCallBLAS("BLAScopy",BLAScopy_(&n1, &q[(js-1)*ldq], &one, &q2[iq1], &one)); |
| 456 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
90 | PetscCallBLAS("BLAScopy",BLAScopy_(&n2, &q[n1+(js-1)*ldq], &one, &q2[iq2], &one)); |
| 457 | 90 | z[i] = d[js-1]; | |
| 458 | 90 | ++i; | |
| 459 | 90 | iq1 += n1; | |
| 460 | 90 | iq2 += n2; | |
| 461 | } | ||
| 462 | |||
| 463 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
195 | for (j = 0; j < ctot[2]; ++j) { |
| 464 | 90 | js = indx[i]; | |
| 465 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
90 | PetscCallBLAS("BLAScopy",BLAScopy_(&n2, &q[n1+(js-1)*ldq], &one, &q2[iq2], &one)); |
| 466 | 90 | z[i] = d[js-1]; | |
| 467 | 90 | ++i; | |
| 468 | 90 | iq2 += n2; | |
| 469 | } | ||
| 470 | |||
| 471 | 385 | iq1 = iq2; | |
| 472 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
385 | for (j = 0; j < ctot[3]; ++j) { |
| 473 | 280 | js = indx[i]; | |
| 474 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
280 | PetscCallBLAS("BLAScopy",BLAScopy_(&n, &q[(js-1)*ldq], &one, &q2[iq2], &one)); |
| 475 | 280 | iq2 += n; | |
| 476 | 280 | z[i] = d[js-1]; | |
| 477 | 280 | ++i; | |
| 478 | } | ||
| 479 | |||
| 480 | /* The deflated eigenvalues and their corresponding vectors go back */ | ||
| 481 | /* into the last N - K slots of D and Q respectively. */ | ||
| 482 | |||
| 483 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
5305 | for (j=0;j<ctot[3];j++) for (i=0;i<n;i++) q[i+(j+*k)*ldq] = q2[iq1+i+j*n]; |
| 484 | 105 | i1 = n - *k; | |
| 485 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
105 | PetscCallBLAS("BLAScopy",BLAScopy_(&i1, &z[*k], &one, &d[*k], &one)); |
| 486 | |||
| 487 | /* Copy CTOT into COLTYP for referencing in DLAED3M. */ | ||
| 488 | |||
| 489 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
525 | for (j = 0; j < 4; ++j) coltyp[j] = ctot[j]; |
| 490 |
6/12✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
|
21 | PetscFunctionReturn(PETSC_SUCCESS); |
| 491 | } | ||
| 492 |