| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
| 3 | SLEPc - Scalable Library for Eigenvalue Problem Computations | ||
| 4 | Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain | ||
| 5 | |||
| 6 | This file is part of SLEPc. | ||
| 7 | SLEPc is distributed under a 2-clause BSD license (see LICENSE). | ||
| 8 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
| 9 | */ | ||
| 10 | /* | ||
| 11 | BDC - Block-divide and conquer (see description in README file) | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <slepc/private/dsimpl.h> | ||
| 15 | #include <slepcblaslapack.h> | ||
| 16 | |||
| 17 | 15 | static PetscErrorCode cutlr_(PetscBLASInt start,PetscBLASInt n,PetscBLASInt blkct, | |
| 18 | PetscBLASInt *bsizes,PetscBLASInt *ranks,PetscBLASInt *cut, | ||
| 19 | PetscBLASInt *lsum,PetscBLASInt *lblks,PetscBLASInt *info) | ||
| 20 | { | ||
| 21 | /* -- Routine written in LAPACK Version 3.0 style -- */ | ||
| 22 | /* *************************************************** */ | ||
| 23 | /* Written by */ | ||
| 24 | /* Michael Moldaschl and Wilfried Gansterer */ | ||
| 25 | /* University of Vienna */ | ||
| 26 | /* last modification: March 16, 2014 */ | ||
| 27 | |||
| 28 | /* Small adaptations of original code written by */ | ||
| 29 | /* Wilfried Gansterer and Bob Ward, */ | ||
| 30 | /* Department of Computer Science, University of Tennessee */ | ||
| 31 | /* see https://doi.org/10.1137/S1064827501399432 */ | ||
| 32 | /* *************************************************** */ | ||
| 33 | |||
| 34 | /* Purpose */ | ||
| 35 | /* ======= */ | ||
| 36 | |||
| 37 | /* CUTLR computes the optimal cut in a sequence of BLKCT neighboring */ | ||
| 38 | /* blocks whose sizes are given by the array BSIZES. */ | ||
| 39 | /* The sum of all block sizes in the sequence considered is given by N. */ | ||
| 40 | /* The cut is optimal in the sense that the difference of the sizes of */ | ||
| 41 | /* the resulting two halves is minimum over all cuts with minimum ranks */ | ||
| 42 | /* between blocks of the sequence considered. */ | ||
| 43 | |||
| 44 | /* Arguments */ | ||
| 45 | /* ========= */ | ||
| 46 | |||
| 47 | /* START (input) INTEGER */ | ||
| 48 | /* In the original array KSIZES of the calling routine DIBTDC, */ | ||
| 49 | /* the position where the sequence considered in this routine starts. */ | ||
| 50 | /* START >= 1. */ | ||
| 51 | |||
| 52 | /* N (input) INTEGER */ | ||
| 53 | /* The sum of all the block sizes of the sequence to be cut = */ | ||
| 54 | /* = sum_{i=1}^{BLKCT} BSIZES(I). */ | ||
| 55 | /* N >= 3. */ | ||
| 56 | |||
| 57 | /* BLKCT (input) INTEGER */ | ||
| 58 | /* The number of blocks in the sequence to be cut. */ | ||
| 59 | /* BLKCT >= 3. */ | ||
| 60 | |||
| 61 | /* BSIZES (input) INTEGER array, dimension (BLKCT) */ | ||
| 62 | /* The dimensions of the (quadratic) blocks of the sequence to be */ | ||
| 63 | /* cut. sum_{i=1}^{BLKCT} BSIZES(I) = N. */ | ||
| 64 | |||
| 65 | /* RANKS (input) INTEGER array, dimension (BLKCT-1) */ | ||
| 66 | /* The ranks determining the approximations of the off-diagonal */ | ||
| 67 | /* blocks in the sequence considered. */ | ||
| 68 | |||
| 69 | /* CUT (output) INTEGER */ | ||
| 70 | /* After the optimum cut has been determined, the position (in the */ | ||
| 71 | /* overall problem as worked on in DIBTDC !) of the last block in */ | ||
| 72 | /* the first half of the sequence to be cut. */ | ||
| 73 | /* START <= CUT <= START+BLKCT-2. */ | ||
| 74 | |||
| 75 | /* LSUM (output) INTEGER */ | ||
| 76 | /* After the optimum cut has been determined, the sum of the */ | ||
| 77 | /* block sizes in the first half of the sequence to be cut. */ | ||
| 78 | /* LSUM < N. */ | ||
| 79 | |||
| 80 | /* LBLKS (output) INTEGER */ | ||
| 81 | /* After the optimum cut has been determined, the number of the */ | ||
| 82 | /* blocks in the first half of the sequence to be cut. */ | ||
| 83 | /* 1 <= LBLKS < BLKCT. */ | ||
| 84 | |||
| 85 | /* INFO (output) INTEGER */ | ||
| 86 | /* = 0: successful exit. */ | ||
| 87 | /* < 0: illegal arguments. */ | ||
| 88 | /* if INFO = -i, the i-th (input) argument had an illegal */ | ||
| 89 | /* value. */ | ||
| 90 | /* > 0: illegal results. */ | ||
| 91 | /* if INFO = i, the i-th (output) argument had an illegal */ | ||
| 92 | /* value. */ | ||
| 93 | |||
| 94 | /* Further Details */ | ||
| 95 | /* =============== */ | ||
| 96 | |||
| 97 | /* Based on code written by */ | ||
| 98 | /* Wilfried Gansterer and Bob Ward, */ | ||
| 99 | /* Department of Computer Science, University of Tennessee */ | ||
| 100 | |||
| 101 | /* ===================================================================== */ | ||
| 102 | |||
| 103 | 15 | PetscBLASInt i, ksk, kchk, ksum, nhalf, deviat, mindev, minrnk, tmpsum; | |
| 104 | |||
| 105 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
15 | PetscFunctionBegin; |
| 106 | 15 | *info = 0; | |
| 107 | 15 | *lblks = 1; | |
| 108 | 15 | *lsum = 1; | |
| 109 | 15 | *cut = start; | |
| 110 | |||
| 111 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
15 | if (start < 1) *info = -1; |
| 112 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
15 | else if (n < 3) *info = -2; |
| 113 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
15 | else if (blkct < 3) *info = -3; |
| 114 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
15 | if (*info == 0) { |
| 115 | ksum = 0; | ||
| 116 | kchk = 0; | ||
| 117 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
95 | for (i = 0; i < blkct; ++i) { |
| 118 | 80 | ksk = bsizes[i]; | |
| 119 | 80 | ksum += ksk; | |
| 120 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
80 | if (ksk < 1) kchk = 1; |
| 121 | } | ||
| 122 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
15 | if (ksum != n || kchk == 1) *info = -4; |
| 123 | } | ||
| 124 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
15 | PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in CUTLR",-(*info)); |
| 125 | |||
| 126 | /* determine smallest rank in the range considered */ | ||
| 127 | |||
| 128 | minrnk = n; | ||
| 129 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
80 | for (i = 0; i < blkct-1; ++i) { |
| 130 | 65 | if (ranks[i] < minrnk) minrnk = ranks[i]; | |
| 131 | } | ||
| 132 | |||
| 133 | /* determine best cut among those with smallest rank */ | ||
| 134 | |||
| 135 | 15 | nhalf = n / 2; | |
| 136 | 15 | tmpsum = 0; | |
| 137 | 15 | mindev = n; | |
| 138 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
95 | for (i = 0; i < blkct; ++i) { |
| 139 | 80 | tmpsum += bsizes[i]; | |
| 140 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
80 | if (ranks[i] == minrnk) { |
| 141 | |||
| 142 | /* determine deviation from "optimal" cut NHALF */ | ||
| 143 | |||
| 144 | 70 | deviat = tmpsum - nhalf; | |
| 145 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
70 | if (deviat<0) deviat = -deviat; |
| 146 | |||
| 147 | /* compare to best deviation so far */ | ||
| 148 | |||
| 149 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
70 | if (deviat < mindev) { |
| 150 | 40 | mindev = deviat; | |
| 151 | 40 | *cut = start + i; | |
| 152 | 40 | *lblks = i + 1; | |
| 153 | 40 | *lsum = tmpsum; | |
| 154 | } | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 |
2/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
15 | if (*cut < start || *cut >= start + blkct - 1) *info = 6; |
| 159 |
2/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
15 | else if (*lsum < 1 || *lsum >= n) *info = 7; |
| 160 |
2/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
15 | else if (*lblks < 1 || *lblks >= blkct) *info = 8; |
| 161 |
6/12✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
|
3 | PetscFunctionReturn(PETSC_SUCCESS); |
| 162 | } | ||
| 163 | |||
| 164 | 5 | PetscErrorCode BDC_dibtdc_(const char *jobz,PetscBLASInt n,PetscBLASInt nblks, | |
| 165 | PetscBLASInt *ksizes,PetscReal *d,PetscBLASInt l1d,PetscBLASInt l2d, | ||
| 166 | PetscReal *e,PetscBLASInt *rank,PetscBLASInt l1e,PetscBLASInt l2e, | ||
| 167 | PetscReal tol,PetscReal *ev,PetscReal *z,PetscBLASInt ldz,PetscReal *work, | ||
| 168 | PetscBLASInt lwork,PetscBLASInt *iwork,PetscBLASInt liwork, | ||
| 169 | PetscBLASInt *info,PetscBLASInt jobz_len) | ||
| 170 | { | ||
| 171 | /* -- Routine written in LAPACK Version 3.0 style -- */ | ||
| 172 | /* *************************************************** */ | ||
| 173 | /* Written by */ | ||
| 174 | /* Michael Moldaschl and Wilfried Gansterer */ | ||
| 175 | /* University of Vienna */ | ||
| 176 | /* last modification: March 16, 2014 */ | ||
| 177 | |||
| 178 | /* Small adaptations of original code written by */ | ||
| 179 | /* Wilfried Gansterer and Bob Ward, */ | ||
| 180 | /* Department of Computer Science, University of Tennessee */ | ||
| 181 | /* see https://doi.org/10.1137/S1064827501399432 */ | ||
| 182 | /* *************************************************** */ | ||
| 183 | |||
| 184 | /* Purpose */ | ||
| 185 | /* ======= */ | ||
| 186 | |||
| 187 | /* DIBTDC computes all eigenvalues and corresponding eigenvectors of a */ | ||
| 188 | /* symmetric irreducible block tridiagonal matrix with rank RANK matrices */ | ||
| 189 | /* as the subdiagonal blocks using a block divide and conquer method. */ | ||
| 190 | |||
| 191 | /* Arguments */ | ||
| 192 | /* ========= */ | ||
| 193 | |||
| 194 | /* JOBZ (input) CHARACTER*1 */ | ||
| 195 | /* = 'N': Compute eigenvalues only (not implemented); */ | ||
| 196 | /* = 'D': Compute eigenvalues and eigenvectors. */ | ||
| 197 | /* Eigenvectors are accumulated in the */ | ||
| 198 | /* divide-and-conquer process. */ | ||
| 199 | |||
| 200 | /* N (input) INTEGER */ | ||
| 201 | /* The dimension of the symmetric irreducible block tridiagonal */ | ||
| 202 | /* matrix. N >= 2. */ | ||
| 203 | |||
| 204 | /* NBLKS (input) INTEGER, 2 <= NBLKS <= N */ | ||
| 205 | /* The number of diagonal blocks in the matrix. */ | ||
| 206 | |||
| 207 | /* KSIZES (input) INTEGER array, dimension (NBLKS) */ | ||
| 208 | /* The dimension of the square diagonal blocks from top left */ | ||
| 209 | /* to bottom right. KSIZES(I) >= 1 for all I, and the sum of */ | ||
| 210 | /* KSIZES(I) for I = 1 to NBLKS has to be equal to N. */ | ||
| 211 | |||
| 212 | /* D (input) DOUBLE PRECISION array, dimension (L1D,L2D,NBLKS) */ | ||
| 213 | /* The lower triangular elements of the symmetric diagonal */ | ||
| 214 | /* blocks of the block tridiagonal matrix. Elements of the top */ | ||
| 215 | /* left diagonal block, which is of dimension KSIZES(1), are */ | ||
| 216 | /* contained in D(*,*,1); the elements of the next diagonal */ | ||
| 217 | /* block, which is of dimension KSIZES(2), are contained in */ | ||
| 218 | /* D(*,*,2); etc. */ | ||
| 219 | |||
| 220 | /* L1D (input) INTEGER */ | ||
| 221 | /* The leading dimension of the array D. L1D >= max(3,KMAX), */ | ||
| 222 | /* where KMAX is the dimension of the largest diagonal block. */ | ||
| 223 | |||
| 224 | /* L2D (input) INTEGER */ | ||
| 225 | /* The second dimension of the array D. L2D >= max(3,KMAX), */ | ||
| 226 | /* where KMAX is as stated in L1D above. */ | ||
| 227 | |||
| 228 | /* E (input) DOUBLE PRECISION array, dimension (L1E,L2E,NBLKS-1) */ | ||
| 229 | /* Contains the elements of the scalars (singular values) and */ | ||
| 230 | /* vectors (singular vectors) defining the rank RANK subdiagonal */ | ||
| 231 | /* blocks of the matrix. */ | ||
| 232 | /* E(1:RANK(K),RANK(K)+1,K) holds the RANK(K) scalars, */ | ||
| 233 | /* E(:,1:RANK(K),K) holds the RANK(K) column vectors, and */ | ||
| 234 | /* E(:,RANK(K)+2:2*RANK(K)+1,K) holds the row vectors for the K-th */ | ||
| 235 | /* subdiagonal block. */ | ||
| 236 | |||
| 237 | /* RANK (input) INTEGER array, dimension (NBLKS-1). */ | ||
| 238 | /* The ranks of all the subdiagonal blocks contained in the array E. */ | ||
| 239 | /* RANK(K) <= MIN(KSIZES(K), KSIZES(K+1)) */ | ||
| 240 | |||
| 241 | /* L1E (input) INTEGER */ | ||
| 242 | /* The leading dimension of the array E. L1E >= max(3,2*KMAX+1), */ | ||
| 243 | /* where KMAX is as stated in L1D above. */ | ||
| 244 | |||
| 245 | /* L2E (input) INTEGER */ | ||
| 246 | /* The second dimension of the array E. L2E >= max(3,2*KMAX+1), */ | ||
| 247 | /* where KMAX is as stated in L1D above. */ | ||
| 248 | |||
| 249 | /* TOL (input) DOUBLE PRECISION, TOL <= 1.0D-1 */ | ||
| 250 | /* User specified deflation tolerance for the routine DMERG2. */ | ||
| 251 | /* If (1.0D-1 >= TOL >= 20*EPS) then TOL is used as */ | ||
| 252 | /* the deflation tolerance in DSRTDF. */ | ||
| 253 | /* If (TOL < 20*EPS) then the standard deflation tolerance from */ | ||
| 254 | /* LAPACK is used as the deflation tolerance in DSRTDF. */ | ||
| 255 | |||
| 256 | /* EV (output) DOUBLE PRECISION array, dimension (N) */ | ||
| 257 | /* If INFO = 0, then EV contains the eigenvalues of the */ | ||
| 258 | /* symmetric block tridiagonal matrix in ascending order. */ | ||
| 259 | |||
| 260 | /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */ | ||
| 261 | /* On entry, Z will be the identity matrix. */ | ||
| 262 | /* On exit, Z contains the eigenvectors of the block tridiagonal */ | ||
| 263 | /* matrix. */ | ||
| 264 | |||
| 265 | /* LDZ (input) INTEGER */ | ||
| 266 | /* The leading dimension of the array Z. LDZ >= max(1,N). */ | ||
| 267 | |||
| 268 | /* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) */ | ||
| 269 | |||
| 270 | /* LWORK (input) INTEGER */ | ||
| 271 | /* The dimension of the array WORK. */ | ||
| 272 | /* In order to guarantee correct results in all cases, */ | ||
| 273 | /* LWORK must be at least (2*N**2 + 3*N). In many cases, */ | ||
| 274 | /* less workspace is required. The absolute minimum required is */ | ||
| 275 | /* (N**2 + 3*N). */ | ||
| 276 | /* If the workspace provided is not sufficient, the routine will */ | ||
| 277 | /* return a corresponding error code and report how much workspace */ | ||
| 278 | /* was missing (see INFO). */ | ||
| 279 | |||
| 280 | /* IWORK (workspace) INTEGER array, dimension (LIWORK) */ | ||
| 281 | |||
| 282 | /* LIWORK (input) INTEGER */ | ||
| 283 | /* The dimension of the array IWORK. */ | ||
| 284 | /* LIWORK must be at least (5*N + 3 + 4*NBLKS - 4): */ | ||
| 285 | /* 5*KMAX+3 for DSYEVD, 5*N for ????, */ | ||
| 286 | /* 4*NBLKS-4 for the preprocessing (merging order) */ | ||
| 287 | /* Summarizing, the minimum integer workspace needed is */ | ||
| 288 | /* MAX(5*N, 5*KMAX + 3) + 4*NBLKS - 4 */ | ||
| 289 | |||
| 290 | /* INFO (output) INTEGER */ | ||
| 291 | /* = 0: successful exit. */ | ||
| 292 | /* < 0, > -99: illegal arguments. */ | ||
| 293 | /* if INFO = -i, the i-th argument had an illegal value. */ | ||
| 294 | /* = -99: error in the preprocessing (call of CUTLR). */ | ||
| 295 | /* < -200: not enough workspace. Space for ABS(INFO + 200) */ | ||
| 296 | /* numbers is required in addition to the workspace provided, */ | ||
| 297 | /* otherwise some eigenvectors will be incorrect. */ | ||
| 298 | /* > 0: The algorithm failed to compute an eigenvalue while */ | ||
| 299 | /* working on the submatrix lying in rows and columns */ | ||
| 300 | /* INFO/(N+1) through mod(INFO,N+1). */ | ||
| 301 | |||
| 302 | /* Further Details */ | ||
| 303 | /* =============== */ | ||
| 304 | |||
| 305 | /* Based on code written by */ | ||
| 306 | /* Wilfried Gansterer and Bob Ward, */ | ||
| 307 | /* Department of Computer Science, University of Tennessee */ | ||
| 308 | |||
| 309 | /* This routine is comparable to Dlaed0.f from LAPACK. */ | ||
| 310 | |||
| 311 | /* ===================================================================== */ | ||
| 312 | |||
| 313 | 5 | PetscBLASInt i, j, k, np, rp1, ksk, one=1; | |
| 314 | 5 | PetscBLASInt cut, mat1, kchk, kbrk, blks, kmax, icut, size, ksum, lsum; | |
| 315 | 5 | PetscBLASInt lblks, rblks, isize, lwmin, ilsum; | |
| 316 | 5 | PetscBLASInt start, istck1, istck2, istck3, merged; | |
| 317 | 5 | PetscBLASInt liwmin, matsiz, startp, istrtp; | |
| 318 | 5 | PetscReal rho, done=1.0, dmone=-1.0; | |
| 319 | |||
| 320 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
5 | PetscFunctionBegin; |
| 321 | 5 | *info = 0; | |
| 322 | |||
| 323 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | if (*(unsigned char *)jobz != 'N' && *(unsigned char *)jobz != 'D') *info = -1; |
| 324 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (n < 2) *info = -2; |
| 325 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (nblks < 2 || nblks > n) *info = -3; |
| 326 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | if (*info == 0) { |
| 327 | ksum = 0; | ||
| 328 | kmax = 0; | ||
| 329 | kchk = 0; | ||
| 330 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
45 | for (k = 0; k < nblks; ++k) { |
| 331 | 40 | ksk = ksizes[k]; | |
| 332 | 40 | ksum += ksk; | |
| 333 | 40 | if (ksk > kmax) kmax = ksk; | |
| 334 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
40 | if (ksk < 1) kchk = 1; |
| 335 | } | ||
| 336 | 5 | lwmin = n*n + n * 3; | |
| 337 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | liwmin = PetscMax(n * 5,kmax * 5 + 3) + 4*nblks - 4; |
| 338 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | if (ksum != n || kchk == 1) *info = -4; |
| 339 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (l1d < PetscMax(3,kmax)) *info = -6; |
| 340 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (l2d < PetscMax(3,kmax)) *info = -7; |
| 341 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (l1e < PetscMax(3,2*kmax + 1)) *info = -10; |
| 342 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (l2e < PetscMax(3,2*kmax + 1)) *info = -11; |
| 343 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (tol > .1) *info = -12; |
| 344 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (ldz < PetscMax(1,n)) *info = -15; |
| 345 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (lwork < lwmin) *info = -17; |
| 346 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (liwork < liwmin) *info = -19; |
| 347 | } | ||
| 348 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | if (*info == 0) { |
| 349 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
40 | for (k = 0; k < nblks-1; ++k) { |
| 350 |
2/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
35 | if (rank[k] > PetscMin(ksizes[k],ksizes[k+1]) || rank[k] < 1) *info = -9; |
| 351 | } | ||
| 352 | } | ||
| 353 | |||
| 354 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
5 | PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DIBTDC",-(*info)); |
| 355 | |||
| 356 | /* **************************************************************************** */ | ||
| 357 | |||
| 358 | /* ...Preprocessing..................................................... */ | ||
| 359 | /* Determine the optimal order for merging the subblocks and how much */ | ||
| 360 | /* workspace will be needed for the merging (determined by the last */ | ||
| 361 | /* merge). Cutpoints for the merging operations are determined and stored */ | ||
| 362 | /* in reverse chronological order (starting with the final merging */ | ||
| 363 | /* operation). */ | ||
| 364 | |||
| 365 | /* integer workspace requirements for the preprocessing: */ | ||
| 366 | /* 4*(NBLKS-1) for merging history */ | ||
| 367 | /* at most 3*(NBLKS-1) for stack */ | ||
| 368 | |||
| 369 | 5 | start = 1; | |
| 370 | 5 | size = n; | |
| 371 | 5 | blks = nblks; | |
| 372 | 5 | merged = 0; | |
| 373 | 5 | k = 0; | |
| 374 | |||
| 375 | /* integer workspace used for the stack is not needed any more after the */ | ||
| 376 | /* preprocessing and therefore can use part of the 5*N */ | ||
| 377 | /* integer workspace needed later on in the code */ | ||
| 378 | |||
| 379 | 5 | istck1 = 0; | |
| 380 | 5 | istck2 = istck1 + nblks; | |
| 381 | 5 | istck3 = istck2 + nblks; | |
| 382 | |||
| 383 | /* integer workspace used for storing the order of merges starts AFTER */ | ||
| 384 | /* the integer workspace 5*N+3 which is needed later on in the code */ | ||
| 385 | /* (5*KMAX+3 for DSYEVD, 4*N in DMERG2) */ | ||
| 386 | |||
| 387 | 5 | istrtp = n * 5 + 4; | |
| 388 | 5 | icut = istrtp + nblks - 1; | |
| 389 | 5 | isize = icut + nblks - 1; | |
| 390 | 5 | ilsum = isize + nblks - 1; | |
| 391 | |||
| 392 | 10 | L200: | |
| 393 | |||
| 394 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
15 | if (nblks >= 3) { |
| 395 | |||
| 396 | /* Determine the cut point. Note that in the routine CUTLR it is */ | ||
| 397 | /* chosen such that it yields the best balanced merging operation */ | ||
| 398 | /* among all the rank modifications with minimum rank. */ | ||
| 399 | |||
| 400 |
4/6✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
|
15 | PetscCall(cutlr_(start, size, blks, &ksizes[start-1], &rank[start-1], &cut, &lsum, &lblks, info)); |
| 401 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
15 | PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in cutlr, info = %" PetscBLASInt_FMT,*info); |
| 402 | |||
| 403 | } else { | ||
| 404 | ✗ | cut = 1; | |
| 405 | ✗ | lsum = ksizes[0]; | |
| 406 | ✗ | lblks = 1; | |
| 407 | } | ||
| 408 | |||
| 409 | 15 | ++merged; | |
| 410 | 15 | startp = 0; | |
| 411 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
35 | for (i = 0; i < start-1; ++i) startp += ksizes[i]; |
| 412 | 15 | iwork[istrtp + (nblks - 1) - merged-1] = startp + 1; | |
| 413 | 15 | iwork[icut + (nblks - 1) - merged-1] = cut; | |
| 414 | 15 | iwork[isize + (nblks - 1) - merged-1] = size; | |
| 415 | 15 | iwork[ilsum + (nblks - 1) - merged-1] = lsum; | |
| 416 | |||
| 417 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
15 | if (lblks == 2) { |
| 418 | |||
| 419 | /* one merge in left branch, left branch done */ | ||
| 420 | 10 | ++merged; | |
| 421 | 10 | iwork[istrtp + (nblks - 1) - merged-1] = startp + 1; | |
| 422 | 10 | iwork[icut + (nblks - 1) - merged-1] = start; | |
| 423 | 10 | iwork[isize + (nblks - 1) - merged-1] = lsum; | |
| 424 | 10 | iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start-1]; | |
| 425 | } | ||
| 426 | |||
| 427 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
15 | if (lblks == 1 || lblks == 2) { |
| 428 | |||
| 429 | /* left branch done, continue on the right side */ | ||
| 430 | 10 | start += lblks; | |
| 431 | 10 | size -= lsum; | |
| 432 | 10 | blks -= lblks; | |
| 433 | |||
| 434 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
10 | PetscCheck(blks>0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in preprocessing, blks = %" PetscBLASInt_FMT,blks); |
| 435 | |||
| 436 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
10 | if (blks == 2) { |
| 437 | |||
| 438 | /* one merge in right branch, right branch done */ | ||
| 439 | 10 | ++merged; | |
| 440 | 10 | startp += lsum; | |
| 441 | 10 | iwork[istrtp + (nblks - 1) - merged-1] = startp + 1; | |
| 442 | 10 | iwork[icut + (nblks - 1) - merged-1] = start; | |
| 443 | 10 | iwork[isize + (nblks - 1) - merged-1] = size; | |
| 444 | 10 | iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start-1]; | |
| 445 | } | ||
| 446 | |||
| 447 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
10 | if (blks == 1 || blks == 2) { |
| 448 | |||
| 449 | /* get the next subproblem from the stack or finished */ | ||
| 450 | |||
| 451 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (k >= 1) { |
| 452 | |||
| 453 | /* something left on the stack */ | ||
| 454 | 5 | start = iwork[istck1 + k-1]; | |
| 455 | 5 | size = iwork[istck2 + k-1]; | |
| 456 | 5 | blks = iwork[istck3 + k-1]; | |
| 457 | 5 | --k; | |
| 458 | 5 | goto L200; | |
| 459 | } else { | ||
| 460 | |||
| 461 | /* nothing left on the stack */ | ||
| 462 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
5 | PetscCheck(merged==nblks-1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ERROR in preprocessing - not enough merges performed"); |
| 463 | |||
| 464 | /* exit preprocessing */ | ||
| 465 | |||
| 466 | } | ||
| 467 | } else { | ||
| 468 | |||
| 469 | /* BLKS.GE.3, and therefore analyze the right side */ | ||
| 470 | |||
| 471 | ✗ | goto L200; | |
| 472 | } | ||
| 473 | } else { | ||
| 474 | |||
| 475 | /* LBLKS.GE.3, and therefore check the right side and */ | ||
| 476 | /* put it on the stack if required */ | ||
| 477 | |||
| 478 | 5 | rblks = blks - lblks; | |
| 479 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | if (rblks >= 3) { |
| 480 | 5 | ++k; | |
| 481 | 5 | iwork[istck1 + k-1] = cut + 1; | |
| 482 | 5 | iwork[istck2 + k-1] = size - lsum; | |
| 483 | 5 | iwork[istck3 + k-1] = rblks; | |
| 484 | ✗ | } else if (rblks == 2) { | |
| 485 | |||
| 486 | /* one merge in right branch, right branch done */ | ||
| 487 | /* (note that nothing needs to be done if RBLKS.EQ.1 !) */ | ||
| 488 | |||
| 489 | ✗ | ++merged; | |
| 490 | ✗ | startp += lsum; | |
| 491 | ✗ | iwork[istrtp + (nblks - 1) - merged-1] = startp + 1; | |
| 492 | ✗ | iwork[icut + (nblks - 1) - merged-1] = start + lblks; | |
| 493 | ✗ | iwork[isize + (nblks - 1) - merged-1] = size - lsum; | |
| 494 | ✗ | iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start + lblks-1]; | |
| 495 | } | ||
| 496 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
5 | PetscCheck(rblks>0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: ERROR in preprocessing - rblks = %" PetscBLASInt_FMT,rblks); |
| 497 | |||
| 498 | /* continue on the left side */ | ||
| 499 | |||
| 500 | 5 | size = lsum; | |
| 501 | 5 | blks = lblks; | |
| 502 | 5 | goto L200; | |
| 503 | } | ||
| 504 | |||
| 505 | /* SIZE = IWORK(ISIZE+NBLKS-2) */ | ||
| 506 | /* MAT1 = IWORK(ILSUM+NBLKS-2) */ | ||
| 507 | |||
| 508 | /* Note: after the dimensions SIZE and MAT1 of the last merging */ | ||
| 509 | /* operation have been determined, an upper bound for the workspace */ | ||
| 510 | /* requirements which is independent of how much deflation occurs in */ | ||
| 511 | /* the last merging operation could be determined as follows */ | ||
| 512 | /* (based on (3.15) and (3.19) from UT-CS-00-447): */ | ||
| 513 | |||
| 514 | /* IF(MAT1.LE.N/2) THEN */ | ||
| 515 | /* WSPREQ = 3*N + 3/2*(SIZE-MAT1)**2 + N*N/2 + MAT1*MAT1 */ | ||
| 516 | /* ELSE */ | ||
| 517 | /* WSPREQ = 3*N + 3/2*MAT1*MAT1 + N*N/2 + (SIZE-MAT1)**2 */ | ||
| 518 | /* END IF */ | ||
| 519 | |||
| 520 | /* IF(LWORK-WSPREQ.LT.0)THEN */ | ||
| 521 | /* not enough work space provided */ | ||
| 522 | /* INFO = -200 - (WSPREQ-LWORK) */ | ||
| 523 | /* RETURN */ | ||
| 524 | /* END IF */ | ||
| 525 | /* However, this is not really useful, since the actual check whether */ | ||
| 526 | /* enough workspace is provided happens in DMERG2.f ! */ | ||
| 527 | |||
| 528 | /* ************************************************************************* */ | ||
| 529 | |||
| 530 | /* ...Solve subproblems................................... */ | ||
| 531 | |||
| 532 | /* Divide the matrix into NBLKS submatrices using rank-r */ | ||
| 533 | /* modifications (cuts) and solve for their eigenvalues and */ | ||
| 534 | /* eigenvectors. Initialize index array to sort eigenvalues. */ | ||
| 535 | |||
| 536 | /* first block: ...................................... */ | ||
| 537 | |||
| 538 | /* correction for block 1: D1 - V1 \Sigma1 V1^T */ | ||
| 539 | |||
| 540 | 5 | ksk = ksizes[0]; | |
| 541 | 5 | rp1 = rank[0]; | |
| 542 | |||
| 543 | /* initialize the proper part of Z with the diagonal block D1 */ | ||
| 544 | /* (the correction will be made in Z and then the call of DSYEVD will */ | ||
| 545 | /* overwrite it with the eigenvectors) */ | ||
| 546 | |||
| 547 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
50 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[i+j*ldz] = d[i+j*l1d]; |
| 548 | |||
| 549 | /* copy D1 into WORK (in order to be able to restore it afterwards) */ | ||
| 550 | |||
| 551 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
50 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+j*l1d]; |
| 552 | |||
| 553 | /* copy V1 into the first RANK(1) columns of D1 and then */ | ||
| 554 | /* multiply with \Sigma1 */ | ||
| 555 | |||
| 556 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
20 | for (i = 0; i < rank[0]; ++i) { |
| 557 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
15 | PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(rp1 + i+1)*l1e], &one, &d[i*l1d], &one)); |
| 558 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
15 | PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i + rp1*l1e], &d[i*l1d], &one)); |
| 559 | } | ||
| 560 | |||
| 561 | /* multiply the first RANK(1) columns of D1 with V1^T and */ | ||
| 562 | /* subtract the result from the proper part of Z (previously */ | ||
| 563 | /* initialized with D1) */ | ||
| 564 | |||
| 565 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
5 | PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, rank, &dmone, |
| 566 | d, &l1d, &e[(rank[0]+1)*l1e], &l1e, &done, z, &ldz)); | ||
| 567 | |||
| 568 | /* restore the original D1 from WORK */ | ||
| 569 | |||
| 570 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
50 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+j*l1d] = work[i+j*ksk]; |
| 571 | |||
| 572 | /* eigenanalysis of block 1 (using DSYEVD) */ | ||
| 573 | |||
| 574 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
5 | PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, z, &ldz, ev, work, &lwork, info)); |
| 575 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
5 | SlepcCheckLapackInfo("syev",*info); |
| 576 | |||
| 577 | /* EV(1:) contains the eigenvalues in ascending order */ | ||
| 578 | /* (they are returned this way by DSYEVD) */ | ||
| 579 | |||
| 580 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
20 | for (i = 0; i < ksk; ++i) iwork[i] = i+1; |
| 581 | |||
| 582 | /* intermediate blocks: .............................. */ | ||
| 583 | |||
| 584 | 5 | np = ksk; | |
| 585 | |||
| 586 | /* remaining number of blocks */ | ||
| 587 | |||
| 588 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | if (nblks > 2) { |
| 589 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
35 | for (k = 1; k < nblks-1; ++k) { |
| 590 | |||
| 591 | /* correction for block K: */ | ||
| 592 | /* Dk - U(k-1) \Sigma(k-1) U(k-1)^T - Vk \Sigmak Vk^T */ | ||
| 593 | |||
| 594 | 30 | ksk = ksizes[k]; | |
| 595 | 30 | rp1 = rank[k]; | |
| 596 | |||
| 597 | /* initialize the proper part of Z with the diagonal block Dk */ | ||
| 598 | /* (the correction will be made in Z and then the call of DSYEVD will */ | ||
| 599 | /* overwrite it with the eigenvectors) */ | ||
| 600 | |||
| 601 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
300 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[np+i+(np+j)*ldz] = d[i+(j+k*l2d)*l1d]; |
| 602 | |||
| 603 | /* copy Dk into WORK (in order to be able to restore it afterwards) */ | ||
| 604 | |||
| 605 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
300 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+(j+k*l2d)*l1d]; |
| 606 | |||
| 607 | /* copy U(K-1) into the first RANK(K-1) columns of Dk and then */ | ||
| 608 | /* multiply with \Sigma(K-1) */ | ||
| 609 | |||
| 610 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
120 | for (i = 0; i < rank[k-1]; ++i) { |
| 611 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
90 | PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(i+(k-1)*l2e)*l1e], &one, &d[(i+k*l2d)*l1d], &one)); |
| 612 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
90 | PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i+(rank[k-1]+(k-1)*l2e)*l1e], &d[(i+k*l2d)*l1d], &one)); |
| 613 | } | ||
| 614 | |||
| 615 | /* multiply the first RANK(K-1) columns of Dk with U(k-1)^T and */ | ||
| 616 | /* subtract the result from the proper part of Z (previously */ | ||
| 617 | /* initialized with Dk) */ | ||
| 618 | |||
| 619 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
30 | PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[k-1], |
| 620 | &dmone, &d[k*l1d*l2d], | ||
| 621 | &l1d, &e[(k-1)*l1e*l2e], &l1e, &done, &z[np+np*ldz], &ldz)); | ||
| 622 | |||
| 623 | /* copy Vk into the first RANK(K) columns of Dk and then */ | ||
| 624 | /* multiply with \Sigmak */ | ||
| 625 | |||
| 626 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
120 | for (i = 0; i < rank[k]; ++i) { |
| 627 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
90 | PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(rp1+i+1 + k*l2e)*l1e], &one, &d[(i + k*l2d)*l1d], &one)); |
| 628 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
90 | PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i + (rp1 + k*l2e)*l1e], &d[(i + k*l2d)*l1d], &one)); |
| 629 | } | ||
| 630 | |||
| 631 | /* multiply the first RANK(K) columns of Dk with Vk^T and */ | ||
| 632 | /* subtract the result from the proper part of Z (previously */ | ||
| 633 | /* updated with [- U(k-1) \Sigma(k-1) U(k-1)^T]) */ | ||
| 634 | |||
| 635 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
30 | PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[k], |
| 636 | &dmone, &d[k*l1d*l2d], &l1d, | ||
| 637 | &e[(rank[k]+1 + k*l2e)*l1e], &l1e, &done, &z[np+np*ldz], &ldz)); | ||
| 638 | |||
| 639 | /* restore the original Dk from WORK */ | ||
| 640 | |||
| 641 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
300 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+(j+k*l2d)*l1d] = work[i+j*ksk]; |
| 642 | |||
| 643 | /* eigenanalysis of block K (using dsyevd) */ | ||
| 644 | |||
| 645 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
30 | PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, &z[np+np*ldz], |
| 646 | &ldz, &ev[np], work, &lwork, info)); | ||
| 647 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
30 | SlepcCheckLapackInfo("syev",*info); |
| 648 | |||
| 649 | /* EV(NPP1:) contains the eigenvalues in ascending order */ | ||
| 650 | /* (they are returned this way by DSYEVD) */ | ||
| 651 | |||
| 652 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
120 | for (i = 0; i < ksk; ++i) iwork[np + i] = i+1; |
| 653 | |||
| 654 | /* update NP */ | ||
| 655 | 30 | np += ksk; | |
| 656 | } | ||
| 657 | } | ||
| 658 | |||
| 659 | /* last block: ....................................... */ | ||
| 660 | |||
| 661 | /* correction for block NBLKS: */ | ||
| 662 | /* D(nblks) - U(nblks-1) \Sigma(nblks-1) U(nblks-1)^T */ | ||
| 663 | |||
| 664 | 5 | ksk = ksizes[nblks-1]; | |
| 665 | |||
| 666 | /* initialize the proper part of Z with the diagonal block D(nblks) */ | ||
| 667 | /* (the correction will be made in Z and then the call of DSYEVD will */ | ||
| 668 | /* overwrite it with the eigenvectors) */ | ||
| 669 | |||
| 670 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
50 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[np+i+(np+j)*ldz] = d[i+(j+(nblks-1)*l2d)*l1d]; |
| 671 | |||
| 672 | /* copy D(nblks) into WORK (in order to be able to restore it afterwards) */ | ||
| 673 | |||
| 674 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
50 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+(j+(nblks-1)*l2d)*l1d]; |
| 675 | |||
| 676 | /* copy U(nblks-1) into the first RANK(nblks-1) columns of D(nblks) and then */ | ||
| 677 | /* multiply with \Sigma(nblks-1) */ | ||
| 678 | |||
| 679 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
20 | for (i = 0; i < rank[nblks-2]; ++i) { |
| 680 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
15 | PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(i + (nblks-2)*l2e)*l1e], |
| 681 | &one, &d[(i + (nblks-1)*l2d)*l1d], &one)); | ||
| 682 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
15 | PetscCallBLAS("BLASscal",BLASscal_(&ksk, |
| 683 | &e[i + (rank[nblks-2] + (nblks-2)*l2e)*l1e], | ||
| 684 | &d[(i + (nblks-1)*l2d)*l1d], &one)); | ||
| 685 | } | ||
| 686 | |||
| 687 | /* multiply the first RANK(nblks-1) columns of D(nblks) with U(nblks-1)^T */ | ||
| 688 | /* and subtract the result from the proper part of Z (previously */ | ||
| 689 | /* initialized with D(nblks)) */ | ||
| 690 | |||
| 691 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
5 | PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[nblks - 2], |
| 692 | &dmone, &d[(nblks-1)*l1d*l2d], &l1d, | ||
| 693 | &e[(nblks-2)*l1e*l2e], &l1e, &done, &z[np+np*ldz], &ldz)); | ||
| 694 | |||
| 695 | /* restore the original D(nblks) from WORK */ | ||
| 696 | |||
| 697 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
50 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+(j+(nblks-1)*l2d)*l1d] = work[i+j*ksk]; |
| 698 | |||
| 699 | /* eigenanalysis of block NBLKS (using dsyevd) */ | ||
| 700 | |||
| 701 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
5 | PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, &z[np+np*ldz], &ldz, &ev[np], work, &lwork, info)); |
| 702 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
5 | SlepcCheckLapackInfo("syev",*info); |
| 703 | |||
| 704 | /* EV(NPP1:) contains the eigenvalues in ascending order */ | ||
| 705 | /* (they are returned this way by DSYEVD) */ | ||
| 706 | |||
| 707 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
20 | for (i = 0; i < ksk; ++i) iwork[np + i] = i+1; |
| 708 | |||
| 709 | /* note that from here on the entire workspace is available again */ | ||
| 710 | |||
| 711 | /* Perform all the merging operations. */ | ||
| 712 | |||
| 713 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
40 | for (i = 0; i < nblks-1; ++i) { |
| 714 | |||
| 715 | /* MATSIZ = total size of the current rank RANK modification problem */ | ||
| 716 | |||
| 717 | 35 | matsiz = iwork[isize + i - 1]; | |
| 718 | 35 | np = iwork[istrtp + i - 1]; | |
| 719 | 35 | kbrk = iwork[icut + i - 1]; | |
| 720 | 35 | mat1 = iwork[ilsum + i - 1]; | |
| 721 | |||
| 722 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
140 | for (j = 0; j < rank[kbrk-1]; ++j) { |
| 723 | |||
| 724 | /* NOTE: The parameter RHO in DMERG2 is modified in DSRTDF */ | ||
| 725 | /* (multiplied by 2) ! In order not to change the */ | ||
| 726 | /* singular value stored in E(:, RANK(KBRK)+1, KBRK), */ | ||
| 727 | /* we do not pass on this variable as an argument to DMERG2, */ | ||
| 728 | /* but we assign a separate variable RHO here which is passed */ | ||
| 729 | /* on to DMERG2. */ | ||
| 730 | /* Alternative solution in F90: */ | ||
| 731 | /* pass E(:,RANK(KBRK)+1,KBRK) to an INTENT(IN) parameter */ | ||
| 732 | /* in DMERG2. */ | ||
| 733 | |||
| 734 | 105 | rho = e[j + (rank[kbrk-1] + (kbrk-1)*l2e)*l1e]; | |
| 735 | |||
| 736 | /* eigenvectors are accumulated (JOBZ.EQ.'D') */ | ||
| 737 | |||
| 738 |
4/6✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
|
105 | PetscCall(BDC_dmerg2_(jobz, j+1, matsiz, &ev[np-1], &z[np-1+(np-1)*ldz], |
| 739 | ldz, &iwork[np-1], &rho, &e[(j + (kbrk-1)*l2e)*l1e], | ||
| 740 | ksizes[kbrk], &e[(rank[kbrk-1]+j+1 + (kbrk-1)*l2e)*l1e], | ||
| 741 | ksizes[kbrk-1], mat1, work, lwork, &iwork[n], tol, info, 1)); | ||
| 742 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
105 | PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in dmerg2, info = %" PetscBLASInt_FMT,*info); |
| 743 | } | ||
| 744 | |||
| 745 | /* at this point all RANK(KBRK) rank-one modifications corresponding */ | ||
| 746 | /* to the current off-diagonal block are finished. */ | ||
| 747 | /* Move on to the next off-diagonal block. */ | ||
| 748 | |||
| 749 | } | ||
| 750 | |||
| 751 | /* Re-merge the eigenvalues/vectors which were deflated at the final */ | ||
| 752 | /* merging step by sorting all eigenvalues and eigenvectors according */ | ||
| 753 | /* to the permutation stored in IWORK. */ | ||
| 754 | |||
| 755 | /* copy eigenvalues and eigenvectors in ordered form into WORK */ | ||
| 756 | /* (eigenvalues into WORK(1:N), eigenvectors into WORK(N+1:N+1+N^2)) */ | ||
| 757 | |||
| 758 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
125 | for (i = 0; i < n; ++i) { |
| 759 | 120 | j = iwork[i]; | |
| 760 | 120 | work[i] = ev[j-1]; | |
| 761 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
120 | PetscCallBLAS("BLAScopy",BLAScopy_(&n, &z[(j-1)*ldz], &one, &work[n*(i+1)], &one)); |
| 762 | } | ||
| 763 | |||
| 764 | /* copy ordered eigenvalues back from WORK(1:N) into EV */ | ||
| 765 | |||
| 766 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
5 | PetscCallBLAS("BLAScopy",BLAScopy_(&n, work, &one, ev, &one)); |
| 767 | |||
| 768 | /* copy ordered eigenvectors back from WORK(N+1:N+1+N^2) into Z */ | ||
| 769 | |||
| 770 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
3005 | for (j=0;j<n;j++) for (i=0;i<n;i++) z[i+j*ldz] = work[i+(j+1)*n]; |
| 771 |
6/12✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
|
1 | PetscFunctionReturn(PETSC_SUCCESS); |
| 772 | } | ||
| 773 |