Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3 | SLEPc - Scalable Library for Eigenvalue Problem Computations | ||
4 | Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain | ||
5 | |||
6 | This file is part of SLEPc. | ||
7 | SLEPc is distributed under a 2-clause BSD license (see LICENSE). | ||
8 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
9 | */ | ||
10 | /* | ||
11 | BDC - Block-divide and conquer (see description in README file) | ||
12 | */ | ||
13 | |||
14 | #include <slepc/private/dsimpl.h> | ||
15 | #include <slepcblaslapack.h> | ||
16 | |||
17 | 15 | static PetscErrorCode cutlr_(PetscBLASInt start,PetscBLASInt n,PetscBLASInt blkct, | |
18 | PetscBLASInt *bsizes,PetscBLASInt *ranks,PetscBLASInt *cut, | ||
19 | PetscBLASInt *lsum,PetscBLASInt *lblks,PetscBLASInt *info) | ||
20 | { | ||
21 | /* -- Routine written in LAPACK Version 3.0 style -- */ | ||
22 | /* *************************************************** */ | ||
23 | /* Written by */ | ||
24 | /* Michael Moldaschl and Wilfried Gansterer */ | ||
25 | /* University of Vienna */ | ||
26 | /* last modification: March 16, 2014 */ | ||
27 | |||
28 | /* Small adaptations of original code written by */ | ||
29 | /* Wilfried Gansterer and Bob Ward, */ | ||
30 | /* Department of Computer Science, University of Tennessee */ | ||
31 | /* see https://doi.org/10.1137/S1064827501399432 */ | ||
32 | /* *************************************************** */ | ||
33 | |||
34 | /* Purpose */ | ||
35 | /* ======= */ | ||
36 | |||
37 | /* CUTLR computes the optimal cut in a sequence of BLKCT neighboring */ | ||
38 | /* blocks whose sizes are given by the array BSIZES. */ | ||
39 | /* The sum of all block sizes in the sequence considered is given by N. */ | ||
40 | /* The cut is optimal in the sense that the difference of the sizes of */ | ||
41 | /* the resulting two halves is minimum over all cuts with minimum ranks */ | ||
42 | /* between blocks of the sequence considered. */ | ||
43 | |||
44 | /* Arguments */ | ||
45 | /* ========= */ | ||
46 | |||
47 | /* START (input) INTEGER */ | ||
48 | /* In the original array KSIZES of the calling routine DIBTDC, */ | ||
49 | /* the position where the sequence considered in this routine starts. */ | ||
50 | /* START >= 1. */ | ||
51 | |||
52 | /* N (input) INTEGER */ | ||
53 | /* The sum of all the block sizes of the sequence to be cut = */ | ||
54 | /* = sum_{i=1}^{BLKCT} BSIZES(I). */ | ||
55 | /* N >= 3. */ | ||
56 | |||
57 | /* BLKCT (input) INTEGER */ | ||
58 | /* The number of blocks in the sequence to be cut. */ | ||
59 | /* BLKCT >= 3. */ | ||
60 | |||
61 | /* BSIZES (input) INTEGER array, dimension (BLKCT) */ | ||
62 | /* The dimensions of the (quadratic) blocks of the sequence to be */ | ||
63 | /* cut. sum_{i=1}^{BLKCT} BSIZES(I) = N. */ | ||
64 | |||
65 | /* RANKS (input) INTEGER array, dimension (BLKCT-1) */ | ||
66 | /* The ranks determining the approximations of the off-diagonal */ | ||
67 | /* blocks in the sequence considered. */ | ||
68 | |||
69 | /* CUT (output) INTEGER */ | ||
70 | /* After the optimum cut has been determined, the position (in the */ | ||
71 | /* overall problem as worked on in DIBTDC !) of the last block in */ | ||
72 | /* the first half of the sequence to be cut. */ | ||
73 | /* START <= CUT <= START+BLKCT-2. */ | ||
74 | |||
75 | /* LSUM (output) INTEGER */ | ||
76 | /* After the optimum cut has been determined, the sum of the */ | ||
77 | /* block sizes in the first half of the sequence to be cut. */ | ||
78 | /* LSUM < N. */ | ||
79 | |||
80 | /* LBLKS (output) INTEGER */ | ||
81 | /* After the optimum cut has been determined, the number of the */ | ||
82 | /* blocks in the first half of the sequence to be cut. */ | ||
83 | /* 1 <= LBLKS < BLKCT. */ | ||
84 | |||
85 | /* INFO (output) INTEGER */ | ||
86 | /* = 0: successful exit. */ | ||
87 | /* < 0: illegal arguments. */ | ||
88 | /* if INFO = -i, the i-th (input) argument had an illegal */ | ||
89 | /* value. */ | ||
90 | /* > 0: illegal results. */ | ||
91 | /* if INFO = i, the i-th (output) argument had an illegal */ | ||
92 | /* value. */ | ||
93 | |||
94 | /* Further Details */ | ||
95 | /* =============== */ | ||
96 | |||
97 | /* Based on code written by */ | ||
98 | /* Wilfried Gansterer and Bob Ward, */ | ||
99 | /* Department of Computer Science, University of Tennessee */ | ||
100 | |||
101 | /* ===================================================================== */ | ||
102 | |||
103 | 15 | PetscBLASInt i, ksk, kchk, ksum, nhalf, deviat, mindev, minrnk, tmpsum; | |
104 | |||
105 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
15 | PetscFunctionBegin; |
106 | 15 | *info = 0; | |
107 | 15 | *lblks = 1; | |
108 | 15 | *lsum = 1; | |
109 | 15 | *cut = start; | |
110 | |||
111 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
15 | if (start < 1) *info = -1; |
112 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
15 | else if (n < 3) *info = -2; |
113 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
15 | else if (blkct < 3) *info = -3; |
114 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
15 | if (*info == 0) { |
115 | ksum = 0; | ||
116 | kchk = 0; | ||
117 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
95 | for (i = 0; i < blkct; ++i) { |
118 | 80 | ksk = bsizes[i]; | |
119 | 80 | ksum += ksk; | |
120 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
80 | if (ksk < 1) kchk = 1; |
121 | } | ||
122 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
15 | if (ksum != n || kchk == 1) *info = -4; |
123 | } | ||
124 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
15 | PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in CUTLR",-(*info)); |
125 | |||
126 | /* determine smallest rank in the range considered */ | ||
127 | |||
128 | minrnk = n; | ||
129 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
80 | for (i = 0; i < blkct-1; ++i) { |
130 | 65 | if (ranks[i] < minrnk) minrnk = ranks[i]; | |
131 | } | ||
132 | |||
133 | /* determine best cut among those with smallest rank */ | ||
134 | |||
135 | 15 | nhalf = n / 2; | |
136 | 15 | tmpsum = 0; | |
137 | 15 | mindev = n; | |
138 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
95 | for (i = 0; i < blkct; ++i) { |
139 | 80 | tmpsum += bsizes[i]; | |
140 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
80 | if (ranks[i] == minrnk) { |
141 | |||
142 | /* determine deviation from "optimal" cut NHALF */ | ||
143 | |||
144 | 70 | deviat = tmpsum - nhalf; | |
145 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
70 | if (deviat<0) deviat = -deviat; |
146 | |||
147 | /* compare to best deviation so far */ | ||
148 | |||
149 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
70 | if (deviat < mindev) { |
150 | 40 | mindev = deviat; | |
151 | 40 | *cut = start + i; | |
152 | 40 | *lblks = i + 1; | |
153 | 40 | *lsum = tmpsum; | |
154 | } | ||
155 | } | ||
156 | } | ||
157 | |||
158 |
2/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
15 | if (*cut < start || *cut >= start + blkct - 1) *info = 6; |
159 |
2/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
15 | else if (*lsum < 1 || *lsum >= n) *info = 7; |
160 |
2/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
15 | else if (*lblks < 1 || *lblks >= blkct) *info = 8; |
161 |
6/12✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
|
3 | PetscFunctionReturn(PETSC_SUCCESS); |
162 | } | ||
163 | |||
164 | 5 | PetscErrorCode BDC_dibtdc_(const char *jobz,PetscBLASInt n,PetscBLASInt nblks, | |
165 | PetscBLASInt *ksizes,PetscReal *d,PetscBLASInt l1d,PetscBLASInt l2d, | ||
166 | PetscReal *e,PetscBLASInt *rank,PetscBLASInt l1e,PetscBLASInt l2e, | ||
167 | PetscReal tol,PetscReal *ev,PetscReal *z,PetscBLASInt ldz,PetscReal *work, | ||
168 | PetscBLASInt lwork,PetscBLASInt *iwork,PetscBLASInt liwork, | ||
169 | PetscBLASInt *info,PetscBLASInt jobz_len) | ||
170 | { | ||
171 | /* -- Routine written in LAPACK Version 3.0 style -- */ | ||
172 | /* *************************************************** */ | ||
173 | /* Written by */ | ||
174 | /* Michael Moldaschl and Wilfried Gansterer */ | ||
175 | /* University of Vienna */ | ||
176 | /* last modification: March 16, 2014 */ | ||
177 | |||
178 | /* Small adaptations of original code written by */ | ||
179 | /* Wilfried Gansterer and Bob Ward, */ | ||
180 | /* Department of Computer Science, University of Tennessee */ | ||
181 | /* see https://doi.org/10.1137/S1064827501399432 */ | ||
182 | /* *************************************************** */ | ||
183 | |||
184 | /* Purpose */ | ||
185 | /* ======= */ | ||
186 | |||
187 | /* DIBTDC computes all eigenvalues and corresponding eigenvectors of a */ | ||
188 | /* symmetric irreducible block tridiagonal matrix with rank RANK matrices */ | ||
189 | /* as the subdiagonal blocks using a block divide and conquer method. */ | ||
190 | |||
191 | /* Arguments */ | ||
192 | /* ========= */ | ||
193 | |||
194 | /* JOBZ (input) CHARACTER*1 */ | ||
195 | /* = 'N': Compute eigenvalues only (not implemented); */ | ||
196 | /* = 'D': Compute eigenvalues and eigenvectors. */ | ||
197 | /* Eigenvectors are accumulated in the */ | ||
198 | /* divide-and-conquer process. */ | ||
199 | |||
200 | /* N (input) INTEGER */ | ||
201 | /* The dimension of the symmetric irreducible block tridiagonal */ | ||
202 | /* matrix. N >= 2. */ | ||
203 | |||
204 | /* NBLKS (input) INTEGER, 2 <= NBLKS <= N */ | ||
205 | /* The number of diagonal blocks in the matrix. */ | ||
206 | |||
207 | /* KSIZES (input) INTEGER array, dimension (NBLKS) */ | ||
208 | /* The dimension of the square diagonal blocks from top left */ | ||
209 | /* to bottom right. KSIZES(I) >= 1 for all I, and the sum of */ | ||
210 | /* KSIZES(I) for I = 1 to NBLKS has to be equal to N. */ | ||
211 | |||
212 | /* D (input) DOUBLE PRECISION array, dimension (L1D,L2D,NBLKS) */ | ||
213 | /* The lower triangular elements of the symmetric diagonal */ | ||
214 | /* blocks of the block tridiagonal matrix. Elements of the top */ | ||
215 | /* left diagonal block, which is of dimension KSIZES(1), are */ | ||
216 | /* contained in D(*,*,1); the elements of the next diagonal */ | ||
217 | /* block, which is of dimension KSIZES(2), are contained in */ | ||
218 | /* D(*,*,2); etc. */ | ||
219 | |||
220 | /* L1D (input) INTEGER */ | ||
221 | /* The leading dimension of the array D. L1D >= max(3,KMAX), */ | ||
222 | /* where KMAX is the dimension of the largest diagonal block. */ | ||
223 | |||
224 | /* L2D (input) INTEGER */ | ||
225 | /* The second dimension of the array D. L2D >= max(3,KMAX), */ | ||
226 | /* where KMAX is as stated in L1D above. */ | ||
227 | |||
228 | /* E (input) DOUBLE PRECISION array, dimension (L1E,L2E,NBLKS-1) */ | ||
229 | /* Contains the elements of the scalars (singular values) and */ | ||
230 | /* vectors (singular vectors) defining the rank RANK subdiagonal */ | ||
231 | /* blocks of the matrix. */ | ||
232 | /* E(1:RANK(K),RANK(K)+1,K) holds the RANK(K) scalars, */ | ||
233 | /* E(:,1:RANK(K),K) holds the RANK(K) column vectors, and */ | ||
234 | /* E(:,RANK(K)+2:2*RANK(K)+1,K) holds the row vectors for the K-th */ | ||
235 | /* subdiagonal block. */ | ||
236 | |||
237 | /* RANK (input) INTEGER array, dimension (NBLKS-1). */ | ||
238 | /* The ranks of all the subdiagonal blocks contained in the array E. */ | ||
239 | /* RANK(K) <= MIN(KSIZES(K), KSIZES(K+1)) */ | ||
240 | |||
241 | /* L1E (input) INTEGER */ | ||
242 | /* The leading dimension of the array E. L1E >= max(3,2*KMAX+1), */ | ||
243 | /* where KMAX is as stated in L1D above. */ | ||
244 | |||
245 | /* L2E (input) INTEGER */ | ||
246 | /* The second dimension of the array E. L2E >= max(3,2*KMAX+1), */ | ||
247 | /* where KMAX is as stated in L1D above. */ | ||
248 | |||
249 | /* TOL (input) DOUBLE PRECISION, TOL <= 1.0D-1 */ | ||
250 | /* User specified deflation tolerance for the routine DMERG2. */ | ||
251 | /* If (1.0D-1 >= TOL >= 20*EPS) then TOL is used as */ | ||
252 | /* the deflation tolerance in DSRTDF. */ | ||
253 | /* If (TOL < 20*EPS) then the standard deflation tolerance from */ | ||
254 | /* LAPACK is used as the deflation tolerance in DSRTDF. */ | ||
255 | |||
256 | /* EV (output) DOUBLE PRECISION array, dimension (N) */ | ||
257 | /* If INFO = 0, then EV contains the eigenvalues of the */ | ||
258 | /* symmetric block tridiagonal matrix in ascending order. */ | ||
259 | |||
260 | /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */ | ||
261 | /* On entry, Z will be the identity matrix. */ | ||
262 | /* On exit, Z contains the eigenvectors of the block tridiagonal */ | ||
263 | /* matrix. */ | ||
264 | |||
265 | /* LDZ (input) INTEGER */ | ||
266 | /* The leading dimension of the array Z. LDZ >= max(1,N). */ | ||
267 | |||
268 | /* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) */ | ||
269 | |||
270 | /* LWORK (input) INTEGER */ | ||
271 | /* The dimension of the array WORK. */ | ||
272 | /* In order to guarantee correct results in all cases, */ | ||
273 | /* LWORK must be at least (2*N**2 + 3*N). In many cases, */ | ||
274 | /* less workspace is required. The absolute minimum required is */ | ||
275 | /* (N**2 + 3*N). */ | ||
276 | /* If the workspace provided is not sufficient, the routine will */ | ||
277 | /* return a corresponding error code and report how much workspace */ | ||
278 | /* was missing (see INFO). */ | ||
279 | |||
280 | /* IWORK (workspace) INTEGER array, dimension (LIWORK) */ | ||
281 | |||
282 | /* LIWORK (input) INTEGER */ | ||
283 | /* The dimension of the array IWORK. */ | ||
284 | /* LIWORK must be at least (5*N + 3 + 4*NBLKS - 4): */ | ||
285 | /* 5*KMAX+3 for DSYEVD, 5*N for ????, */ | ||
286 | /* 4*NBLKS-4 for the preprocessing (merging order) */ | ||
287 | /* Summarizing, the minimum integer workspace needed is */ | ||
288 | /* MAX(5*N, 5*KMAX + 3) + 4*NBLKS - 4 */ | ||
289 | |||
290 | /* INFO (output) INTEGER */ | ||
291 | /* = 0: successful exit. */ | ||
292 | /* < 0, > -99: illegal arguments. */ | ||
293 | /* if INFO = -i, the i-th argument had an illegal value. */ | ||
294 | /* = -99: error in the preprocessing (call of CUTLR). */ | ||
295 | /* < -200: not enough workspace. Space for ABS(INFO + 200) */ | ||
296 | /* numbers is required in addition to the workspace provided, */ | ||
297 | /* otherwise some eigenvectors will be incorrect. */ | ||
298 | /* > 0: The algorithm failed to compute an eigenvalue while */ | ||
299 | /* working on the submatrix lying in rows and columns */ | ||
300 | /* INFO/(N+1) through mod(INFO,N+1). */ | ||
301 | |||
302 | /* Further Details */ | ||
303 | /* =============== */ | ||
304 | |||
305 | /* Based on code written by */ | ||
306 | /* Wilfried Gansterer and Bob Ward, */ | ||
307 | /* Department of Computer Science, University of Tennessee */ | ||
308 | |||
309 | /* This routine is comparable to Dlaed0.f from LAPACK. */ | ||
310 | |||
311 | /* ===================================================================== */ | ||
312 | |||
313 | 5 | PetscBLASInt i, j, k, np, rp1, ksk, one=1; | |
314 | 5 | PetscBLASInt cut, mat1, kchk, kbrk, blks, kmax, icut, size, ksum, lsum; | |
315 | 5 | PetscBLASInt lblks, rblks, isize, lwmin, ilsum; | |
316 | 5 | PetscBLASInt start, istck1, istck2, istck3, merged; | |
317 | 5 | PetscBLASInt liwmin, matsiz, startp, istrtp; | |
318 | 5 | PetscReal rho, done=1.0, dmone=-1.0; | |
319 | |||
320 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
5 | PetscFunctionBegin; |
321 | 5 | *info = 0; | |
322 | |||
323 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | if (*(unsigned char *)jobz != 'N' && *(unsigned char *)jobz != 'D') *info = -1; |
324 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (n < 2) *info = -2; |
325 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (nblks < 2 || nblks > n) *info = -3; |
326 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | if (*info == 0) { |
327 | ksum = 0; | ||
328 | kmax = 0; | ||
329 | kchk = 0; | ||
330 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
45 | for (k = 0; k < nblks; ++k) { |
331 | 40 | ksk = ksizes[k]; | |
332 | 40 | ksum += ksk; | |
333 | 40 | if (ksk > kmax) kmax = ksk; | |
334 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
40 | if (ksk < 1) kchk = 1; |
335 | } | ||
336 | 5 | lwmin = n*n + n * 3; | |
337 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | liwmin = PetscMax(n * 5,kmax * 5 + 3) + 4*nblks - 4; |
338 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | if (ksum != n || kchk == 1) *info = -4; |
339 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (l1d < PetscMax(3,kmax)) *info = -6; |
340 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (l2d < PetscMax(3,kmax)) *info = -7; |
341 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (l1e < PetscMax(3,2*kmax + 1)) *info = -10; |
342 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (l2e < PetscMax(3,2*kmax + 1)) *info = -11; |
343 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (tol > .1) *info = -12; |
344 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (ldz < PetscMax(1,n)) *info = -15; |
345 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (lwork < lwmin) *info = -17; |
346 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | else if (liwork < liwmin) *info = -19; |
347 | } | ||
348 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | if (*info == 0) { |
349 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
40 | for (k = 0; k < nblks-1; ++k) { |
350 |
2/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
35 | if (rank[k] > PetscMin(ksizes[k],ksizes[k+1]) || rank[k] < 1) *info = -9; |
351 | } | ||
352 | } | ||
353 | |||
354 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
5 | PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DIBTDC",-(*info)); |
355 | |||
356 | /* **************************************************************************** */ | ||
357 | |||
358 | /* ...Preprocessing..................................................... */ | ||
359 | /* Determine the optimal order for merging the subblocks and how much */ | ||
360 | /* workspace will be needed for the merging (determined by the last */ | ||
361 | /* merge). Cutpoints for the merging operations are determined and stored */ | ||
362 | /* in reverse chronological order (starting with the final merging */ | ||
363 | /* operation). */ | ||
364 | |||
365 | /* integer workspace requirements for the preprocessing: */ | ||
366 | /* 4*(NBLKS-1) for merging history */ | ||
367 | /* at most 3*(NBLKS-1) for stack */ | ||
368 | |||
369 | 5 | start = 1; | |
370 | 5 | size = n; | |
371 | 5 | blks = nblks; | |
372 | 5 | merged = 0; | |
373 | 5 | k = 0; | |
374 | |||
375 | /* integer workspace used for the stack is not needed any more after the */ | ||
376 | /* preprocessing and therefore can use part of the 5*N */ | ||
377 | /* integer workspace needed later on in the code */ | ||
378 | |||
379 | 5 | istck1 = 0; | |
380 | 5 | istck2 = istck1 + nblks; | |
381 | 5 | istck3 = istck2 + nblks; | |
382 | |||
383 | /* integer workspace used for storing the order of merges starts AFTER */ | ||
384 | /* the integer workspace 5*N+3 which is needed later on in the code */ | ||
385 | /* (5*KMAX+3 for DSYEVD, 4*N in DMERG2) */ | ||
386 | |||
387 | 5 | istrtp = n * 5 + 4; | |
388 | 5 | icut = istrtp + nblks - 1; | |
389 | 5 | isize = icut + nblks - 1; | |
390 | 5 | ilsum = isize + nblks - 1; | |
391 | |||
392 | 10 | L200: | |
393 | |||
394 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
15 | if (nblks >= 3) { |
395 | |||
396 | /* Determine the cut point. Note that in the routine CUTLR it is */ | ||
397 | /* chosen such that it yields the best balanced merging operation */ | ||
398 | /* among all the rank modifications with minimum rank. */ | ||
399 | |||
400 |
4/6✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
|
15 | PetscCall(cutlr_(start, size, blks, &ksizes[start-1], &rank[start-1], &cut, &lsum, &lblks, info)); |
401 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
15 | PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in cutlr, info = %" PetscBLASInt_FMT,*info); |
402 | |||
403 | } else { | ||
404 | ✗ | cut = 1; | |
405 | ✗ | lsum = ksizes[0]; | |
406 | ✗ | lblks = 1; | |
407 | } | ||
408 | |||
409 | 15 | ++merged; | |
410 | 15 | startp = 0; | |
411 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
35 | for (i = 0; i < start-1; ++i) startp += ksizes[i]; |
412 | 15 | iwork[istrtp + (nblks - 1) - merged-1] = startp + 1; | |
413 | 15 | iwork[icut + (nblks - 1) - merged-1] = cut; | |
414 | 15 | iwork[isize + (nblks - 1) - merged-1] = size; | |
415 | 15 | iwork[ilsum + (nblks - 1) - merged-1] = lsum; | |
416 | |||
417 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
15 | if (lblks == 2) { |
418 | |||
419 | /* one merge in left branch, left branch done */ | ||
420 | 10 | ++merged; | |
421 | 10 | iwork[istrtp + (nblks - 1) - merged-1] = startp + 1; | |
422 | 10 | iwork[icut + (nblks - 1) - merged-1] = start; | |
423 | 10 | iwork[isize + (nblks - 1) - merged-1] = lsum; | |
424 | 10 | iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start-1]; | |
425 | } | ||
426 | |||
427 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
15 | if (lblks == 1 || lblks == 2) { |
428 | |||
429 | /* left branch done, continue on the right side */ | ||
430 | 10 | start += lblks; | |
431 | 10 | size -= lsum; | |
432 | 10 | blks -= lblks; | |
433 | |||
434 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
10 | PetscCheck(blks>0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in preprocessing, blks = %" PetscBLASInt_FMT,blks); |
435 | |||
436 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
10 | if (blks == 2) { |
437 | |||
438 | /* one merge in right branch, right branch done */ | ||
439 | 10 | ++merged; | |
440 | 10 | startp += lsum; | |
441 | 10 | iwork[istrtp + (nblks - 1) - merged-1] = startp + 1; | |
442 | 10 | iwork[icut + (nblks - 1) - merged-1] = start; | |
443 | 10 | iwork[isize + (nblks - 1) - merged-1] = size; | |
444 | 10 | iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start-1]; | |
445 | } | ||
446 | |||
447 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
10 | if (blks == 1 || blks == 2) { |
448 | |||
449 | /* get the next subproblem from the stack or finished */ | ||
450 | |||
451 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (k >= 1) { |
452 | |||
453 | /* something left on the stack */ | ||
454 | 5 | start = iwork[istck1 + k-1]; | |
455 | 5 | size = iwork[istck2 + k-1]; | |
456 | 5 | blks = iwork[istck3 + k-1]; | |
457 | 5 | --k; | |
458 | 5 | goto L200; | |
459 | } else { | ||
460 | |||
461 | /* nothing left on the stack */ | ||
462 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
5 | PetscCheck(merged==nblks-1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ERROR in preprocessing - not enough merges performed"); |
463 | |||
464 | /* exit preprocessing */ | ||
465 | |||
466 | } | ||
467 | } else { | ||
468 | |||
469 | /* BLKS.GE.3, and therefore analyze the right side */ | ||
470 | |||
471 | ✗ | goto L200; | |
472 | } | ||
473 | } else { | ||
474 | |||
475 | /* LBLKS.GE.3, and therefore check the right side and */ | ||
476 | /* put it on the stack if required */ | ||
477 | |||
478 | 5 | rblks = blks - lblks; | |
479 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | if (rblks >= 3) { |
480 | 5 | ++k; | |
481 | 5 | iwork[istck1 + k-1] = cut + 1; | |
482 | 5 | iwork[istck2 + k-1] = size - lsum; | |
483 | 5 | iwork[istck3 + k-1] = rblks; | |
484 | ✗ | } else if (rblks == 2) { | |
485 | |||
486 | /* one merge in right branch, right branch done */ | ||
487 | /* (note that nothing needs to be done if RBLKS.EQ.1 !) */ | ||
488 | |||
489 | ✗ | ++merged; | |
490 | ✗ | startp += lsum; | |
491 | ✗ | iwork[istrtp + (nblks - 1) - merged-1] = startp + 1; | |
492 | ✗ | iwork[icut + (nblks - 1) - merged-1] = start + lblks; | |
493 | ✗ | iwork[isize + (nblks - 1) - merged-1] = size - lsum; | |
494 | ✗ | iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start + lblks-1]; | |
495 | } | ||
496 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
5 | PetscCheck(rblks>0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: ERROR in preprocessing - rblks = %" PetscBLASInt_FMT,rblks); |
497 | |||
498 | /* continue on the left side */ | ||
499 | |||
500 | 5 | size = lsum; | |
501 | 5 | blks = lblks; | |
502 | 5 | goto L200; | |
503 | } | ||
504 | |||
505 | /* SIZE = IWORK(ISIZE+NBLKS-2) */ | ||
506 | /* MAT1 = IWORK(ILSUM+NBLKS-2) */ | ||
507 | |||
508 | /* Note: after the dimensions SIZE and MAT1 of the last merging */ | ||
509 | /* operation have been determined, an upper bound for the workspace */ | ||
510 | /* requirements which is independent of how much deflation occurs in */ | ||
511 | /* the last merging operation could be determined as follows */ | ||
512 | /* (based on (3.15) and (3.19) from UT-CS-00-447): */ | ||
513 | |||
514 | /* IF(MAT1.LE.N/2) THEN */ | ||
515 | /* WSPREQ = 3*N + 3/2*(SIZE-MAT1)**2 + N*N/2 + MAT1*MAT1 */ | ||
516 | /* ELSE */ | ||
517 | /* WSPREQ = 3*N + 3/2*MAT1*MAT1 + N*N/2 + (SIZE-MAT1)**2 */ | ||
518 | /* END IF */ | ||
519 | |||
520 | /* IF(LWORK-WSPREQ.LT.0)THEN */ | ||
521 | /* not enough work space provided */ | ||
522 | /* INFO = -200 - (WSPREQ-LWORK) */ | ||
523 | /* RETURN */ | ||
524 | /* END IF */ | ||
525 | /* However, this is not really useful, since the actual check whether */ | ||
526 | /* enough workspace is provided happens in DMERG2.f ! */ | ||
527 | |||
528 | /* ************************************************************************* */ | ||
529 | |||
530 | /* ...Solve subproblems................................... */ | ||
531 | |||
532 | /* Divide the matrix into NBLKS submatrices using rank-r */ | ||
533 | /* modifications (cuts) and solve for their eigenvalues and */ | ||
534 | /* eigenvectors. Initialize index array to sort eigenvalues. */ | ||
535 | |||
536 | /* first block: ...................................... */ | ||
537 | |||
538 | /* correction for block 1: D1 - V1 \Sigma1 V1^T */ | ||
539 | |||
540 | 5 | ksk = ksizes[0]; | |
541 | 5 | rp1 = rank[0]; | |
542 | |||
543 | /* initialize the proper part of Z with the diagonal block D1 */ | ||
544 | /* (the correction will be made in Z and then the call of DSYEVD will */ | ||
545 | /* overwrite it with the eigenvectors) */ | ||
546 | |||
547 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
50 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[i+j*ldz] = d[i+j*l1d]; |
548 | |||
549 | /* copy D1 into WORK (in order to be able to restore it afterwards) */ | ||
550 | |||
551 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
50 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+j*l1d]; |
552 | |||
553 | /* copy V1 into the first RANK(1) columns of D1 and then */ | ||
554 | /* multiply with \Sigma1 */ | ||
555 | |||
556 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
20 | for (i = 0; i < rank[0]; ++i) { |
557 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
15 | PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(rp1 + i+1)*l1e], &one, &d[i*l1d], &one)); |
558 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
15 | PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i + rp1*l1e], &d[i*l1d], &one)); |
559 | } | ||
560 | |||
561 | /* multiply the first RANK(1) columns of D1 with V1^T and */ | ||
562 | /* subtract the result from the proper part of Z (previously */ | ||
563 | /* initialized with D1) */ | ||
564 | |||
565 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
5 | PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, rank, &dmone, |
566 | d, &l1d, &e[(rank[0]+1)*l1e], &l1e, &done, z, &ldz)); | ||
567 | |||
568 | /* restore the original D1 from WORK */ | ||
569 | |||
570 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
50 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+j*l1d] = work[i+j*ksk]; |
571 | |||
572 | /* eigenanalysis of block 1 (using DSYEVD) */ | ||
573 | |||
574 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
5 | PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, z, &ldz, ev, work, &lwork, info)); |
575 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
5 | SlepcCheckLapackInfo("syev",*info); |
576 | |||
577 | /* EV(1:) contains the eigenvalues in ascending order */ | ||
578 | /* (they are returned this way by DSYEVD) */ | ||
579 | |||
580 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
20 | for (i = 0; i < ksk; ++i) iwork[i] = i+1; |
581 | |||
582 | /* intermediate blocks: .............................. */ | ||
583 | |||
584 | 5 | np = ksk; | |
585 | |||
586 | /* remaining number of blocks */ | ||
587 | |||
588 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | if (nblks > 2) { |
589 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
35 | for (k = 1; k < nblks-1; ++k) { |
590 | |||
591 | /* correction for block K: */ | ||
592 | /* Dk - U(k-1) \Sigma(k-1) U(k-1)^T - Vk \Sigmak Vk^T */ | ||
593 | |||
594 | 30 | ksk = ksizes[k]; | |
595 | 30 | rp1 = rank[k]; | |
596 | |||
597 | /* initialize the proper part of Z with the diagonal block Dk */ | ||
598 | /* (the correction will be made in Z and then the call of DSYEVD will */ | ||
599 | /* overwrite it with the eigenvectors) */ | ||
600 | |||
601 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
300 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[np+i+(np+j)*ldz] = d[i+(j+k*l2d)*l1d]; |
602 | |||
603 | /* copy Dk into WORK (in order to be able to restore it afterwards) */ | ||
604 | |||
605 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
300 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+(j+k*l2d)*l1d]; |
606 | |||
607 | /* copy U(K-1) into the first RANK(K-1) columns of Dk and then */ | ||
608 | /* multiply with \Sigma(K-1) */ | ||
609 | |||
610 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
120 | for (i = 0; i < rank[k-1]; ++i) { |
611 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
90 | PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(i+(k-1)*l2e)*l1e], &one, &d[(i+k*l2d)*l1d], &one)); |
612 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
90 | PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i+(rank[k-1]+(k-1)*l2e)*l1e], &d[(i+k*l2d)*l1d], &one)); |
613 | } | ||
614 | |||
615 | /* multiply the first RANK(K-1) columns of Dk with U(k-1)^T and */ | ||
616 | /* subtract the result from the proper part of Z (previously */ | ||
617 | /* initialized with Dk) */ | ||
618 | |||
619 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
30 | PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[k-1], |
620 | &dmone, &d[k*l1d*l2d], | ||
621 | &l1d, &e[(k-1)*l1e*l2e], &l1e, &done, &z[np+np*ldz], &ldz)); | ||
622 | |||
623 | /* copy Vk into the first RANK(K) columns of Dk and then */ | ||
624 | /* multiply with \Sigmak */ | ||
625 | |||
626 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
120 | for (i = 0; i < rank[k]; ++i) { |
627 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
90 | PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(rp1+i+1 + k*l2e)*l1e], &one, &d[(i + k*l2d)*l1d], &one)); |
628 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
90 | PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i + (rp1 + k*l2e)*l1e], &d[(i + k*l2d)*l1d], &one)); |
629 | } | ||
630 | |||
631 | /* multiply the first RANK(K) columns of Dk with Vk^T and */ | ||
632 | /* subtract the result from the proper part of Z (previously */ | ||
633 | /* updated with [- U(k-1) \Sigma(k-1) U(k-1)^T]) */ | ||
634 | |||
635 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
30 | PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[k], |
636 | &dmone, &d[k*l1d*l2d], &l1d, | ||
637 | &e[(rank[k]+1 + k*l2e)*l1e], &l1e, &done, &z[np+np*ldz], &ldz)); | ||
638 | |||
639 | /* restore the original Dk from WORK */ | ||
640 | |||
641 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
300 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+(j+k*l2d)*l1d] = work[i+j*ksk]; |
642 | |||
643 | /* eigenanalysis of block K (using dsyevd) */ | ||
644 | |||
645 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
30 | PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, &z[np+np*ldz], |
646 | &ldz, &ev[np], work, &lwork, info)); | ||
647 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
30 | SlepcCheckLapackInfo("syev",*info); |
648 | |||
649 | /* EV(NPP1:) contains the eigenvalues in ascending order */ | ||
650 | /* (they are returned this way by DSYEVD) */ | ||
651 | |||
652 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
120 | for (i = 0; i < ksk; ++i) iwork[np + i] = i+1; |
653 | |||
654 | /* update NP */ | ||
655 | 30 | np += ksk; | |
656 | } | ||
657 | } | ||
658 | |||
659 | /* last block: ....................................... */ | ||
660 | |||
661 | /* correction for block NBLKS: */ | ||
662 | /* D(nblks) - U(nblks-1) \Sigma(nblks-1) U(nblks-1)^T */ | ||
663 | |||
664 | 5 | ksk = ksizes[nblks-1]; | |
665 | |||
666 | /* initialize the proper part of Z with the diagonal block D(nblks) */ | ||
667 | /* (the correction will be made in Z and then the call of DSYEVD will */ | ||
668 | /* overwrite it with the eigenvectors) */ | ||
669 | |||
670 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
50 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[np+i+(np+j)*ldz] = d[i+(j+(nblks-1)*l2d)*l1d]; |
671 | |||
672 | /* copy D(nblks) into WORK (in order to be able to restore it afterwards) */ | ||
673 | |||
674 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
50 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+(j+(nblks-1)*l2d)*l1d]; |
675 | |||
676 | /* copy U(nblks-1) into the first RANK(nblks-1) columns of D(nblks) and then */ | ||
677 | /* multiply with \Sigma(nblks-1) */ | ||
678 | |||
679 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
20 | for (i = 0; i < rank[nblks-2]; ++i) { |
680 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
15 | PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(i + (nblks-2)*l2e)*l1e], |
681 | &one, &d[(i + (nblks-1)*l2d)*l1d], &one)); | ||
682 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
15 | PetscCallBLAS("BLASscal",BLASscal_(&ksk, |
683 | &e[i + (rank[nblks-2] + (nblks-2)*l2e)*l1e], | ||
684 | &d[(i + (nblks-1)*l2d)*l1d], &one)); | ||
685 | } | ||
686 | |||
687 | /* multiply the first RANK(nblks-1) columns of D(nblks) with U(nblks-1)^T */ | ||
688 | /* and subtract the result from the proper part of Z (previously */ | ||
689 | /* initialized with D(nblks)) */ | ||
690 | |||
691 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
5 | PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[nblks - 2], |
692 | &dmone, &d[(nblks-1)*l1d*l2d], &l1d, | ||
693 | &e[(nblks-2)*l1e*l2e], &l1e, &done, &z[np+np*ldz], &ldz)); | ||
694 | |||
695 | /* restore the original D(nblks) from WORK */ | ||
696 | |||
697 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
50 | for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+(j+(nblks-1)*l2d)*l1d] = work[i+j*ksk]; |
698 | |||
699 | /* eigenanalysis of block NBLKS (using dsyevd) */ | ||
700 | |||
701 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
5 | PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, &z[np+np*ldz], &ldz, &ev[np], work, &lwork, info)); |
702 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
5 | SlepcCheckLapackInfo("syev",*info); |
703 | |||
704 | /* EV(NPP1:) contains the eigenvalues in ascending order */ | ||
705 | /* (they are returned this way by DSYEVD) */ | ||
706 | |||
707 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
20 | for (i = 0; i < ksk; ++i) iwork[np + i] = i+1; |
708 | |||
709 | /* note that from here on the entire workspace is available again */ | ||
710 | |||
711 | /* Perform all the merging operations. */ | ||
712 | |||
713 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
40 | for (i = 0; i < nblks-1; ++i) { |
714 | |||
715 | /* MATSIZ = total size of the current rank RANK modification problem */ | ||
716 | |||
717 | 35 | matsiz = iwork[isize + i - 1]; | |
718 | 35 | np = iwork[istrtp + i - 1]; | |
719 | 35 | kbrk = iwork[icut + i - 1]; | |
720 | 35 | mat1 = iwork[ilsum + i - 1]; | |
721 | |||
722 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
140 | for (j = 0; j < rank[kbrk-1]; ++j) { |
723 | |||
724 | /* NOTE: The parameter RHO in DMERG2 is modified in DSRTDF */ | ||
725 | /* (multiplied by 2) ! In order not to change the */ | ||
726 | /* singular value stored in E(:, RANK(KBRK)+1, KBRK), */ | ||
727 | /* we do not pass on this variable as an argument to DMERG2, */ | ||
728 | /* but we assign a separate variable RHO here which is passed */ | ||
729 | /* on to DMERG2. */ | ||
730 | /* Alternative solution in F90: */ | ||
731 | /* pass E(:,RANK(KBRK)+1,KBRK) to an INTENT(IN) parameter */ | ||
732 | /* in DMERG2. */ | ||
733 | |||
734 | 105 | rho = e[j + (rank[kbrk-1] + (kbrk-1)*l2e)*l1e]; | |
735 | |||
736 | /* eigenvectors are accumulated (JOBZ.EQ.'D') */ | ||
737 | |||
738 |
4/6✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
|
105 | PetscCall(BDC_dmerg2_(jobz, j+1, matsiz, &ev[np-1], &z[np-1+(np-1)*ldz], |
739 | ldz, &iwork[np-1], &rho, &e[(j + (kbrk-1)*l2e)*l1e], | ||
740 | ksizes[kbrk], &e[(rank[kbrk-1]+j+1 + (kbrk-1)*l2e)*l1e], | ||
741 | ksizes[kbrk-1], mat1, work, lwork, &iwork[n], tol, info, 1)); | ||
742 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
105 | PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in dmerg2, info = %" PetscBLASInt_FMT,*info); |
743 | } | ||
744 | |||
745 | /* at this point all RANK(KBRK) rank-one modifications corresponding */ | ||
746 | /* to the current off-diagonal block are finished. */ | ||
747 | /* Move on to the next off-diagonal block. */ | ||
748 | |||
749 | } | ||
750 | |||
751 | /* Re-merge the eigenvalues/vectors which were deflated at the final */ | ||
752 | /* merging step by sorting all eigenvalues and eigenvectors according */ | ||
753 | /* to the permutation stored in IWORK. */ | ||
754 | |||
755 | /* copy eigenvalues and eigenvectors in ordered form into WORK */ | ||
756 | /* (eigenvalues into WORK(1:N), eigenvectors into WORK(N+1:N+1+N^2)) */ | ||
757 | |||
758 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
125 | for (i = 0; i < n; ++i) { |
759 | 120 | j = iwork[i]; | |
760 | 120 | work[i] = ev[j-1]; | |
761 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
120 | PetscCallBLAS("BLAScopy",BLAScopy_(&n, &z[(j-1)*ldz], &one, &work[n*(i+1)], &one)); |
762 | } | ||
763 | |||
764 | /* copy ordered eigenvalues back from WORK(1:N) into EV */ | ||
765 | |||
766 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
5 | PetscCallBLAS("BLAScopy",BLAScopy_(&n, work, &one, ev, &one)); |
767 | |||
768 | /* copy ordered eigenvectors back from WORK(N+1:N+1+N^2) into Z */ | ||
769 | |||
770 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
3005 | for (j=0;j<n;j++) for (i=0;i<n;i++) z[i+j*ldz] = work[i+(j+1)*n]; |
771 |
6/12✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
|
1 | PetscFunctionReturn(PETSC_SUCCESS); |
772 | } | ||
773 |