GCC Code Coverage Report


Directory: ./
File: src/sys/classes/ds/impls/hep/bdc/dsbtdc.c
Date: 2025-10-03 04:28:47
Exec Total Coverage
Lines: 136 160 85.0%
Functions: 1 1 100.0%
Branches: 150 346 43.4%

Line Branch Exec Source
1 /*
2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 SLEPc - Scalable Library for Eigenvalue Problem Computations
4 Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5
6 This file is part of SLEPc.
7 SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 */
10 /*
11 BDC - Block-divide and conquer (see description in README file)
12 */
13
14 #include <slepc/private/dsimpl.h>
15 #include <slepcblaslapack.h>
16
17 10 PetscErrorCode BDC_dsbtdc_(const char *jobz,const char *jobacc,PetscBLASInt n,
18 PetscBLASInt nblks,PetscBLASInt *ksizes,PetscReal *d,PetscBLASInt l1d,
19 PetscBLASInt l2d,PetscReal *e,PetscBLASInt l1e,PetscBLASInt l2e,PetscReal tol,
20 PetscReal tau1,PetscReal tau2,PetscReal *ev,PetscReal *z,PetscBLASInt ldz,
21 PetscReal *work,PetscBLASInt lwork,PetscBLASInt *iwork,PetscBLASInt liwork,
22 PetscReal *mingap,PetscBLASInt *mingapi,PetscBLASInt *info,
23 PetscBLASInt jobz_len,PetscBLASInt jobacc_len)
24 {
25 /* -- Routine written in LAPACK Version 3.0 style -- */
26 /* *************************************************** */
27 /* Written by */
28 /* Michael Moldaschl and Wilfried Gansterer */
29 /* University of Vienna */
30 /* last modification: March 28, 2014 */
31
32 /* Small adaptations of original code written by */
33 /* Wilfried Gansterer and Bob Ward, */
34 /* Department of Computer Science, University of Tennessee */
35 /* see https://doi.org/10.1137/S1064827501399432 */
36 /* *************************************************** */
37
38 /* Purpose */
39 /* ======= */
40
41 /* DSBTDC computes approximations to all eigenvalues and eigenvectors */
42 /* of a symmetric block tridiagonal matrix using the divide and */
43 /* conquer method with lower rank approximations to the subdiagonal blocks. */
44
45 /* This code makes very mild assumptions about floating point */
46 /* arithmetic. It will work on machines with a guard digit in */
47 /* add/subtract, or on those binary machines without guard digits */
48 /* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */
49 /* It could conceivably fail on hexadecimal or decimal machines */
50 /* without guard digits, but we know of none. See DLAED3M for details. */
51
52 /* Arguments */
53 /* ========= */
54
55 /* JOBZ (input) CHARACTER*1 */
56 /* = 'N': Compute eigenvalues only (not implemented); */
57 /* = 'D': Compute eigenvalues and eigenvectors. Eigenvectors */
58 /* are accumulated in the divide-and-conquer process. */
59
60 /* JOBACC (input) CHARACTER*1 */
61 /* = 'A' ("automatic"): The accuracy parameters TAU1 and TAU2 */
62 /* are determined automatically from the */
63 /* parameter TOL according to the analytical */
64 /* bounds. In that case the input values of */
65 /* TAU1 and TAU2 are irrelevant (ignored). */
66 /* = 'M' ("manual"): The input values of the accuracy parameters */
67 /* TAU1 and TAU2 are used. In that case the input */
68 /* value of the parameter TOL is irrelevant */
69 /* (ignored). */
70
71 /* N (input) INTEGER */
72 /* The dimension of the symmetric block tridiagonal matrix. */
73 /* N >= 1. */
74
75 /* NBLKS (input) INTEGER, 1 <= NBLKS <= N */
76 /* The number of diagonal blocks in the matrix. */
77
78 /* KSIZES (input) INTEGER array, dimension (NBLKS) */
79 /* The dimensions of the square diagonal blocks from top left */
80 /* to bottom right. KSIZES(I) >= 1 for all I, and the sum of */
81 /* KSIZES(I) for I = 1 to NBLKS has to be equal to N. */
82
83 /* D (input) DOUBLE PRECISION array, dimension (L1D,L2D,NBLKS) */
84 /* The lower triangular elements of the symmetric diagonal */
85 /* blocks of the block tridiagonal matrix. The elements of the top */
86 /* left diagonal block, which is of dimension KSIZES(1), have to */
87 /* be placed in D(*,*,1); the elements of the next diagonal */
88 /* block, which is of dimension KSIZES(2), have to be placed in */
89 /* D(*,*,2); etc. */
90
91 /* L1D (input) INTEGER */
92 /* The leading dimension of the array D. L1D >= max(3,KMAX), */
93 /* where KMAX is the dimension of the largest diagonal block, */
94 /* i.e., KMAX = max_I (KSIZES(I)). */
95
96 /* L2D (input) INTEGER */
97 /* The second dimension of the array D. L2D >= max(3,KMAX), */
98 /* where KMAX is as stated in L1D above. */
99
100 /* E (input) DOUBLE PRECISION array, dimension (L1E,L2E,NBLKS-1) */
101 /* The elements of the subdiagonal blocks of the */
102 /* block tridiagonal matrix. The elements of the top left */
103 /* subdiagonal block, which is KSIZES(2) x KSIZES(1), have to be */
104 /* placed in E(*,*,1); the elements of the next subdiagonal block, */
105 /* which is KSIZES(3) x KSIZES(2), have to be placed in E(*,*,2); etc. */
106 /* During runtime, the original contents of E(*,*,K) is */
107 /* overwritten by the singular vectors and singular values of */
108 /* the lower rank representation. */
109
110 /* L1E (input) INTEGER */
111 /* The leading dimension of the array E. L1E >= max(3,2*KMAX+1), */
112 /* where KMAX is as stated in L1D above. The size of L1E enables */
113 /* the storage of ALL singular vectors and singular values for */
114 /* the corresponding off-diagonal block in E(*,*,K) and therefore */
115 /* there are no restrictions on the rank of the approximation */
116 /* (only the "natural" restriction */
117 /* RANK(K) .LE. MIN(KSIZES(K),KSIZES(K+1))). */
118
119 /* L2E (input) INTEGER */
120 /* The second dimension of the array E. L2E >= max(3,2*KMAX+1), */
121 /* where KMAX is as stated in L1D above. The size of L2E enables */
122 /* the storage of ALL singular vectors and singular values for */
123 /* the corresponding off-diagonal block in E(*,*,K) and therefore */
124 /* there are no restrictions on the rank of the approximation */
125 /* (only the "natural" restriction */
126 /* RANK(K) .LE. MIN(KSIZES(K),KSIZES(K+1))). */
127
128 /* TOL (input) DOUBLE PRECISION, TOL.LE.TOLMAX */
129 /* User specified tolerance for the residuals of the computed */
130 /* eigenpairs. If (JOBACC.EQ.'A') then it is used to determine */
131 /* TAU1 and TAU2; ignored otherwise. */
132 /* If (TOL.LT.40*EPS .AND. JOBACC.EQ.'A') then TAU1 is set to machine */
133 /* epsilon and TAU2 is set to the standard deflation tolerance from */
134 /* LAPACK. */
135
136 /* TAU1 (input) DOUBLE PRECISION, TAU1.LE.TOLMAX/2 */
137 /* User specified tolerance for determining the rank of the */
138 /* lower rank approximations to the off-diagonal blocks. */
139 /* The rank for each off-diagonal block is determined such that */
140 /* the resulting absolute eigenvalue error is less than or equal */
141 /* to TAU1. */
142 /* If (JOBACC.EQ.'A') then TAU1 is determined automatically from */
143 /* TOL and the input value is ignored. */
144 /* If (JOBACC.EQ.'M' .AND. TAU1.LT.20*EPS) then TAU1 is set to */
145 /* machine epsilon. */
146
147 /* TAU2 (input) DOUBLE PRECISION, TAU2.LE.TOLMAX/2 */
148 /* User specified deflation tolerance for the routine DIBTDC. */
149 /* If (1.0D-1.GT.TAU2.GT.20*EPS) then TAU2 is used as */
150 /* the deflation tolerance in DSRTDF (EPS is the machine epsilon). */
151 /* If (TAU2.LE.20*EPS) then the standard deflation tolerance from */
152 /* LAPACK is used as the deflation tolerance in DSRTDF. */
153 /* If (JOBACC.EQ.'A') then TAU2 is determined automatically from */
154 /* TOL and the input value is ignored. */
155 /* If (JOBACC.EQ.'M' .AND. TAU2.LT.20*EPS) then TAU2 is set to */
156 /* the standard deflation tolerance from LAPACK. */
157
158 /* EV (output) DOUBLE PRECISION array, dimension (N) */
159 /* If INFO = 0, then EV contains the computed eigenvalues of the */
160 /* symmetric block tridiagonal matrix in ascending order. */
161
162 /* Z (output) DOUBLE PRECISION array, dimension (LDZ,N) */
163 /* If (JOBZ.EQ.'D' .AND. INFO = 0) */
164 /* then Z contains the orthonormal eigenvectors of the symmetric */
165 /* block tridiagonal matrix computed by the routine DIBTDC */
166 /* (accumulated in the divide-and-conquer process). */
167 /* If (-199 < INFO < -99) then Z contains the orthonormal */
168 /* eigenvectors of the symmetric block tridiagonal matrix, */
169 /* computed without divide-and-conquer (quick returns). */
170 /* Otherwise not referenced. */
171
172 /* LDZ (input) INTEGER */
173 /* The leading dimension of the array Z. LDZ >= max(1,N). */
174
175 /* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */
176
177 /* LWORK (input) INTEGER */
178 /* The dimension of the array WORK. */
179 /* If NBLKS.EQ.1, then LWORK has to be at least 2N^2+6N+1 */
180 /* (for the call of DSYEVD). */
181 /* If NBLKS.GE.2 and (JOBZ.EQ.'D') then the absolute minimum */
182 /* required for DIBTDC is (N**2 + 3*N). This will not always */
183 /* suffice, though, the routine will return a corresponding */
184 /* error code and report how much work space was missing (see */
185 /* INFO). */
186 /* In order to guarantee correct results in all cases where */
187 /* NBLKS.GE.2, LWORK must be at least (2*N**2 + 3*N). */
188
189 /* IWORK (workspace/output) INTEGER array, dimension (LIWORK) */
190
191 /* LIWORK (input) INTEGER */
192 /* The dimension of the array IWORK. */
193 /* LIWORK must be at least (5*N + 5*NBLKS - 1) (for DIBTDC) */
194 /* Note that this should also suffice for the call of DSYEVD on a */
195 /* diagonal block which requires (5*KMAX + 3). */
196
197 /* MINGAP (output) DOUBLE PRECISION */
198 /* The minimum "gap" between the approximate eigenvalues */
199 /* computed, i.e., MIN( ABS(EV(I+1)-EV(I)) for I=1,2,..., N-1 */
200 /* IF (MINGAP.LE.TOL/10) THEN a warning flag is returned in INFO, */
201 /* because the computed eigenvectors may be unreliable individually */
202 /* (only the subspaces spanned are approximated reliably). */
203
204 /* MINGAPI (output) INTEGER */
205 /* Index I where the minimum gap in the spectrum occurred. */
206
207 /* INFO (output) INTEGER */
208 /* = 0: successful exit, no special cases occurred. */
209 /* < -200: not enough workspace. Space for ABS(INFO + 200) */
210 /* numbers is required in addition to the workspace provided, */
211 /* otherwise some of the computed eigenvectors will be incorrect. */
212 /* < -99, > -199: successful exit, but quick returns. */
213 /* if INFO = -100, successful exit, but the input matrix */
214 /* was the zero matrix and no */
215 /* divide-and-conquer was performed */
216 /* if INFO = -101, successful exit, but N was 1 and no */
217 /* divide-and-conquer was performed */
218 /* if INFO = -102, successful exit, but only a single */
219 /* dense block. Standard dense solver */
220 /* was called, no divide-and-conquer was */
221 /* performed */
222 /* if INFO = -103, successful exit, but warning that */
223 /* MINGAP.LE.TOL/10 and therefore the */
224 /* eigenvectors corresponding to close */
225 /* approximate eigenvalues may individually */
226 /* be unreliable (although taken together they */
227 /* do approximate the corresponding subspace to */
228 /* the desired accuracy) */
229 /* = -99: error in the preprocessing in DIBTDC (when determining */
230 /* the merging order). */
231 /* < 0, > -99: illegal arguments. */
232 /* if INFO = -i, the i-th argument had an illegal value. */
233 /* > 0: The algorithm failed to compute an eigenvalue while */
234 /* working on the submatrix lying in rows and columns */
235 /* INFO/(N+1) through mod(INFO,N+1). */
236
237 /* Further Details */
238 /* =============== */
239
240 /* Small modifications of code written by */
241 /* Wilfried Gansterer and Bob Ward, */
242 /* Department of Computer Science, University of Tennessee */
243 /* see https://doi.org/10.1137/S1064827501399432 */
244
245 /* Based on the design of the LAPACK code sstedc.f written by Jeff */
246 /* Rutter, Computer Science Division, University of California at */
247 /* Berkeley, and modified by Francoise Tisseur, University of Tennessee. */
248
249 /* ===================================================================== */
250
251 /* .. Parameters .. */
252
253 #define TOLMAX 0.1
254
255 /* TOLMAX .... upper bound for tolerances TOL, TAU1, TAU2 */
256 /* NOTE: in the routine DIBTDC, the value */
257 /* 1.D-1 is hardcoded for TOLMAX ! */
258
259 10 PetscBLASInt i, j, k, i1, iwspc, lwmin, start;
260 10 PetscBLASInt ii, ip, nk, rk, np, iu, rp1, ldu;
261 10 PetscBLASInt ksk, ivt, iend, kchk=0, kmax=0, one=1, zero=0;
262 10 PetscBLASInt ldvt, ksum=0, kskp1, spneed, nrblks, liwmin, isvals;
263 10 PetscReal p, d2, eps, dmax, emax, done = 1.0;
264 10 PetscReal dnrm, tiny, anorm, exdnrm=0, dropsv, absdiff;
265
266
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
10 PetscFunctionBegin;
267 /* Determine machine epsilon. */
268 10 eps = LAPACKlamch_("Epsilon");
269
270 10 *info = 0;
271
272
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
10 if (*(unsigned char *)jobz != 'N' && *(unsigned char *)jobz != 'D') *info = -1;
273
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
10 else if (*(unsigned char *)jobacc != 'A' && *(unsigned char *)jobacc != 'M') *info = -2;
274
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
10 else if (n < 1) *info = -3;
275
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
10 else if (nblks < 1 || nblks > n) *info = -4;
276
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
10 if (*info == 0) {
277
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
55 for (k = 0; k < nblks; ++k) {
278 45 ksk = ksizes[k];
279 45 ksum += ksk;
280 45 if (ksk > kmax) kmax = ksk;
281
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
45 if (ksk < 1) kchk = 1;
282 }
283
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (nblks == 1) lwmin = 2*n*n + n*6 + 1;
284 5 else lwmin = n*n + n*3;
285 10 liwmin = n * 5 + nblks * 5 - 4;
286
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
10 if (ksum != n || kchk == 1) *info = -5;
287
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
10 else if (l1d < PetscMax(3,kmax)) *info = -7;
288
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
10 else if (l2d < PetscMax(3,kmax)) *info = -8;
289
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
10 else if (l1e < PetscMax(3,2*kmax+1)) *info = -10;
290
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
10 else if (l2e < PetscMax(3,2*kmax+1)) *info = -11;
291
2/4
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
10 else if (*(unsigned char *)jobacc == 'A' && tol > TOLMAX) *info = -12;
292
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
10 else if (*(unsigned char *)jobacc == 'M' && tau1 > TOLMAX/2) *info = -13;
293
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
10 else if (*(unsigned char *)jobacc == 'M' && tau2 > TOLMAX/2) *info = -14;
294
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
10 else if (ldz < PetscMax(1,n)) *info = -17;
295
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
10 else if (lwork < lwmin) *info = -19;
296
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
10 else if (liwork < liwmin) *info = -21;
297 }
298
299
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
10 PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DSBTDC",-(*info));
300
301 /* Quick return if possible */
302
303
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
10 if (n == 1) {
304 ev[0] = d[0]; z[0] = 1.;
305 *info = -101;
306 PetscFunctionReturn(PETSC_SUCCESS);
307 }
308
309 /* If NBLKS is equal to 1, then solve the problem with standard */
310 /* dense solver (in this case KSIZES(1) = N). */
311
312
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (nblks == 1) {
313
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
20 for (i = 0; i < n; ++i) {
314
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
45 for (j = 0; j <= i; ++j) {
315 30 z[i + j*ldz] = d[i + j*l1d];
316 }
317 }
318
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
5 PetscCallBLAS("LAPACKsyevd",LAPACKsyevd_("V", "L", &n, z, &ldz, ev, work, &lwork, iwork, &liwork, info));
319
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
5 SlepcCheckLapackInfo("syevd",*info);
320 5 *info = -102;
321
6/12
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
5 PetscFunctionReturn(PETSC_SUCCESS);
322 }
323
324 /* determine the accuracy parameters (if requested) */
325
326
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 if (*(unsigned char *)jobacc == 'A') {
327 5 tau1 = tol / 2;
328
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 if (tau1 < eps * 20) tau1 = eps;
329 tau2 = tol / 2;
330 }
331
332 /* Initialize Z as the identity matrix */
333
334
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 if (*(unsigned char *)jobz == 'D') {
335
4/4
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
3005 for (j=0;j<n;j++) for (i=0;i<n;i++) z[i+j*ldz] = 0.0;
336
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
125 for (i=0;i<n;i++) z[i+i*ldz] = 1.0;
337 }
338
339 /* Determine the off-diagonal ranks, form and store the lower rank */
340 /* approximations based on the tolerance parameters, the */
341 /* RANK(K) largest singular values and the associated singular */
342 /* vectors of each subdiagonal block. Also find the maximum norm of */
343 /* the subdiagonal blocks (in EMAX). */
344
345 /* Compute SVDs of the subdiagonal blocks.... */
346
347 /* EMAX .... maximum norm of the off-diagonal blocks */
348
349 emax = 0.;
350
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
40 for (k = 0; k < nblks-1; ++k) {
351 35 ksk = ksizes[k];
352 35 kskp1 = ksizes[k+1];
353 35 isvals = 0;
354
355 /* Note that min(KSKP1,KSK).LE.N/2 (equality possible for */
356 /* NBLKS=2), and therefore storing the singular values requires */
357 /* at most N/2 entries of the * array WORK. */
358
359 35 iu = isvals + n / 2;
360 35 ivt = isvals + n / 2;
361
362 /* Call of DGESVD: The space for U is not referenced, since */
363 /* JOBU='O' and therefore this portion of the array WORK */
364 /* is not referenced for U. */
365
366 35 ldu = kskp1;
367 35 ldvt = PetscMin(kskp1,ksk);
368 35 iwspc = ivt + n * n / 2;
369
370 /* Note that the minimum workspace required for this call */
371 /* of DGESVD is: N/2 for storing the singular values + N**2/2 for */
372 /* storing V^T + 5*N/2 workspace = N**2/2 + 3*N. */
373
374 35 i1 = lwork - iwspc;
375
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
35 PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O", "S", &kskp1, &ksk,
376 &e[k*l1e*l2e], &l1e, &work[isvals],
377 &work[iu], &ldu, &work[ivt], &ldvt, &work[iwspc], &i1, info));
378
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
35 SlepcCheckLapackInfo("gesvd",*info);
379
380 /* Note that after the return from DGESVD U is stored in */
381 /* E(*,*,K), and V^{\top} is stored in WORK(IVT, IVT+1, ....) */
382
383 /* determine the ranks RANK() for the approximations */
384
385 35 rk = PetscMin(ksk,kskp1);
386 35 L8:
387 35 dropsv = work[isvals - 1 + rk];
388
389
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
35 if (dropsv * 2. <= tau1) {
390
391 /* the error caused by dropping singular value RK is */
392 /* small enough, try to reduce the rank by one more */
393
394 if (--rk > 0) goto L8;
395 else iwork[k] = 0;
396 } else {
397
398 /* the error caused by dropping singular value RK is */
399 /* too large already, RK is the rank required to achieve the */
400 /* desired accuracy */
401
402 35 iwork[k] = rk;
403 }
404
405 /* ************************************************************************** */
406
407 /* Store the first RANK(K) terms of the SVD of the current */
408 /* off-diagonal block. */
409 /* NOTE that here it is required that L1E, L2E >= 2*KMAX+1 in order */
410 /* to have enough space for storing singular vectors and values up */
411 /* to the full SVD of an off-diagonal block !!!! */
412
413 /* u1-u_RANK(K) is already contained in E(:,1:RANK(K),K) (as a */
414 /* result of the call of DGESVD !), the sigma1-sigmaK are to be */
415 /* stored in E(1:RANK(K),RANK(K)+1,K), and v1-v_RANK(K) are to be */
416 /* stored in E(:,RANK(K)+2:2*RANK(K)+1,K) */
417
418 35 rp1 = iwork[k];
419
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
140 for (j = 0; j < iwork[k]; ++j) {
420
421 /* store sigma_J in E(J,RANK(K)+1,K) */
422
423 105 e[j + (rp1 + k*l2e)* l1e] = work[isvals + j];
424
425 /* update maximum norm of subdiagonal blocks */
426
427
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
105 if (e[j + (rp1 + k*l2e)*l1e] > emax) {
428 5 emax = e[j + (rp1 + k*l2e)*l1e];
429 }
430
431 /* store v_J in E(:,RANK(K)+1+J,K) */
432 /* (note that WORK contains V^{\top} and therefore */
433 /* we need to read rowwise !) */
434
435
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
420 for (i = 1; i <= ksk; ++i) {
436 315 e[i-1 + (rp1+j+1 + k*l2e)*l1e] = work[ivt+j + (i-1)*ldvt];
437 }
438 }
439
440 }
441
442 /* Compute the maximum norm of diagonal blocks and store the norm */
443 /* of each diagonal block in E(RP1,RP1,K) (after the singular values); */
444 /* store the norm of the last diagonal block in EXDNRM. */
445
446 /* DMAX .... maximum one-norm of the diagonal blocks */
447
448 dmax = 0.;
449
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
45 for (k = 0; k < nblks; ++k) {
450 40 rp1 = iwork[k];
451
452 /* compute the one-norm of diagonal block K */
453
454 40 dnrm = LAPACKlansy_("1", "L", &ksizes[k], &d[k*l1d*l2d], &l1d, work);
455
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
40 if (k+1 == nblks) exdnrm = dnrm;
456 35 else e[rp1 + (rp1 + k*l2e)*l1e] = dnrm;
457
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
40 if (dnrm > dmax) dmax = dnrm;
458 }
459
460 /* Check for zero matrix. */
461
462
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 if (emax == 0. && dmax == 0.) {
463 for (i = 0; i < n; ++i) ev[i] = 0.;
464 *info = -100;
465 PetscFunctionReturn(PETSC_SUCCESS);
466 }
467
468 /* **************************************************************** */
469
470 /* ....Identify irreducible parts of the block tridiagonal matrix */
471 /* [while (START <= NBLKS)].... */
472
473 start = 0;
474 np = 0;
475 5 L10:
476
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (start < nblks) {
477
478 /* Let IEND be the number of the next subdiagonal block such that */
479 /* its RANK is 0 or IEND = NBLKS if no such subdiagonal exists. */
480 /* The matrix identified by the elements between the diagonal block START */
481 /* and the diagonal block IEND constitutes an independent (irreducible) */
482 /* sub-problem. */
483
484 iend = start;
485
486 40 L20:
487
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
40 if (iend < nblks) {
488 40 rk = iwork[iend];
489
490 /* NOTE: if RANK(IEND).EQ.0 then decoupling happens due to */
491 /* reduced accuracy requirements ! (because in this case */
492 /* we would not merge the corresponding two diagonal blocks) */
493
494 /* NOTE: seems like any combination may potentially happen: */
495 /* (i) RANK = 0 but no decoupling due to small norm of */
496 /* off-diagonal block (corresponding diagonal blocks */
497 /* also have small norm) as well as */
498 /* (ii) RANK > 0 but decoupling due to small norm of */
499 /* off-diagonal block (corresponding diagonal blocks */
500 /* have very large norm) */
501 /* case (i) is ruled out by checking for RANK = 0 above */
502 /* (we decide to decouple all the time when the rank */
503 /* of an off-diagonal block is zero, independently of */
504 /* the norms of the corresponding diagonal blocks. */
505
506
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
40 if (rk > 0) {
507
508 /* check for decoupling due to small norm of off-diagonal block */
509 /* (relative to the norms of the corresponding diagonal blocks) */
510
511
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
35 if (iend == nblks-2) {
512 5 d2 = PetscSqrtReal(exdnrm);
513 } else {
514 30 d2 = PetscSqrtReal(e[iwork[iend+1] + (iwork[iend+1] + (iend+1)*l2e)*l1e]);
515 }
516
517 /* this definition of TINY is analogous to the definition */
518 /* in the tridiagonal divide&conquer (dstedc) */
519
520 35 tiny = eps * PetscSqrtReal(e[iwork[iend] + (iwork[iend] + iend*l2e)*l1e])*d2;
521
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
35 if (e[(iwork[iend] + iend*l2e)*l1e] > tiny) {
522
523 /* no decoupling due to small norm of off-diagonal block */
524
525 35 ++iend;
526 35 goto L20;
527 }
528 }
529 }
530
531 /* ....(Sub) Problem determined: between diagonal blocks */
532 /* START and IEND. Compute its size and solve it.... */
533
534 5 nrblks = iend - start + 1;
535
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 if (nrblks == 1) {
536
537 /* Isolated problem is a single diagonal block */
538
539 nk = ksizes[start];
540
541 /* copy this isolated block into Z */
542
543 for (i = 0; i < nk; ++i) {
544 ip = np + i + 1;
545 for (j = 0; j <= i; ++j) z[ip + (np+j+1)*ldz] = d[i + (j + start*l2d)*l1d];
546 }
547
548 /* check whether there is enough workspace */
549
550 spneed = 2*nk*nk + nk * 6 + 1;
551 PetscCheck(spneed<=lwork,PETSC_COMM_SELF,PETSC_ERR_MEM,"dsbtdc: not enough workspace for DSYEVD, info = %" PetscBLASInt_FMT,lwork - 200 - spneed);
552
553 PetscCallBLAS("LAPACKsyevd",LAPACKsyevd_("V", "L", &nk,
554 &z[np + np*ldz], &ldz, &ev[np],
555 work, &lwork, &iwork[nblks-1], &liwork, info));
556 SlepcCheckLapackInfo("syevd",*info);
557 start = iend + 1;
558 np += nk;
559
560 /* go to the next irreducible subproblem */
561
562 goto L10;
563 }
564
565 /* ....Isolated problem consists of more than one diagonal block. */
566 /* Start the divide and conquer algorithm.... */
567
568 /* Scale: Divide by the maximum of all norms of diagonal blocks */
569 /* and singular values of the subdiagonal blocks */
570
571 /* ....determine maximum of the norms of all diagonal and subdiagonal */
572 /* blocks.... */
573
574
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 if (iend == nblks-1) anorm = exdnrm;
575 else anorm = e[iwork[iend] + (iwork[iend] + iend*l2e)*l1e];
576
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
40 for (k = start; k < iend; ++k) {
577 35 rp1 = iwork[k];
578
579 /* norm of diagonal block */
580
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
35 anorm = PetscMax(anorm,e[rp1 + (rp1 + k*l2e)*l1e]);
581
582 /* singular value of subdiagonal block */
583
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
70 anorm = PetscMax(anorm,e[(rp1 + k*l2e)*l1e]);
584 }
585
586 5 nk = 0;
587
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
45 for (k = start; k < iend+1; ++k) {
588 40 ksk = ksizes[k];
589 40 nk += ksk;
590
591 /* scale the diagonal block */
592
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
40 PetscCallBLAS("LAPACKlascl",LAPACKlascl_("L", &zero, &zero,
593 &anorm, &done, &ksk, &ksk, &d[k*l2d*l1d], &l1d, info));
594
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
40 SlepcCheckLapackInfo("lascl",*info);
595
596 /* scale the (approximated) off-diagonal block by dividing its */
597 /* singular values */
598
599
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
40 if (k != iend) {
600
601 /* the last subdiagonal block has index IEND-1 !!!! */
602
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
140 for (i = 0; i < iwork[k]; ++i) {
603 105 e[i + (iwork[k] + k*l2e)*l1e] /= anorm;
604 }
605 }
606 }
607
608 /* call the block-tridiagonal divide-and-conquer on the */
609 /* irreducible subproblem which has been identified */
610
611
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
5 PetscCall(BDC_dibtdc_(jobz, nk, nrblks, &ksizes[start], &d[start*l1d*l2d], l1d, l2d,
612 &e[start*l2e*l1e], &iwork[start], l1e, l2e, tau2, &ev[np],
613 &z[np + np*ldz], ldz, work, lwork, &iwork[nblks-1], liwork, info, 1));
614
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
5 PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_LIB,"dsbtdc: Error in DIBTDC, info = %" PetscBLASInt_FMT,*info);
615
616 /* ************************************************************************** */
617
618 /* Scale back the computed eigenvalues. */
619
620
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
5 PetscCallBLAS("LAPACKlascl",LAPACKlascl_("G", &zero, &zero, &done,
621 &anorm, &nk, &one, &ev[np], &nk, info));
622
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
5 SlepcCheckLapackInfo("lascl",*info);
623
624 5 start = iend + 1;
625 5 np += nk;
626
627 /* Go to the next irreducible subproblem. */
628
629 5 goto L10;
630 }
631
632 /* ....If the problem split any number of times, then the eigenvalues */
633 /* will not be properly ordered. Here we permute the eigenvalues */
634 /* (and the associated eigenvectors) across the irreducible parts */
635 /* into ascending order.... */
636
637 /* IF(NRBLKS.LT.NBLKS)THEN */
638
639 /* Use Selection Sort to minimize swaps of eigenvectors */
640
641
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
120 for (ii = 1; ii < n; ++ii) {
642 115 i = ii;
643 115 k = i;
644 115 p = ev[i];
645
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
1495 for (j = ii; j < n; ++j) {
646
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
1380 if (ev[j] < p) {
647 k = j;
648 p = ev[j];
649 }
650 }
651
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
115 if (k != i) {
652 ev[k] = ev[i];
653 ev[i] = p;
654
0/20
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
115 PetscCallBLAS("BLASswap",BLASswap_(&n, &z[i*ldz], &one, &z[k*ldz], &one));
655 }
656 }
657
658 /* ...Compute MINGAP (minimum difference between neighboring eigenvalue */
659 /* approximations).............................................. */
660
661 5 *mingap = ev[1] - ev[0];
662
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
5 PetscCheck(*mingap>=0.,PETSC_COMM_SELF,PETSC_ERR_LIB,"dsbtdc: Eigenvalue approximations are not ordered properly. Approximation 1 is larger than approximation 2.");
663 5 *mingapi = 1;
664
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
115 for (i = 2; i < n; ++i) {
665 110 absdiff = ev[i] - ev[i-1];
666
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
110 PetscCheck(absdiff>=0.,PETSC_COMM_SELF,PETSC_ERR_LIB,"dsbtdc: Eigenvalue approximations are not ordered properly. Approximation %" PetscBLASInt_FMT " is larger than approximation %" PetscBLASInt_FMT ".",i,i+1);
667
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
110 if (absdiff < *mingap) {
668 5 *mingap = absdiff;
669 5 *mingapi = i;
670 }
671 }
672
673 /* check whether the minimum gap between eigenvalue approximations */
674 /* may indicate severe inaccuracies in the eigenvector approximations */
675
676
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 if (*mingap <= tol / 10) *info = -103;
677
6/12
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
1 PetscFunctionReturn(PETSC_SUCCESS);
678 }
679