GCC Code Coverage Report


Directory: ./
File: src/sys/classes/ds/impls/hep/bdc/dmerg2.c
Date: 2025-10-04 04:19:13
Exec Total Coverage
Lines: 53 54 98.1%
Functions: 1 1 100.0%
Branches: 84 180 46.7%

Line Branch Exec Source
1 /*
2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 SLEPc - Scalable Library for Eigenvalue Problem Computations
4 Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5
6 This file is part of SLEPc.
7 SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 */
10 /*
11 BDC - Block-divide and conquer (see description in README file)
12 */
13
14 #include <slepc/private/dsimpl.h>
15 #include <slepcblaslapack.h>
16
17 105 PetscErrorCode BDC_dmerg2_(const char *jobz,PetscBLASInt rkct,PetscBLASInt n,
18 PetscReal *ev,PetscReal *q,PetscBLASInt ldq,PetscBLASInt *indxq,
19 PetscReal *rho,PetscReal *u,PetscBLASInt sbrkp1,PetscReal *v,
20 PetscBLASInt sbrk,PetscBLASInt cutpnt,PetscReal *work,PetscBLASInt lwork,
21 PetscBLASInt *iwork,PetscReal tol,PetscBLASInt *info,PetscBLASInt jobz_len)
22 {
23 /* -- Routine written in LAPACK Version 3.0 style -- */
24 /* *************************************************** */
25 /* Written by */
26 /* Michael Moldaschl and Wilfried Gansterer */
27 /* University of Vienna */
28 /* last modification: March 16, 2014 */
29
30 /* Small adaptations of original code written by */
31 /* Wilfried Gansterer and Bob Ward, */
32 /* Department of Computer Science, University of Tennessee */
33 /* see https://doi.org/10.1137/S1064827501399432 */
34 /* *************************************************** */
35
36 /* Purpose */
37 /* ======= */
38
39 /* DMERG2 computes the updated eigensystem of a diagonal matrix after */
40 /* modification by a rank-one symmetric matrix. The diagonal matrix */
41 /* consists of two diagonal submatrices, and the vectors defining the */
42 /* rank-1 matrix similarly have two underlying subvectors each. */
43 /* The dimension of the first subproblem is CUTPNT, the dimension of */
44 /* the second subproblem is N-CUTPNT. */
45
46 /* T = Q(in) (EV(in) + RHO * Z*Z') Q'(in) = Q(out) * EV(out) * Q'(out) */
47
48 /* where Z = Q'[V U']', where V is a row vector and U is a column */
49 /* vector with dimensions corresponding to the two underlying */
50 /* subproblems. */
51
52 /* The eigenvectors of the original matrix are stored in Q, and the */
53 /* eigenvalues in EV. The algorithm consists of three stages: */
54
55 /* The first stage consists of deflating the size of the problem */
56 /* when there are multiple eigenvalues or if there is a zero in */
57 /* the Z vector. For each such occurrence the dimension of the */
58 /* secular equation problem is reduced by one. This stage is */
59 /* performed by the routine DSRTDF. */
60
61 /* The second stage consists of calculating the updated */
62 /* eigenvalues. This is done by finding the roots of the secular */
63 /* equation via the routine DLAED4 (as called by DLAED3M). */
64 /* This routine also calculates the eigenvectors of the current */
65 /* problem. */
66
67 /* If(JOBZ.EQ.'D') then the final stage consists */
68 /* of computing the updated eigenvectors directly using the updated */
69 /* eigenvalues. The eigenvectors for the current problem are multiplied */
70 /* with the eigenvectors from the overall problem. */
71
72 /* Arguments */
73 /* ========= */
74
75 /* JOBZ (input) CHARACTER*1 */
76 /* = 'N': Compute eigenvalues only (not implemented); */
77 /* = 'D': Compute eigenvalues and eigenvectors. */
78 /* Eigenvectors are accumulated in the divide-and-conquer */
79 /* process. */
80
81 /* RKCT (input) INTEGER */
82 /* The number of the rank modification which is accounted for */
83 /* (RKCT >= 1). Required parameter, because the update operation of the */
84 /* modification vector can be performed much more efficiently */
85 /* if RKCT.EQ.1. In that case, the eigenvector matrix is still */
86 /* block-diagonal. For RKCT.GE.2 the eigenvector matrix for the update */
87 /* operation has filled up and is a full matrix. */
88
89 /* N (input) INTEGER */
90 /* The dimension of the symmetric block tridiagonal matrix. */
91 /* N >= 0. */
92
93 /* EV (input/output) DOUBLE PRECISION array, dimension (N) */
94 /* On entry, the eigenvalues (=diagonal values) of the */
95 /* rank-1-perturbed matrix. */
96 /* On exit, the eigenvalues of the repaired matrix. */
97
98 /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */
99 /* On entry, the eigenvectors of the rank-1-perturbed matrix. */
100 /* On exit, the eigenvectors of the repaired tridiagonal matrix. */
101
102 /* LDQ (input) INTEGER */
103 /* The leading dimension of the array Q. LDQ >= max(1,N). */
104
105 /* INDXQ (input/output) INTEGER array, dimension (N) */
106 /* On entry, the permutation which separately sorts the two */
107 /* subproblems in EV into ascending order. */
108 /* On exit, the permutation which will reintegrate the */
109 /* subproblems back into sorted order, */
110 /* i.e. EV(INDXQ(I = 1, N)) will be in ascending order. */
111
112 /* RHO (input/output) DOUBLE PRECISION */
113 /* The scalar in the rank-1 perturbation. Modified (multiplied */
114 /* by 2) in DSRTDF. */
115
116 /* U (input) DOUBLE PRECISION array; dimension (SBRKP1), where SBRKP1 */
117 /* is the size of the first (original) block after CUTPNT. */
118 /* The column vector of the rank-1 subdiagonal connecting the */
119 /* two diagonal subproblems. */
120 /* Theoretically, zero entries might have to be appended after U */
121 /* in order to make it have dimension (N-CUTPNT). However, this */
122 /* is not required because it can be accounted for in the */
123 /* matrix-vector product using the argument SBRKP1. */
124
125 /* SBRKP1 (input) INTEGER */
126 /* Dimension of the relevant (non-zero) part of the vector U. */
127 /* Equal to the size of the first original block after the */
128 /* breakpoint. */
129
130 /* V (input) DOUBLE PRECISION array; dimension (SBRK), where SBRK */
131 /* is the size of the last (original) block before CUTPNT. */
132 /* The row vector of the rank-1 subdiagonal connecting the two */
133 /* diagonal subproblems. */
134 /* Theoretically, zero entries might have to be inserted in front */
135 /* of V in order to make it have dimension (CUTPNT). However, this */
136 /* is not required because it can be accounted for in the */
137 /* matrix-vector product using the argument SBRK. */
138
139 /* SBRK (input) INTEGER */
140 /* Dimension of the relevant (non-zero) part of the vector V. */
141 /* Equal to the size of the last original block before the */
142 /* breakpoint. */
143
144 /* CUTPNT (input) INTEGER */
145 /* The location of the last eigenvalue of the leading diagonal */
146 /* block. min(1,N) <= CUTPNT <= max(1,N). */
147
148 /* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) */
149
150 /* LWORK (input) INTEGER */
151 /* The dimension of the array WORK. */
152 /* In order to guarantee correct results in all cases, */
153 /* LWORK must be at least (2*N**2 + 3*N). In many cases, */
154 /* less workspace is required. The absolute minimum required is */
155 /* (N**2 + 3*N). */
156 /* If the workspace provided is not sufficient, the routine will */
157 /* return a corresponding error code and report how much workspace */
158 /* was missing (see INFO). */
159 /* NOTE: This parameter is needed for determining whether enough */
160 /* workspace is provided, and, if not, for computing how */
161 /* much workspace is needed. */
162
163 /* IWORK (workspace) INTEGER array, dimension (4*N) */
164
165 /* TOL (input) DOUBLE PRECISION */
166 /* User specified deflation tolerance for the routine DSRTDF. */
167
168 /* INFO (output) INTEGER */
169 /* = 0: successful exit. */
170 /* < -200: not enough workspace */
171 /* ABS(INFO + 200) numbers have to be stored in addition */
172 /* to the workspace provided, otherwise some eigenvectors */
173 /* will be incorrect. */
174 /* < 0, > -99: if INFO.EQ.-i, the i-th argument had an */
175 /* illegal value. */
176 /* > 0: if INFO.EQ.1, an eigenvalue did not converge */
177 /* if INFO.EQ.2, the deflation counters DZ and DE do not sum */
178 /* up to the total number N-K of components */
179 /* deflated */
180
181 /* Further Details */
182 /* =============== */
183
184 /* Based on code written by */
185 /* Wilfried Gansterer and Bob Ward, */
186 /* Department of Computer Science, University of Tennessee */
187
188 /* Based on the design of the LAPACK code Dlaed1.f written by Jeff */
189 /* Rutter, Computer Science Division, University of California at */
190 /* Berkeley, and modified by Francoise Tisseur, University of Tennessee. */
191
192 /* ===================================================================== */
193
194 105 PetscBLASInt i, k, n1, n2, de, is, dz, iw, iz, iq2, nmc, cpp1;
195 105 PetscBLASInt indx, indxc, indxp, lwmin, idlmda;
196 105 PetscBLASInt spneed, coltyp, tmpcut, i__1, i__2, one=1, mone=-1;
197 105 char defl[1];
198 105 PetscReal done = 1.0, dzero = 0.0;
199
200
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
105 PetscFunctionBegin;
201 105 *info = 0;
202 105 lwmin = n*n + n * 3;
203
204
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
105 if (n < 0) *info = -3;
205
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
105 else if (ldq < PetscMax(1,n)) *info = -6;
206
2/4
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
105 else if (cutpnt < PetscMin(1,n) || cutpnt > PetscMax(1,n)) *info = -13;
207
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
105 else if (lwork < lwmin) *info = -15;
208
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
105 PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DMERG2",-(*info));
209
210 /* **************************************************************************** */
211
212 /* Quick return if possible */
213
214
2/14
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
105 if (n == 0) PetscFunctionReturn(PETSC_SUCCESS);
215
216 /* **************************************************************************** */
217
218 /* The following values are integer pointers which indicate */
219 /* the portion of the workspace used by a particular array in DSRTDF */
220 /* and DLAED3M. */
221
222 105 iz = 0;
223 105 idlmda = iz + n;
224 105 iw = idlmda + n;
225 105 iq2 = iw + n;
226 105 is = iq2 + n * n;
227
228 /* After the pointer IS the matrix S is stored and read in WORK */
229 /* in the routine DLAED3M. */
230
231 105 indx = 0;
232 105 indxc = indx + n;
233 105 coltyp = indxc + n;
234 105 indxp = coltyp + n;
235
236 /* If eigenvectors are to be accumulated in the divide-and-conquer */
237 /* process (JOBZ.EQ.'D') form the z-vector which consists of */
238 /* Q_1^T * V and Q_2^T * U. */
239
240 105 cpp1 = cutpnt + 1;
241
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
105 if (rkct == 1) {
242
243 /* for the first rank modification the eigenvector matrix has */
244 /* special block-diagonal structure and therefore Q_1^T * V and */
245 /* Q_2^T * U can be formed separately. */
246
247
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
35 PetscCallBLAS("BLASgemv",BLASgemv_("T", &sbrk, &cutpnt, &done,
248 &q[cutpnt - sbrk], &ldq, v, &one, &dzero, &work[iz], &one));
249 35 nmc = n - cutpnt;
250
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
35 PetscCallBLAS("BLASgemv",BLASgemv_("T", &sbrkp1, &nmc, &done,
251 &q[cpp1-1 + (cpp1-1)*ldq], &ldq, u,
252 &one, &dzero, &work[iz + cutpnt], &one));
253
254 } else {
255
256 /* for the higher rank modifications, the vectors V and U */
257 /* have to be multiplied with the full eigenvector matrix */
258
259
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
70 PetscCallBLAS("BLASgemv",BLASgemv_("T", &sbrk, &n, &done,
260 &q[cutpnt - sbrk], &ldq, v, &one, &dzero, &work[iz], &one));
261
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
70 PetscCallBLAS("BLASgemv",BLASgemv_("T", &sbrkp1, &n, &done, &q[cpp1-1],
262 &ldq, u, &one, &done, &work[iz], &one));
263
264 }
265
266 /* **************************************************************************** */
267
268 /* Deflate eigenvalues. */
269
270
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
105 if (rkct == 1) {
271
272 /* for the first rank modification we need the actual cutpoint */
273
274 35 n1 = cutpnt;
275 35 tmpcut = cutpnt;
276 } else {
277
278 /* for the later rank modifications there is no actual cutpoint any more */
279
280 70 n1 = n;
281
282 /* The original value of CUTPNT has to be preserved for the next time */
283 /* this subroutine is called (therefore, CUTPNT is an INPUT parameter */
284 /* and not to be changed). Thus, assign N to TMPCUT and use the local */
285 /* variable TMPCUT from now on for the cut point. */
286
287 70 tmpcut = n;
288 }
289
290 /* initialize the flag DEFL (indicates whether deflation occurred - */
291 /* this information is needed later in DLAED3M) */
292
293 105 *(unsigned char *)defl = '0';
294
295 /* call DSRTDF for deflation */
296
297
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
105 PetscCall(BDC_dsrtdf_(&k, n, n1, ev, q, ldq, indxq, rho, &work[iz],
298 &work[idlmda], &work[iw], &work[iq2], &iwork[indx],
299 &iwork[indxc], &iwork[indxp], &iwork[coltyp], tol, &dz, &de, info));
300
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
105 PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_LIB,"dmerg2: error in dsrtdf, info = %" PetscBLASInt_FMT,*info);
301
302
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
105 if (k < n) {
303
304 /* ....some deflation occurred in dsrtdf, set the flag DEFL */
305 /* (needed in DLAED3M.f, since Givens rotations need to be */
306 /* applied to the eigenvector matrix only if some deflation */
307 /* happened) */
308
309 45 *(unsigned char *)defl = '1';
310 }
311
312 /* **************************************************************************** */
313
314 /* Solve the Secular Equation. */
315
316
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
105 if (k != 0) {
317
318 /* ....not everything was deflated. */
319
320 /* ....check whether enough workspace is available: */
321
322 /* Note that the following (upper) bound SPNEED for the workspace */
323 /* requirements should also hold in the extreme case TMPCUT=N, */
324 /* which happens for every rank modification after the first one. */
325
326 105 i__1 = (iwork[coltyp] + iwork[coltyp+1]) * k;
327 105 i__2 = (iwork[coltyp+1] + iwork[coltyp + 2]) * k;
328 105 spneed = is + PetscMax(i__1,i__2) - 1;
329
330
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
105 PetscCheck(spneed<=lwork,PETSC_COMM_SELF,PETSC_ERR_MEM,"dmerg2: Workspace needed exceeds the workspace provided by %" PetscBLASInt_FMT " numbers",spneed-lwork);
331
332 /* calling DLAED3M for solving the secular equation. */
333
334
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
105 PetscCall(BDC_dlaed3m_(jobz, defl, k, n, tmpcut, ev, q, ldq,
335 *rho, &work[idlmda], &work[iq2], &iwork[indxc], &iwork[coltyp],
336 &work[iw], &work[is], info, 1, 1));
337
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
105 PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_LIB,"dmerg2: error in dlaed3m, info = %" PetscBLASInt_FMT,*info);
338
339 /* Prepare the INDXQ sorting permutation. */
340
341 105 n1 = k;
342 105 n2 = n - k;
343
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
105 PetscCallBLAS("LAPACKlamrg",LAPACKlamrg_(&n1, &n2, ev, &one, &mone, indxq));
344
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
105 if (k == 0) for (i = 0; i < n; ++i) indxq[i] = i+1;
345
346 } else {
347
348 /* ....everything was deflated (K.EQ.0) */
349
350 for (i = 0; i < n; ++i) indxq[i] = i+1;
351 }
352
6/12
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
21 PetscFunctionReturn(PETSC_SUCCESS);
353 }
354