Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3 | SLEPc - Scalable Library for Eigenvalue Problem Computations | ||
4 | Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain | ||
5 | |||
6 | This file is part of SLEPc. | ||
7 | SLEPc is distributed under a 2-clause BSD license (see LICENSE). | ||
8 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
9 | */ | ||
10 | /* | ||
11 | BDC - Block-divide and conquer (see description in README file) | ||
12 | */ | ||
13 | |||
14 | #include <slepc/private/dsimpl.h> | ||
15 | #include <slepcblaslapack.h> | ||
16 | |||
17 | 105 | PetscErrorCode BDC_dsrtdf_(PetscBLASInt *k,PetscBLASInt n,PetscBLASInt n1, | |
18 | PetscReal *d,PetscReal *q,PetscBLASInt ldq,PetscBLASInt *indxq, | ||
19 | PetscReal *rho,PetscReal *z,PetscReal *dlamda,PetscReal *w, | ||
20 | PetscReal *q2,PetscBLASInt *indx,PetscBLASInt *indxc,PetscBLASInt *indxp, | ||
21 | PetscBLASInt *coltyp,PetscReal reltol,PetscBLASInt *dz,PetscBLASInt *de, | ||
22 | PetscBLASInt *info) | ||
23 | { | ||
24 | /* -- Routine written in LAPACK Version 3.0 style -- */ | ||
25 | /* *************************************************** */ | ||
26 | /* Written by */ | ||
27 | /* Michael Moldaschl and Wilfried Gansterer */ | ||
28 | /* University of Vienna */ | ||
29 | /* last modification: March 16, 2014 */ | ||
30 | |||
31 | /* Small adaptations of original code written by */ | ||
32 | /* Wilfried Gansterer and Bob Ward, */ | ||
33 | /* Department of Computer Science, University of Tennessee */ | ||
34 | /* see https://doi.org/10.1137/S1064827501399432 */ | ||
35 | /* *************************************************** */ | ||
36 | |||
37 | /* Purpose */ | ||
38 | /* ======= */ | ||
39 | |||
40 | /* DSRTDF merges the two sets of eigenvalues of a rank-one modified */ | ||
41 | /* diagonal matrix D+ZZ^T together into a single sorted set. Then it tries */ | ||
42 | /* to deflate the size of the problem. */ | ||
43 | /* There are two ways in which deflation can occur: when two or more */ | ||
44 | /* eigenvalues of D are close together or if there is a tiny entry in the */ | ||
45 | /* Z vector. For each such occurrence the order of the related secular */ | ||
46 | /* equation problem is reduced by one. */ | ||
47 | |||
48 | /* Arguments */ | ||
49 | /* ========= */ | ||
50 | |||
51 | /* K (output) INTEGER */ | ||
52 | /* The number of non-deflated eigenvalues, and the order of the */ | ||
53 | /* related secular equation. 0 <= K <=N. */ | ||
54 | |||
55 | /* N (input) INTEGER */ | ||
56 | /* The dimension of the diagonal matrix. N >= 0. */ | ||
57 | |||
58 | /* N1 (input) INTEGER */ | ||
59 | /* The location of the last eigenvalue in the leading sub-matrix. */ | ||
60 | /* min(1,N) <= N1 <= max(1,N). */ | ||
61 | |||
62 | /* D (input/output) DOUBLE PRECISION array, dimension (N) */ | ||
63 | /* On entry, D contains the eigenvalues of the two submatrices to */ | ||
64 | /* be combined. */ | ||
65 | /* On exit, D contains the trailing (N-K) updated eigenvalues */ | ||
66 | /* (those which were deflated) sorted into increasing order. */ | ||
67 | |||
68 | /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */ | ||
69 | /* On entry, Q contains the eigenvectors of two submatrices in */ | ||
70 | /* the two square blocks with corners at (1,1), (N1,N1) */ | ||
71 | /* and (N1+1, N1+1), (N,N). */ | ||
72 | /* On exit, Q contains the trailing (N-K) updated eigenvectors */ | ||
73 | /* (those which were deflated) in its last N-K columns. */ | ||
74 | |||
75 | /* LDQ (input) INTEGER */ | ||
76 | /* The leading dimension of the array Q. LDQ >= max(1,N). */ | ||
77 | |||
78 | /* INDXQ (input/output) INTEGER array, dimension (N) */ | ||
79 | /* The permutation which separately sorts the two sub-problems */ | ||
80 | /* in D into ascending order. Note that elements in the second */ | ||
81 | /* half of this permutation must first have N1 added to their */ | ||
82 | /* values. Destroyed on exit. */ | ||
83 | |||
84 | /* RHO (input/output) DOUBLE PRECISION */ | ||
85 | /* On entry, the off-diagonal element associated with the rank-1 */ | ||
86 | /* cut which originally split the two submatrices which are now */ | ||
87 | /* being recombined. */ | ||
88 | /* On exit, RHO has been modified to the value required by */ | ||
89 | /* DLAED3M (made positive and multiplied by two, such that */ | ||
90 | /* the modification vector has norm one). */ | ||
91 | |||
92 | /* Z (input/output) DOUBLE PRECISION array, dimension (N) */ | ||
93 | /* On entry, Z contains the updating vector. */ | ||
94 | /* On exit, the contents of Z has been destroyed by the updating */ | ||
95 | /* process. */ | ||
96 | |||
97 | /* DLAMDA (output) DOUBLE PRECISION array, dimension (N) */ | ||
98 | /* A copy of the first K non-deflated eigenvalues which */ | ||
99 | /* will be used by DLAED3M to form the secular equation. */ | ||
100 | |||
101 | /* W (output) DOUBLE PRECISION array, dimension (N) */ | ||
102 | /* The first K values of the final deflation-altered z-vector */ | ||
103 | /* which will be passed to DLAED3M. */ | ||
104 | |||
105 | /* Q2 (output) DOUBLE PRECISION array, dimension */ | ||
106 | /* (N*N) (if everything is deflated) or */ | ||
107 | /* (N1*(COLTYP(1)+COLTYP(2)) + (N-N1)*(COLTYP(2)+COLTYP(3))) */ | ||
108 | /* (if not everything is deflated) */ | ||
109 | /* If everything is deflated, then N*N intermediate workspace */ | ||
110 | /* is needed in Q2. */ | ||
111 | /* If not everything is deflated, Q2 contains on exit a copy of the */ | ||
112 | /* first K non-deflated eigenvectors which will be used by */ | ||
113 | /* DLAED3M in a matrix multiply (DGEMM) to accumulate the new */ | ||
114 | /* eigenvectors, followed by the N-K deflated eigenvectors. */ | ||
115 | |||
116 | /* INDX (workspace) INTEGER array, dimension (N) */ | ||
117 | /* The permutation used to sort the contents of DLAMDA into */ | ||
118 | /* ascending order. */ | ||
119 | |||
120 | /* INDXC (output) INTEGER array, dimension (N) */ | ||
121 | /* The permutation used to arrange the columns of the deflated */ | ||
122 | /* Q matrix into three groups: columns in the first group contain */ | ||
123 | /* non-zero elements only at and above N1 (type 1), columns in the */ | ||
124 | /* second group are dense (type 2), and columns in the third group */ | ||
125 | /* contain non-zero elements only below N1 (type 3). */ | ||
126 | |||
127 | /* INDXP (workspace) INTEGER array, dimension (N) */ | ||
128 | /* The permutation used to place deflated values of D at the end */ | ||
129 | /* of the array. INDXP(1:K) points to the nondeflated D-values */ | ||
130 | /* and INDXP(K+1:N) points to the deflated eigenvalues. */ | ||
131 | |||
132 | /* COLTYP (workspace/output) INTEGER array, dimension (N) */ | ||
133 | /* During execution, a label which will indicate which of the */ | ||
134 | /* following types a column in the Q2 matrix is: */ | ||
135 | /* 1 : non-zero in the upper half only; */ | ||
136 | /* 2 : dense; */ | ||
137 | /* 3 : non-zero in the lower half only; */ | ||
138 | /* 4 : deflated. */ | ||
139 | /* On exit, COLTYP(i) is the number of columns of type i, */ | ||
140 | /* for i=1 to 4 only. */ | ||
141 | |||
142 | /* RELTOL (input) DOUBLE PRECISION */ | ||
143 | /* User specified deflation tolerance. If RELTOL.LT.20*EPS, then */ | ||
144 | /* the standard value used in the original LAPACK routines is used. */ | ||
145 | |||
146 | /* DZ (output) INTEGER, DZ.GE.0 */ | ||
147 | /* counts the deflation due to a small component in the modification */ | ||
148 | /* vector. */ | ||
149 | |||
150 | /* DE (output) INTEGER, DE.GE.0 */ | ||
151 | /* counts the deflation due to close eigenvalues. */ | ||
152 | |||
153 | /* INFO (output) INTEGER */ | ||
154 | /* = 0: successful exit. */ | ||
155 | /* < 0: if INFO = -i, the i-th argument had an illegal value. */ | ||
156 | |||
157 | /* Further Details */ | ||
158 | /* =============== */ | ||
159 | |||
160 | /* Based on code written by */ | ||
161 | /* Wilfried Gansterer and Bob Ward, */ | ||
162 | /* Department of Computer Science, University of Tennessee */ | ||
163 | |||
164 | /* Based on the design of the LAPACK code DLAED2 with modifications */ | ||
165 | /* to allow a block divide and conquer scheme */ | ||
166 | |||
167 | /* DLAED2 was written by Jeff Rutter, Computer Science Division, University */ | ||
168 | /* of California at Berkeley, USA, and modified by Francoise Tisseur, */ | ||
169 | /* University of Tennessee. */ | ||
170 | |||
171 | /* ===================================================================== */ | ||
172 | |||
173 | 105 | PetscReal c, s, t, eps, tau, tol, dmax, dmone = -1.; | |
174 | 105 | PetscBLASInt i, j, i1, k2, n2, ct, nj, pj=0, js, iq1, iq2; | |
175 | 105 | PetscBLASInt psm[4], imax, jmax, ctot[4], factmp=1, one=1; | |
176 | |||
177 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
105 | PetscFunctionBegin; |
178 | 105 | *info = 0; | |
179 | |||
180 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
105 | if (n < 0) *info = -2; |
181 |
2/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
105 | else if (n1 < PetscMin(1,n) || n1 > PetscMax(1,n)) *info = -3; |
182 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
105 | else if (ldq < PetscMax(1,n)) *info = -6; |
183 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
105 | PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DSRTDF",-(*info)); |
184 | |||
185 | /* Initialize deflation counters */ | ||
186 | |||
187 | 105 | *dz = 0; | |
188 | 105 | *de = 0; | |
189 | |||
190 | /* **************************************************************************** */ | ||
191 | |||
192 | /* Quick return if possible */ | ||
193 | |||
194 |
2/14✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
|
105 | if (n == 0) PetscFunctionReturn(PETSC_SUCCESS); |
195 | |||
196 | /* **************************************************************************** */ | ||
197 | |||
198 | 105 | n2 = n - n1; | |
199 | |||
200 | /* Modify Z so that RHO is positive. */ | ||
201 | |||
202 |
1/22✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
|
105 | if (*rho < 0.) PetscCallBLAS("BLASscal",BLASscal_(&n2, &dmone, &z[n1], &one)); |
203 | |||
204 | /* Normalize z so that norm2(z) = 1. Since z is the concatenation of */ | ||
205 | /* two normalized vectors, norm2(z) = sqrt(2). (NOTE that this holds also */ | ||
206 | /* here in the approximate block-tridiagonal D&C because the two vectors are */ | ||
207 | /* singular vectors, but it would not necessarily hold in a D&C for a */ | ||
208 | /* general banded matrix !) */ | ||
209 | |||
210 | 105 | t = 1. / PETSC_SQRT2; | |
211 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
105 | PetscCallBLAS("BLASscal",BLASscal_(&n, &t, z, &one)); |
212 | |||
213 | /* NOTE: at this point the value of RHO is modified in order to */ | ||
214 | /* normalize Z: RHO = ABS( norm2(z)**2 * RHO */ | ||
215 | /* in our case: norm2(z) = sqrt(2), and therefore: */ | ||
216 | |||
217 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
105 | *rho = PetscAbs(*rho * 2.); |
218 | |||
219 | /* Sort the eigenvalues into increasing order */ | ||
220 | |||
221 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
285 | for (i = n1; i < n; ++i) indxq[i] += n1; |
222 | |||
223 | /* re-integrate the deflated parts from the last pass */ | ||
224 | |||
225 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1185 | for (i = 0; i < n; ++i) dlamda[i] = d[indxq[i]-1]; |
226 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
105 | PetscCallBLAS("LAPACKlamrg",LAPACKlamrg_(&n1, &n2, dlamda, &one, &one, indxc)); |
227 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1185 | for (i = 0; i < n; ++i) indx[i] = indxq[indxc[i]-1]; |
228 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1185 | for (i = 0; i < n; ++i) indxq[i] = 0; |
229 | |||
230 | /* Calculate the allowable deflation tolerance */ | ||
231 | |||
232 | /* imax = BLASamax_(&n, z, &one); */ | ||
233 | 105 | imax = 1; | |
234 | 105 | dmax = PetscAbsReal(z[0]); | |
235 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1080 | for (i=1;i<n;i++) { |
236 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
975 | if (PetscAbsReal(z[i])>dmax) { |
237 | 222 | imax = i+1; | |
238 | 222 | dmax = PetscAbsReal(z[i]); | |
239 | } | ||
240 | } | ||
241 | /* jmax = BLASamax_(&n, d, &one); */ | ||
242 | 105 | jmax = 1; | |
243 | 105 | dmax = PetscAbsReal(d[0]); | |
244 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1080 | for (i=1;i<n;i++) { |
245 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
975 | if (PetscAbsReal(d[i])>dmax) { |
246 | 384 | jmax = i+1; | |
247 | 384 | dmax = PetscAbsReal(d[i]); | |
248 | } | ||
249 | } | ||
250 | |||
251 | 105 | eps = LAPACKlamch_("Epsilon"); | |
252 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
105 | if (reltol < eps * 20) { |
253 | /* use the standard deflation tolerance from the original LAPACK code */ | ||
254 |
6/6✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
✓ Branch 4 taken 5 times.
✓ Branch 5 taken 5 times.
|
190 | tol = eps * 8. * PetscMax(PetscAbs(d[jmax-1]),PetscAbs(z[imax-1])); |
255 | } else { | ||
256 | /* otherwise set TOL to the input parameter RELTOL ! */ | ||
257 | ✗ | tol = reltol * PetscMax(PetscAbs(d[jmax-1]),PetscAbs(z[imax-1])); | |
258 | } | ||
259 | |||
260 | /* If the rank-1 modifier is small enough, no more needs to be done */ | ||
261 | /* except to reorganize Q so that its columns correspond with the */ | ||
262 | /* elements in D. */ | ||
263 | |||
264 |
3/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
105 | if (*rho * PetscAbs(z[imax-1]) <= tol) { |
265 | |||
266 | /* complete deflation because of small rank-one modifier */ | ||
267 | /* NOTE: in this case we need N*N space in the array Q2 ! */ | ||
268 | |||
269 | ✗ | *dz = n; *k = 0; | |
270 | ✗ | iq2 = 1; | |
271 | ✗ | for (j = 0; j < n; ++j) { | |
272 | ✗ | i = indx[j]; indxq[j] = i; | |
273 | ✗ | PetscCallBLAS("BLAScopy",BLAScopy_(&n, &q[(i-1)*ldq], &one, &q2[iq2-1], &one)); | |
274 | ✗ | dlamda[j] = d[i-1]; | |
275 | ✗ | iq2 += n; | |
276 | } | ||
277 | ✗ | for (j=0;j<n;j++) for (i=0;i<n;i++) q[i+j*ldq] = q2[i+j*n]; | |
278 | ✗ | PetscCallBLAS("BLAScopy",BLAScopy_(&n, dlamda, &one, d, &one)); | |
279 | ✗ | PetscFunctionReturn(PETSC_SUCCESS); | |
280 | } | ||
281 | |||
282 | /* If there are multiple eigenvalues then the problem deflates. Here */ | ||
283 | /* the number of equal eigenvalues is found. As each equal */ | ||
284 | /* eigenvalue is found, an elementary reflector is computed to rotate */ | ||
285 | /* the corresponding eigensubspace so that the corresponding */ | ||
286 | /* components of Z are zero in this new basis. */ | ||
287 | |||
288 | /* initialize the column types */ | ||
289 | |||
290 | /* first N1 columns are initially (before deflation) of type 1 */ | ||
291 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1005 | for (i = 0; i < n1; ++i) coltyp[i] = 1; |
292 | /* columns N1+1 to N are initially (before deflation) of type 3 */ | ||
293 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
285 | for (i = n1; i < n; ++i) coltyp[i] = 3; |
294 | |||
295 | 105 | *k = 0; | |
296 | 105 | k2 = n + 1; | |
297 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
105 | for (j = 0; j < n; ++j) { |
298 | 105 | nj = indx[j]-1; | |
299 |
3/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
105 | if (*rho * PetscAbs(z[nj]) <= tol) { |
300 | |||
301 | /* Deflate due to small z component. */ | ||
302 | ✗ | ++(*dz); | |
303 | ✗ | --k2; | |
304 | |||
305 | /* deflated eigenpair, therefore column type 4 */ | ||
306 | ✗ | coltyp[nj] = 4; | |
307 | ✗ | indxp[k2-1] = nj+1; | |
308 | ✗ | indxq[k2-1] = nj+1; | |
309 | ✗ | if (j+1 == n) goto L100; | |
310 | } else { | ||
311 | |||
312 | /* not deflated */ | ||
313 | 105 | pj = nj; | |
314 | 105 | goto L80; | |
315 | } | ||
316 | } | ||
317 | factmp = 1; | ||
318 | L80: | ||
319 | 1080 | ++j; | |
320 | 1080 | nj = indx[j]-1; | |
321 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1080 | if (j+1 > n) goto L100; |
322 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
975 | if (*rho * PetscAbs(z[nj]) <= tol) { |
323 | |||
324 | /* Deflate due to small z component. */ | ||
325 | 186 | ++(*dz); | |
326 | 186 | --k2; | |
327 | 186 | coltyp[nj] = 4; | |
328 | 186 | indxp[k2-1] = nj+1; | |
329 | 186 | indxq[k2-1] = nj+1; | |
330 | } else { | ||
331 | |||
332 | /* Check if eigenvalues are close enough to allow deflation. */ | ||
333 | 789 | s = z[pj]; | |
334 | 789 | c = z[nj]; | |
335 | |||
336 | /* Find sqrt(a**2+b**2) without overflow or */ | ||
337 | /* destructive underflow. */ | ||
338 | |||
339 | 789 | tau = SlepcAbs(c, s); | |
340 | 789 | t = d[nj] - d[pj]; | |
341 | 789 | c /= tau; | |
342 | 789 | s = -s / tau; | |
343 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
789 | if (PetscAbs(t * c * s) <= tol) { |
344 | |||
345 | /* Deflate due to close eigenvalues. */ | ||
346 | 94 | ++(*de); | |
347 | 94 | z[nj] = tau; | |
348 | 94 | z[pj] = 0.; | |
349 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 2 times.
|
94 | if (coltyp[nj] != coltyp[pj]) coltyp[nj] = 2; |
350 | |||
351 | /* if deflation happens across the two eigenvector blocks */ | ||
352 | /* (eigenvalues corresponding to different blocks), then */ | ||
353 | /* on column in the eigenvector matrix fills up completely */ | ||
354 | /* (changes from type 1 or 3 to type 2) */ | ||
355 | |||
356 | /* eigenpair PJ is deflated, therefore the column type changes */ | ||
357 | /* to 4 */ | ||
358 | |||
359 | 94 | coltyp[pj] = 4; | |
360 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
94 | PetscCallBLAS("BLASrot",BLASrot_(&n, &q[pj*ldq], &one, &q[nj*ldq], &one, &c, &s)); |
361 | 94 | t = d[pj] * (c * c) + d[nj] * (s * s); | |
362 | 94 | d[nj] = d[pj] * (s * s) + d[nj] * (c * c); | |
363 | 94 | d[pj] = t; | |
364 | 94 | --k2; | |
365 | 94 | i = 1; | |
366 | 94 | L90: | |
367 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
94 | if (k2 + i <= n) { |
368 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
75 | if (d[pj] < d[indxp[k2 + i-1]-1]) { |
369 | ✗ | indxp[k2 + i - 2] = indxp[k2 + i - 1]; | |
370 | ✗ | indxq[k2 + i - 2] = indxq[k2 + i - 1]; | |
371 | ✗ | indxp[k2 + i - 1] = pj + 1; | |
372 | ✗ | indxq[k2 + i - 2] = pj + 1; | |
373 | ✗ | ++i; | |
374 | ✗ | goto L90; | |
375 | } else { | ||
376 | 75 | indxp[k2 + i - 2] = pj + 1; | |
377 | 75 | indxq[k2 + i - 2] = pj + 1; | |
378 | } | ||
379 | } else { | ||
380 | 19 | indxp[k2 + i - 2] = pj + 1; | |
381 | 19 | indxq[k2 + i - 2] = pj + 1; | |
382 | } | ||
383 | pj = nj; | ||
384 | factmp = -1; | ||
385 | } else { | ||
386 | |||
387 | /* do not deflate */ | ||
388 | 695 | ++(*k); | |
389 | 695 | dlamda[*k-1] = d[pj]; | |
390 | 695 | w[*k-1] = z[pj]; | |
391 | 695 | indxp[*k-1] = pj + 1; | |
392 | 695 | indxq[*k-1] = pj + 1; | |
393 | 695 | factmp = 1; | |
394 | 695 | pj = nj; | |
395 | } | ||
396 | } | ||
397 | 975 | goto L80; | |
398 | 105 | L100: | |
399 | |||
400 | /* Record the last eigenvalue. */ | ||
401 | 105 | ++(*k); | |
402 | 105 | dlamda[*k-1] = d[pj]; | |
403 | 105 | w[*k-1] = z[pj]; | |
404 | 105 | indxp[*k-1] = pj+1; | |
405 | 105 | indxq[*k-1] = (pj+1) * factmp; | |
406 | |||
407 | /* Count up the total number of the various types of columns, then */ | ||
408 | /* form a permutation which positions the four column types into */ | ||
409 | /* four uniform groups (although one or more of these groups may be */ | ||
410 | /* empty). */ | ||
411 | |||
412 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
525 | for (j = 0; j < 4; ++j) ctot[j] = 0; |
413 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1185 | for (j = 0; j < n; ++j) { |
414 | 1080 | ct = coltyp[j]; | |
415 | 1080 | ++ctot[ct-1]; | |
416 | } | ||
417 | |||
418 | /* PSM(*) = Position in SubMatrix (of types 1 through 4) */ | ||
419 | 105 | psm[0] = 1; | |
420 | 105 | psm[1] = ctot[0] + 1; | |
421 | 105 | psm[2] = psm[1] + ctot[1]; | |
422 | 105 | psm[3] = psm[2] + ctot[2]; | |
423 | 105 | *k = n - ctot[3]; | |
424 | |||
425 | /* Fill out the INDXC array so that the permutation which it induces */ | ||
426 | /* will place all type-1 columns first, all type-2 columns next, */ | ||
427 | /* then all type-3's, and finally all type-4's. */ | ||
428 | |||
429 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
1185 | for (j = 0; j < n; ++j) { |
430 | 1080 | js = indxp[j]; | |
431 | 1080 | ct = coltyp[js-1]; | |
432 | 1080 | indx[psm[ct - 1]-1] = js; | |
433 | 1080 | indxc[psm[ct - 1]-1] = j+1; | |
434 | 1080 | ++psm[ct - 1]; | |
435 | } | ||
436 | |||
437 | /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */ | ||
438 | /* and Q2 respectively. The eigenvalues/vectors which were not */ | ||
439 | /* deflated go into the first K slots of DLAMDA and Q2 respectively, */ | ||
440 | /* while those which were deflated go into the last N - K slots. */ | ||
441 | |||
442 | 105 | i = 0; | |
443 | 105 | iq1 = 0; | |
444 | 105 | iq2 = (ctot[0] + ctot[1]) * n1; | |
445 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
725 | for (j = 0; j < ctot[0]; ++j) { |
446 | 620 | js = indx[i]; | |
447 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
620 | PetscCallBLAS("BLAScopy",BLAScopy_(&n1, &q[(js-1)*ldq], &one, &q2[iq1], &one)); |
448 | 620 | z[i] = d[js-1]; | |
449 | 620 | ++i; | |
450 | 620 | iq1 += n1; | |
451 | } | ||
452 | |||
453 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
195 | for (j = 0; j < ctot[1]; ++j) { |
454 | 90 | js = indx[i]; | |
455 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
90 | PetscCallBLAS("BLAScopy",BLAScopy_(&n1, &q[(js-1)*ldq], &one, &q2[iq1], &one)); |
456 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
90 | PetscCallBLAS("BLAScopy",BLAScopy_(&n2, &q[n1+(js-1)*ldq], &one, &q2[iq2], &one)); |
457 | 90 | z[i] = d[js-1]; | |
458 | 90 | ++i; | |
459 | 90 | iq1 += n1; | |
460 | 90 | iq2 += n2; | |
461 | } | ||
462 | |||
463 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
195 | for (j = 0; j < ctot[2]; ++j) { |
464 | 90 | js = indx[i]; | |
465 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
90 | PetscCallBLAS("BLAScopy",BLAScopy_(&n2, &q[n1+(js-1)*ldq], &one, &q2[iq2], &one)); |
466 | 90 | z[i] = d[js-1]; | |
467 | 90 | ++i; | |
468 | 90 | iq2 += n2; | |
469 | } | ||
470 | |||
471 | 385 | iq1 = iq2; | |
472 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
385 | for (j = 0; j < ctot[3]; ++j) { |
473 | 280 | js = indx[i]; | |
474 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
280 | PetscCallBLAS("BLAScopy",BLAScopy_(&n, &q[(js-1)*ldq], &one, &q2[iq2], &one)); |
475 | 280 | iq2 += n; | |
476 | 280 | z[i] = d[js-1]; | |
477 | 280 | ++i; | |
478 | } | ||
479 | |||
480 | /* The deflated eigenvalues and their corresponding vectors go back */ | ||
481 | /* into the last N - K slots of D and Q respectively. */ | ||
482 | |||
483 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
|
5305 | for (j=0;j<ctot[3];j++) for (i=0;i<n;i++) q[i+(j+*k)*ldq] = q2[iq1+i+j*n]; |
484 | 105 | i1 = n - *k; | |
485 |
10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
105 | PetscCallBLAS("BLAScopy",BLAScopy_(&i1, &z[*k], &one, &d[*k], &one)); |
486 | |||
487 | /* Copy CTOT into COLTYP for referencing in DLAED3M. */ | ||
488 | |||
489 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
525 | for (j = 0; j < 4; ++j) coltyp[j] = ctot[j]; |
490 |
6/12✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
|
21 | PetscFunctionReturn(PETSC_SUCCESS); |
491 | } | ||
492 |