Line data Source code
1 : /*
2 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 : SLEPc - Scalable Library for Eigenvalue Problem Computations
4 : Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5 :
6 : This file is part of SLEPc.
7 : SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 : */
10 : /*
11 : BDC - Block-divide and conquer (see description in README file)
12 : */
13 :
14 : #include <slepc/private/dsimpl.h>
15 : #include <slepcblaslapack.h>
16 :
17 21 : PetscErrorCode BDC_dsrtdf_(PetscBLASInt *k,PetscBLASInt n,PetscBLASInt n1,
18 : PetscReal *d,PetscReal *q,PetscBLASInt ldq,PetscBLASInt *indxq,
19 : PetscReal *rho,PetscReal *z,PetscReal *dlamda,PetscReal *w,
20 : PetscReal *q2,PetscBLASInt *indx,PetscBLASInt *indxc,PetscBLASInt *indxp,
21 : PetscBLASInt *coltyp,PetscReal reltol,PetscBLASInt *dz,PetscBLASInt *de,
22 : PetscBLASInt *info)
23 : {
24 : /* -- Routine written in LAPACK Version 3.0 style -- */
25 : /* *************************************************** */
26 : /* Written by */
27 : /* Michael Moldaschl and Wilfried Gansterer */
28 : /* University of Vienna */
29 : /* last modification: March 16, 2014 */
30 :
31 : /* Small adaptations of original code written by */
32 : /* Wilfried Gansterer and Bob Ward, */
33 : /* Department of Computer Science, University of Tennessee */
34 : /* see https://doi.org/10.1137/S1064827501399432 */
35 : /* *************************************************** */
36 :
37 : /* Purpose */
38 : /* ======= */
39 :
40 : /* DSRTDF merges the two sets of eigenvalues of a rank-one modified */
41 : /* diagonal matrix D+ZZ^T together into a single sorted set. Then it tries */
42 : /* to deflate the size of the problem. */
43 : /* There are two ways in which deflation can occur: when two or more */
44 : /* eigenvalues of D are close together or if there is a tiny entry in the */
45 : /* Z vector. For each such occurrence the order of the related secular */
46 : /* equation problem is reduced by one. */
47 :
48 : /* Arguments */
49 : /* ========= */
50 :
51 : /* K (output) INTEGER */
52 : /* The number of non-deflated eigenvalues, and the order of the */
53 : /* related secular equation. 0 <= K <=N. */
54 :
55 : /* N (input) INTEGER */
56 : /* The dimension of the diagonal matrix. N >= 0. */
57 :
58 : /* N1 (input) INTEGER */
59 : /* The location of the last eigenvalue in the leading sub-matrix. */
60 : /* min(1,N) <= N1 <= max(1,N). */
61 :
62 : /* D (input/output) DOUBLE PRECISION array, dimension (N) */
63 : /* On entry, D contains the eigenvalues of the two submatrices to */
64 : /* be combined. */
65 : /* On exit, D contains the trailing (N-K) updated eigenvalues */
66 : /* (those which were deflated) sorted into increasing order. */
67 :
68 : /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */
69 : /* On entry, Q contains the eigenvectors of two submatrices in */
70 : /* the two square blocks with corners at (1,1), (N1,N1) */
71 : /* and (N1+1, N1+1), (N,N). */
72 : /* On exit, Q contains the trailing (N-K) updated eigenvectors */
73 : /* (those which were deflated) in its last N-K columns. */
74 :
75 : /* LDQ (input) INTEGER */
76 : /* The leading dimension of the array Q. LDQ >= max(1,N). */
77 :
78 : /* INDXQ (input/output) INTEGER array, dimension (N) */
79 : /* The permutation which separately sorts the two sub-problems */
80 : /* in D into ascending order. Note that elements in the second */
81 : /* half of this permutation must first have N1 added to their */
82 : /* values. Destroyed on exit. */
83 :
84 : /* RHO (input/output) DOUBLE PRECISION */
85 : /* On entry, the off-diagonal element associated with the rank-1 */
86 : /* cut which originally split the two submatrices which are now */
87 : /* being recombined. */
88 : /* On exit, RHO has been modified to the value required by */
89 : /* DLAED3M (made positive and multiplied by two, such that */
90 : /* the modification vector has norm one). */
91 :
92 : /* Z (input/output) DOUBLE PRECISION array, dimension (N) */
93 : /* On entry, Z contains the updating vector. */
94 : /* On exit, the contents of Z has been destroyed by the updating */
95 : /* process. */
96 :
97 : /* DLAMDA (output) DOUBLE PRECISION array, dimension (N) */
98 : /* A copy of the first K non-deflated eigenvalues which */
99 : /* will be used by DLAED3M to form the secular equation. */
100 :
101 : /* W (output) DOUBLE PRECISION array, dimension (N) */
102 : /* The first K values of the final deflation-altered z-vector */
103 : /* which will be passed to DLAED3M. */
104 :
105 : /* Q2 (output) DOUBLE PRECISION array, dimension */
106 : /* (N*N) (if everything is deflated) or */
107 : /* (N1*(COLTYP(1)+COLTYP(2)) + (N-N1)*(COLTYP(2)+COLTYP(3))) */
108 : /* (if not everything is deflated) */
109 : /* If everything is deflated, then N*N intermediate workspace */
110 : /* is needed in Q2. */
111 : /* If not everything is deflated, Q2 contains on exit a copy of the */
112 : /* first K non-deflated eigenvectors which will be used by */
113 : /* DLAED3M in a matrix multiply (DGEMM) to accumulate the new */
114 : /* eigenvectors, followed by the N-K deflated eigenvectors. */
115 :
116 : /* INDX (workspace) INTEGER array, dimension (N) */
117 : /* The permutation used to sort the contents of DLAMDA into */
118 : /* ascending order. */
119 :
120 : /* INDXC (output) INTEGER array, dimension (N) */
121 : /* The permutation used to arrange the columns of the deflated */
122 : /* Q matrix into three groups: columns in the first group contain */
123 : /* non-zero elements only at and above N1 (type 1), columns in the */
124 : /* second group are dense (type 2), and columns in the third group */
125 : /* contain non-zero elements only below N1 (type 3). */
126 :
127 : /* INDXP (workspace) INTEGER array, dimension (N) */
128 : /* The permutation used to place deflated values of D at the end */
129 : /* of the array. INDXP(1:K) points to the nondeflated D-values */
130 : /* and INDXP(K+1:N) points to the deflated eigenvalues. */
131 :
132 : /* COLTYP (workspace/output) INTEGER array, dimension (N) */
133 : /* During execution, a label which will indicate which of the */
134 : /* following types a column in the Q2 matrix is: */
135 : /* 1 : non-zero in the upper half only; */
136 : /* 2 : dense; */
137 : /* 3 : non-zero in the lower half only; */
138 : /* 4 : deflated. */
139 : /* On exit, COLTYP(i) is the number of columns of type i, */
140 : /* for i=1 to 4 only. */
141 :
142 : /* RELTOL (input) DOUBLE PRECISION */
143 : /* User specified deflation tolerance. If RELTOL.LT.20*EPS, then */
144 : /* the standard value used in the original LAPACK routines is used. */
145 :
146 : /* DZ (output) INTEGER, DZ.GE.0 */
147 : /* counts the deflation due to a small component in the modification */
148 : /* vector. */
149 :
150 : /* DE (output) INTEGER, DE.GE.0 */
151 : /* counts the deflation due to close eigenvalues. */
152 :
153 : /* INFO (output) INTEGER */
154 : /* = 0: successful exit. */
155 : /* < 0: if INFO = -i, the i-th argument had an illegal value. */
156 :
157 : /* Further Details */
158 : /* =============== */
159 :
160 : /* Based on code written by */
161 : /* Wilfried Gansterer and Bob Ward, */
162 : /* Department of Computer Science, University of Tennessee */
163 :
164 : /* Based on the design of the LAPACK code DLAED2 with modifications */
165 : /* to allow a block divide and conquer scheme */
166 :
167 : /* DLAED2 was written by Jeff Rutter, Computer Science Division, University */
168 : /* of California at Berkeley, USA, and modified by Francoise Tisseur, */
169 : /* University of Tennessee. */
170 :
171 : /* ===================================================================== */
172 :
173 21 : PetscReal c, s, t, eps, tau, tol, dmax, dmone = -1.;
174 21 : PetscBLASInt i, j, i1, k2, n2, ct, nj, pj=0, js, iq1, iq2;
175 21 : PetscBLASInt psm[4], imax, jmax, ctot[4], factmp=1, one=1;
176 :
177 21 : PetscFunctionBegin;
178 21 : *info = 0;
179 :
180 21 : if (n < 0) *info = -2;
181 21 : else if (n1 < PetscMin(1,n) || n1 > PetscMax(1,n)) *info = -3;
182 21 : else if (ldq < PetscMax(1,n)) *info = -6;
183 21 : PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DSRTDF",-(*info));
184 :
185 : /* Initialize deflation counters */
186 :
187 21 : *dz = 0;
188 21 : *de = 0;
189 :
190 : /* **************************************************************************** */
191 :
192 : /* Quick return if possible */
193 :
194 21 : if (n == 0) PetscFunctionReturn(PETSC_SUCCESS);
195 :
196 : /* **************************************************************************** */
197 :
198 21 : n2 = n - n1;
199 :
200 : /* Modify Z so that RHO is positive. */
201 :
202 21 : if (*rho < 0.) PetscCallBLAS("BLASscal",BLASscal_(&n2, &dmone, &z[n1], &one));
203 :
204 : /* Normalize z so that norm2(z) = 1. Since z is the concatenation of */
205 : /* two normalized vectors, norm2(z) = sqrt(2). (NOTE that this holds also */
206 : /* here in the approximate block-tridiagonal D&C because the two vectors are */
207 : /* singular vectors, but it would not necessarily hold in a D&C for a */
208 : /* general banded matrix !) */
209 :
210 21 : t = 1. / PETSC_SQRT2;
211 21 : PetscCallBLAS("BLASscal",BLASscal_(&n, &t, z, &one));
212 :
213 : /* NOTE: at this point the value of RHO is modified in order to */
214 : /* normalize Z: RHO = ABS( norm2(z)**2 * RHO */
215 : /* in our case: norm2(z) = sqrt(2), and therefore: */
216 :
217 21 : *rho = PetscAbs(*rho * 2.);
218 :
219 : /* Sort the eigenvalues into increasing order */
220 :
221 57 : for (i = n1; i < n; ++i) indxq[i] += n1;
222 :
223 : /* re-integrate the deflated parts from the last pass */
224 :
225 237 : for (i = 0; i < n; ++i) dlamda[i] = d[indxq[i]-1];
226 21 : PetscCallBLAS("LAPACKlamrg",LAPACKlamrg_(&n1, &n2, dlamda, &one, &one, indxc));
227 237 : for (i = 0; i < n; ++i) indx[i] = indxq[indxc[i]-1];
228 237 : for (i = 0; i < n; ++i) indxq[i] = 0;
229 :
230 : /* Calculate the allowable deflation tolerance */
231 :
232 : /* imax = BLASamax_(&n, z, &one); */
233 21 : imax = 1;
234 21 : dmax = PetscAbsReal(z[0]);
235 216 : for (i=1;i<n;i++) {
236 195 : if (PetscAbsReal(z[i])>dmax) {
237 41 : imax = i+1;
238 41 : dmax = PetscAbsReal(z[i]);
239 : }
240 : }
241 : /* jmax = BLASamax_(&n, d, &one); */
242 21 : jmax = 1;
243 21 : dmax = PetscAbsReal(d[0]);
244 216 : for (i=1;i<n;i++) {
245 195 : if (PetscAbsReal(d[i])>dmax) {
246 77 : jmax = i+1;
247 77 : dmax = PetscAbsReal(d[i]);
248 : }
249 : }
250 :
251 21 : eps = LAPACKlamch_("Epsilon");
252 21 : if (reltol < eps * 20) {
253 : /* use the standard deflation tolerance from the original LAPACK code */
254 38 : tol = eps * 8. * PetscMax(PetscAbs(d[jmax-1]),PetscAbs(z[imax-1]));
255 : } else {
256 : /* otherwise set TOL to the input parameter RELTOL ! */
257 0 : tol = reltol * PetscMax(PetscAbs(d[jmax-1]),PetscAbs(z[imax-1]));
258 : }
259 :
260 : /* If the rank-1 modifier is small enough, no more needs to be done */
261 : /* except to reorganize Q so that its columns correspond with the */
262 : /* elements in D. */
263 :
264 21 : if (*rho * PetscAbs(z[imax-1]) <= tol) {
265 :
266 : /* complete deflation because of small rank-one modifier */
267 : /* NOTE: in this case we need N*N space in the array Q2 ! */
268 :
269 0 : *dz = n; *k = 0;
270 0 : iq2 = 1;
271 0 : for (j = 0; j < n; ++j) {
272 0 : i = indx[j]; indxq[j] = i;
273 0 : PetscCallBLAS("BLAScopy",BLAScopy_(&n, &q[(i-1)*ldq], &one, &q2[iq2-1], &one));
274 0 : dlamda[j] = d[i-1];
275 0 : iq2 += n;
276 : }
277 0 : for (j=0;j<n;j++) for (i=0;i<n;i++) q[i+j*ldq] = q2[i+j*n];
278 0 : PetscCallBLAS("BLAScopy",BLAScopy_(&n, dlamda, &one, d, &one));
279 0 : PetscFunctionReturn(PETSC_SUCCESS);
280 : }
281 :
282 : /* If there are multiple eigenvalues then the problem deflates. Here */
283 : /* the number of equal eigenvalues is found. As each equal */
284 : /* eigenvalue is found, an elementary reflector is computed to rotate */
285 : /* the corresponding eigensubspace so that the corresponding */
286 : /* components of Z are zero in this new basis. */
287 :
288 : /* initialize the column types */
289 :
290 : /* first N1 columns are initially (before deflation) of type 1 */
291 201 : for (i = 0; i < n1; ++i) coltyp[i] = 1;
292 : /* columns N1+1 to N are initially (before deflation) of type 3 */
293 57 : for (i = n1; i < n; ++i) coltyp[i] = 3;
294 :
295 21 : *k = 0;
296 21 : k2 = n + 1;
297 21 : for (j = 0; j < n; ++j) {
298 21 : nj = indx[j]-1;
299 21 : if (*rho * PetscAbs(z[nj]) <= tol) {
300 :
301 : /* Deflate due to small z component. */
302 0 : ++(*dz);
303 0 : --k2;
304 :
305 : /* deflated eigenpair, therefore column type 4 */
306 0 : coltyp[nj] = 4;
307 0 : indxp[k2-1] = nj+1;
308 0 : indxq[k2-1] = nj+1;
309 0 : if (j+1 == n) goto L100;
310 : } else {
311 :
312 : /* not deflated */
313 21 : pj = nj;
314 21 : goto L80;
315 : }
316 : }
317 : factmp = 1;
318 : L80:
319 216 : ++j;
320 216 : nj = indx[j]-1;
321 216 : if (j+1 > n) goto L100;
322 195 : if (*rho * PetscAbs(z[nj]) <= tol) {
323 :
324 : /* Deflate due to small z component. */
325 38 : ++(*dz);
326 38 : --k2;
327 38 : coltyp[nj] = 4;
328 38 : indxp[k2-1] = nj+1;
329 38 : indxq[k2-1] = nj+1;
330 : } else {
331 :
332 : /* Check if eigenvalues are close enough to allow deflation. */
333 157 : s = z[pj];
334 157 : c = z[nj];
335 :
336 : /* Find sqrt(a**2+b**2) without overflow or */
337 : /* destructive underflow. */
338 :
339 157 : tau = SlepcAbs(c, s);
340 157 : t = d[nj] - d[pj];
341 157 : c /= tau;
342 157 : s = -s / tau;
343 157 : if (PetscAbs(t * c * s) <= tol) {
344 :
345 : /* Deflate due to close eigenvalues. */
346 18 : ++(*de);
347 18 : z[nj] = tau;
348 18 : z[pj] = 0.;
349 18 : if (coltyp[nj] != coltyp[pj]) coltyp[nj] = 2;
350 :
351 : /* if deflation happens across the two eigenvector blocks */
352 : /* (eigenvalues corresponding to different blocks), then */
353 : /* on column in the eigenvector matrix fills up completely */
354 : /* (changes from type 1 or 3 to type 2) */
355 :
356 : /* eigenpair PJ is deflated, therefore the column type changes */
357 : /* to 4 */
358 :
359 18 : coltyp[pj] = 4;
360 18 : PetscCallBLAS("BLASrot",BLASrot_(&n, &q[pj*ldq], &one, &q[nj*ldq], &one, &c, &s));
361 18 : t = d[pj] * (c * c) + d[nj] * (s * s);
362 18 : d[nj] = d[pj] * (s * s) + d[nj] * (c * c);
363 18 : d[pj] = t;
364 18 : --k2;
365 18 : i = 1;
366 18 : L90:
367 18 : if (k2 + i <= n) {
368 15 : if (d[pj] < d[indxp[k2 + i-1]-1]) {
369 0 : indxp[k2 + i - 2] = indxp[k2 + i - 1];
370 0 : indxq[k2 + i - 2] = indxq[k2 + i - 1];
371 0 : indxp[k2 + i - 1] = pj + 1;
372 0 : indxq[k2 + i - 2] = pj + 1;
373 0 : ++i;
374 0 : goto L90;
375 : } else {
376 15 : indxp[k2 + i - 2] = pj + 1;
377 15 : indxq[k2 + i - 2] = pj + 1;
378 : }
379 : } else {
380 3 : indxp[k2 + i - 2] = pj + 1;
381 3 : indxq[k2 + i - 2] = pj + 1;
382 : }
383 : pj = nj;
384 : factmp = -1;
385 : } else {
386 :
387 : /* do not deflate */
388 139 : ++(*k);
389 139 : dlamda[*k-1] = d[pj];
390 139 : w[*k-1] = z[pj];
391 139 : indxp[*k-1] = pj + 1;
392 139 : indxq[*k-1] = pj + 1;
393 139 : factmp = 1;
394 139 : pj = nj;
395 : }
396 : }
397 195 : goto L80;
398 21 : L100:
399 :
400 : /* Record the last eigenvalue. */
401 21 : ++(*k);
402 21 : dlamda[*k-1] = d[pj];
403 21 : w[*k-1] = z[pj];
404 21 : indxp[*k-1] = pj+1;
405 21 : indxq[*k-1] = (pj+1) * factmp;
406 :
407 : /* Count up the total number of the various types of columns, then */
408 : /* form a permutation which positions the four column types into */
409 : /* four uniform groups (although one or more of these groups may be */
410 : /* empty). */
411 :
412 105 : for (j = 0; j < 4; ++j) ctot[j] = 0;
413 237 : for (j = 0; j < n; ++j) {
414 216 : ct = coltyp[j];
415 216 : ++ctot[ct-1];
416 : }
417 :
418 : /* PSM(*) = Position in SubMatrix (of types 1 through 4) */
419 21 : psm[0] = 1;
420 21 : psm[1] = ctot[0] + 1;
421 21 : psm[2] = psm[1] + ctot[1];
422 21 : psm[3] = psm[2] + ctot[2];
423 21 : *k = n - ctot[3];
424 :
425 : /* Fill out the INDXC array so that the permutation which it induces */
426 : /* will place all type-1 columns first, all type-2 columns next, */
427 : /* then all type-3's, and finally all type-4's. */
428 :
429 237 : for (j = 0; j < n; ++j) {
430 216 : js = indxp[j];
431 216 : ct = coltyp[js-1];
432 216 : indx[psm[ct - 1]-1] = js;
433 216 : indxc[psm[ct - 1]-1] = j+1;
434 216 : ++psm[ct - 1];
435 : }
436 :
437 : /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */
438 : /* and Q2 respectively. The eigenvalues/vectors which were not */
439 : /* deflated go into the first K slots of DLAMDA and Q2 respectively, */
440 : /* while those which were deflated go into the last N - K slots. */
441 :
442 21 : i = 0;
443 21 : iq1 = 0;
444 21 : iq2 = (ctot[0] + ctot[1]) * n1;
445 145 : for (j = 0; j < ctot[0]; ++j) {
446 124 : js = indx[i];
447 124 : PetscCallBLAS("BLAScopy",BLAScopy_(&n1, &q[(js-1)*ldq], &one, &q2[iq1], &one));
448 124 : z[i] = d[js-1];
449 124 : ++i;
450 124 : iq1 += n1;
451 : }
452 :
453 39 : for (j = 0; j < ctot[1]; ++j) {
454 18 : js = indx[i];
455 18 : PetscCallBLAS("BLAScopy",BLAScopy_(&n1, &q[(js-1)*ldq], &one, &q2[iq1], &one));
456 18 : PetscCallBLAS("BLAScopy",BLAScopy_(&n2, &q[n1+(js-1)*ldq], &one, &q2[iq2], &one));
457 18 : z[i] = d[js-1];
458 18 : ++i;
459 18 : iq1 += n1;
460 18 : iq2 += n2;
461 : }
462 :
463 39 : for (j = 0; j < ctot[2]; ++j) {
464 18 : js = indx[i];
465 18 : PetscCallBLAS("BLAScopy",BLAScopy_(&n2, &q[n1+(js-1)*ldq], &one, &q2[iq2], &one));
466 18 : z[i] = d[js-1];
467 18 : ++i;
468 18 : iq2 += n2;
469 : }
470 :
471 77 : iq1 = iq2;
472 77 : for (j = 0; j < ctot[3]; ++j) {
473 56 : js = indx[i];
474 56 : PetscCallBLAS("BLAScopy",BLAScopy_(&n, &q[(js-1)*ldq], &one, &q2[iq2], &one));
475 56 : iq2 += n;
476 56 : z[i] = d[js-1];
477 56 : ++i;
478 : }
479 :
480 : /* The deflated eigenvalues and their corresponding vectors go back */
481 : /* into the last N - K slots of D and Q respectively. */
482 :
483 1061 : for (j=0;j<ctot[3];j++) for (i=0;i<n;i++) q[i+(j+*k)*ldq] = q2[iq1+i+j*n];
484 21 : i1 = n - *k;
485 21 : PetscCallBLAS("BLAScopy",BLAScopy_(&i1, &z[*k], &one, &d[*k], &one));
486 :
487 : /* Copy CTOT into COLTYP for referencing in DLAED3M. */
488 :
489 105 : for (j = 0; j < 4; ++j) coltyp[j] = ctot[j];
490 21 : PetscFunctionReturn(PETSC_SUCCESS);
491 : }
|