1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: BDC - Block-divide and conquer (see description in README file)
12: */
14: #include <slepc/private/dsimpl.h> 15: #include <slepcblaslapack.h> 17: static PetscErrorCode cutlr_(PetscBLASInt start,PetscBLASInt n,PetscBLASInt blkct, 18: PetscBLASInt *bsizes,PetscBLASInt *ranks,PetscBLASInt *cut, 19: PetscBLASInt *lsum,PetscBLASInt *lblks,PetscBLASInt *info) 20: {
21: /* -- Routine written in LAPACK Version 3.0 style -- */
22: /* *************************************************** */
23: /* Written by */
24: /* Michael Moldaschl and Wilfried Gansterer */
25: /* University of Vienna */
26: /* last modification: March 16, 2014 */
28: /* Small adaptations of original code written by */
29: /* Wilfried Gansterer and Bob Ward, */
30: /* Department of Computer Science, University of Tennessee */
31: /* see https://doi.org/10.1137/S1064827501399432 */
32: /* *************************************************** */
34: /* Purpose */
35: /* ======= */
37: /* CUTLR computes the optimal cut in a sequence of BLKCT neighboring */
38: /* blocks whose sizes are given by the array BSIZES. */
39: /* The sum of all block sizes in the sequence considered is given by N. */
40: /* The cut is optimal in the sense that the difference of the sizes of */
41: /* the resulting two halves is minimum over all cuts with minimum ranks */
42: /* between blocks of the sequence considered. */
44: /* Arguments */
45: /* ========= */
47: /* START (input) INTEGER */
48: /* In the original array KSIZES of the calling routine DIBTDC, */
49: /* the position where the sequence considered in this routine starts. */
50: /* START >= 1. */
52: /* N (input) INTEGER */
53: /* The sum of all the block sizes of the sequence to be cut = */
54: /* = sum_{i=1}^{BLKCT} BSIZES(I). */
55: /* N >= 3. */
57: /* BLKCT (input) INTEGER */
58: /* The number of blocks in the sequence to be cut. */
59: /* BLKCT >= 3. */
61: /* BSIZES (input) INTEGER array, dimension (BLKCT) */
62: /* The dimensions of the (quadratic) blocks of the sequence to be */
63: /* cut. sum_{i=1}^{BLKCT} BSIZES(I) = N. */
65: /* RANKS (input) INTEGER array, dimension (BLKCT-1) */
66: /* The ranks determining the approximations of the off-diagonal */
67: /* blocks in the sequence considered. */
69: /* CUT (output) INTEGER */
70: /* After the optimum cut has been determined, the position (in the */
71: /* overall problem as worked on in DIBTDC !) of the last block in */
72: /* the first half of the sequence to be cut. */
73: /* START <= CUT <= START+BLKCT-2. */
75: /* LSUM (output) INTEGER */
76: /* After the optimum cut has been determined, the sum of the */
77: /* block sizes in the first half of the sequence to be cut. */
78: /* LSUM < N. */
80: /* LBLKS (output) INTEGER */
81: /* After the optimum cut has been determined, the number of the */
82: /* blocks in the first half of the sequence to be cut. */
83: /* 1 <= LBLKS < BLKCT. */
85: /* INFO (output) INTEGER */
86: /* = 0: successful exit. */
87: /* < 0: illegal arguments. */
88: /* if INFO = -i, the i-th (input) argument had an illegal */
89: /* value. */
90: /* > 0: illegal results. */
91: /* if INFO = i, the i-th (output) argument had an illegal */
92: /* value. */
94: /* Further Details */
95: /* =============== */
97: /* Based on code written by */
98: /* Wilfried Gansterer and Bob Ward, */
99: /* Department of Computer Science, University of Tennessee */
101: /* ===================================================================== */
103: PetscBLASInt i, ksk, kchk, ksum, nhalf, deviat, mindev, minrnk, tmpsum;
105: PetscFunctionBegin;
106: *info = 0;
107: *lblks = 1;
108: *lsum = 1;
109: *cut = start;
111: if (start < 1) *info = -1;
112: else if (n < 3) *info = -2;
113: else if (blkct < 3) *info = -3;
114: if (*info == 0) {
115: ksum = 0;
116: kchk = 0;
117: for (i = 0; i < blkct; ++i) {
118: ksk = bsizes[i];
119: ksum += ksk;
120: if (ksk < 1) kchk = 1;
121: }
122: if (ksum != n || kchk == 1) *info = -4;
123: }
124: PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in CUTLR",-(*info));
126: /* determine smallest rank in the range considered */
128: minrnk = n;
129: for (i = 0; i < blkct-1; ++i) {
130: if (ranks[i] < minrnk) minrnk = ranks[i];
131: }
133: /* determine best cut among those with smallest rank */
135: nhalf = n / 2;
136: tmpsum = 0;
137: mindev = n;
138: for (i = 0; i < blkct; ++i) {
139: tmpsum += bsizes[i];
140: if (ranks[i] == minrnk) {
142: /* determine deviation from "optimal" cut NHALF */
144: deviat = tmpsum - nhalf;
145: if (deviat<0) deviat = -deviat;
147: /* compare to best deviation so far */
149: if (deviat < mindev) {
150: mindev = deviat;
151: *cut = start + i;
152: *lblks = i + 1;
153: *lsum = tmpsum;
154: }
155: }
156: }
158: if (*cut < start || *cut >= start + blkct - 1) *info = 6;
159: else if (*lsum < 1 || *lsum >= n) *info = 7;
160: else if (*lblks < 1 || *lblks >= blkct) *info = 8;
161: PetscFunctionReturn(PETSC_SUCCESS);
162: }
164: PetscErrorCode BDC_dibtdc_(const char *jobz,PetscBLASInt n,PetscBLASInt nblks,165: PetscBLASInt *ksizes,PetscReal *d,PetscBLASInt l1d,PetscBLASInt l2d,166: PetscReal *e,PetscBLASInt *rank,PetscBLASInt l1e,PetscBLASInt l2e,167: PetscReal tol,PetscReal *ev,PetscReal *z,PetscBLASInt ldz,PetscReal *work,168: PetscBLASInt lwork,PetscBLASInt *iwork,PetscBLASInt liwork,169: PetscBLASInt *info,PetscBLASInt jobz_len)170: {
171: /* -- Routine written in LAPACK Version 3.0 style -- */
172: /* *************************************************** */
173: /* Written by */
174: /* Michael Moldaschl and Wilfried Gansterer */
175: /* University of Vienna */
176: /* last modification: March 16, 2014 */
178: /* Small adaptations of original code written by */
179: /* Wilfried Gansterer and Bob Ward, */
180: /* Department of Computer Science, University of Tennessee */
181: /* see https://doi.org/10.1137/S1064827501399432 */
182: /* *************************************************** */
184: /* Purpose */
185: /* ======= */
187: /* DIBTDC computes all eigenvalues and corresponding eigenvectors of a */
188: /* symmetric irreducible block tridiagonal matrix with rank RANK matrices */
189: /* as the subdiagonal blocks using a block divide and conquer method. */
191: /* Arguments */
192: /* ========= */
194: /* JOBZ (input) CHARACTER*1 */
195: /* = 'N': Compute eigenvalues only (not implemented); */
196: /* = 'D': Compute eigenvalues and eigenvectors. */
197: /* Eigenvectors are accumulated in the */
198: /* divide-and-conquer process. */
200: /* N (input) INTEGER */
201: /* The dimension of the symmetric irreducible block tridiagonal */
202: /* matrix. N >= 2. */
204: /* NBLKS (input) INTEGER, 2 <= NBLKS <= N */
205: /* The number of diagonal blocks in the matrix. */
207: /* KSIZES (input) INTEGER array, dimension (NBLKS) */
208: /* The dimension of the square diagonal blocks from top left */
209: /* to bottom right. KSIZES(I) >= 1 for all I, and the sum of */
210: /* KSIZES(I) for I = 1 to NBLKS has to be equal to N. */
212: /* D (input) DOUBLE PRECISION array, dimension (L1D,L2D,NBLKS) */
213: /* The lower triangular elements of the symmetric diagonal */
214: /* blocks of the block tridiagonal matrix. Elements of the top */
215: /* left diagonal block, which is of dimension KSIZES(1), are */
216: /* contained in D(*,*,1); the elements of the next diagonal */
217: /* block, which is of dimension KSIZES(2), are contained in */
218: /* D(*,*,2); etc. */
220: /* L1D (input) INTEGER */
221: /* The leading dimension of the array D. L1D >= max(3,KMAX), */
222: /* where KMAX is the dimension of the largest diagonal block. */
224: /* L2D (input) INTEGER */
225: /* The second dimension of the array D. L2D >= max(3,KMAX), */
226: /* where KMAX is as stated in L1D above. */
228: /* E (input) DOUBLE PRECISION array, dimension (L1E,L2E,NBLKS-1) */
229: /* Contains the elements of the scalars (singular values) and */
230: /* vectors (singular vectors) defining the rank RANK subdiagonal */
231: /* blocks of the matrix. */
232: /* E(1:RANK(K),RANK(K)+1,K) holds the RANK(K) scalars, */
233: /* E(:,1:RANK(K),K) holds the RANK(K) column vectors, and */
234: /* E(:,RANK(K)+2:2*RANK(K)+1,K) holds the row vectors for the K-th */
235: /* subdiagonal block. */
237: /* RANK (input) INTEGER array, dimension (NBLKS-1). */
238: /* The ranks of all the subdiagonal blocks contained in the array E. */
239: /* RANK(K) <= MIN(KSIZES(K), KSIZES(K+1)) */
241: /* L1E (input) INTEGER */
242: /* The leading dimension of the array E. L1E >= max(3,2*KMAX+1), */
243: /* where KMAX is as stated in L1D above. */
245: /* L2E (input) INTEGER */
246: /* The second dimension of the array E. L2E >= max(3,2*KMAX+1), */
247: /* where KMAX is as stated in L1D above. */
249: /* TOL (input) DOUBLE PRECISION, TOL <= 1.0D-1 */
250: /* User specified deflation tolerance for the routine DMERG2. */
251: /* If (1.0D-1 >= TOL >= 20*EPS) then TOL is used as */
252: /* the deflation tolerance in DSRTDF. */
253: /* If (TOL < 20*EPS) then the standard deflation tolerance from */
254: /* LAPACK is used as the deflation tolerance in DSRTDF. */
256: /* EV (output) DOUBLE PRECISION array, dimension (N) */
257: /* If INFO = 0, then EV contains the eigenvalues of the */
258: /* symmetric block tridiagonal matrix in ascending order. */
260: /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */
261: /* On entry, Z will be the identity matrix. */
262: /* On exit, Z contains the eigenvectors of the block tridiagonal */
263: /* matrix. */
265: /* LDZ (input) INTEGER */
266: /* The leading dimension of the array Z. LDZ >= max(1,N). */
268: /* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) */
270: /* LWORK (input) INTEGER */
271: /* The dimension of the array WORK. */
272: /* In order to guarantee correct results in all cases, */
273: /* LWORK must be at least (2*N**2 + 3*N). In many cases, */
274: /* less workspace is required. The absolute minimum required is */
275: /* (N**2 + 3*N). */
276: /* If the workspace provided is not sufficient, the routine will */
277: /* return a corresponding error code and report how much workspace */
278: /* was missing (see INFO). */
280: /* IWORK (workspace) INTEGER array, dimension (LIWORK) */
282: /* LIWORK (input) INTEGER */
283: /* The dimension of the array IWORK. */
284: /* LIWORK must be at least (5*N + 3 + 4*NBLKS - 4): */
285: /* 5*KMAX+3 for DSYEVD, 5*N for ????, */
286: /* 4*NBLKS-4 for the preprocessing (merging order) */
287: /* Summarizing, the minimum integer workspace needed is */
288: /* MAX(5*N, 5*KMAX + 3) + 4*NBLKS - 4 */
290: /* INFO (output) INTEGER */
291: /* = 0: successful exit. */
292: /* < 0, > -99: illegal arguments. */
293: /* if INFO = -i, the i-th argument had an illegal value. */
294: /* = -99: error in the preprocessing (call of CUTLR). */
295: /* < -200: not enough workspace. Space for ABS(INFO + 200) */
296: /* numbers is required in addition to the workspace provided, */
297: /* otherwise some eigenvectors will be incorrect. */
298: /* > 0: The algorithm failed to compute an eigenvalue while */
299: /* working on the submatrix lying in rows and columns */
300: /* INFO/(N+1) through mod(INFO,N+1). */
302: /* Further Details */
303: /* =============== */
305: /* Based on code written by */
306: /* Wilfried Gansterer and Bob Ward, */
307: /* Department of Computer Science, University of Tennessee */
309: /* This routine is comparable to Dlaed0.f from LAPACK. */
311: /* ===================================================================== */
313: PetscBLASInt i, j, k, np, rp1, ksk, one=1;
314: PetscBLASInt cut, mat1, kchk, kbrk, blks, kmax, icut, size, ksum, lsum;
315: PetscBLASInt lblks, rblks, isize, lwmin, ilsum;
316: PetscBLASInt start, istck1, istck2, istck3, merged;
317: PetscBLASInt liwmin, matsiz, startp, istrtp;
318: PetscReal rho, done=1.0, dmone=-1.0;
320: PetscFunctionBegin;
321: *info = 0;
323: if (*(unsigned char *)jobz != 'N' && *(unsigned char *)jobz != 'D') *info = -1;
324: else if (n < 2) *info = -2;
325: else if (nblks < 2 || nblks > n) *info = -3;
326: if (*info == 0) {
327: ksum = 0;
328: kmax = 0;
329: kchk = 0;
330: for (k = 0; k < nblks; ++k) {
331: ksk = ksizes[k];
332: ksum += ksk;
333: if (ksk > kmax) kmax = ksk;
334: if (ksk < 1) kchk = 1;
335: }
336: lwmin = n*n + n * 3;
337: liwmin = PetscMax(n * 5,kmax * 5 + 3) + 4*nblks - 4;
338: if (ksum != n || kchk == 1) *info = -4;
339: else if (l1d < PetscMax(3,kmax)) *info = -6;
340: else if (l2d < PetscMax(3,kmax)) *info = -7;
341: else if (l1e < PetscMax(3,2*kmax + 1)) *info = -10;
342: else if (l2e < PetscMax(3,2*kmax + 1)) *info = -11;
343: else if (tol > .1) *info = -12;
344: else if (ldz < PetscMax(1,n)) *info = -15;
345: else if (lwork < lwmin) *info = -17;
346: else if (liwork < liwmin) *info = -19;
347: }
348: if (*info == 0) {
349: for (k = 0; k < nblks-1; ++k) {
350: if (rank[k] > PetscMin(ksizes[k],ksizes[k+1]) || rank[k] < 1) *info = -9;
351: }
352: }
354: PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DIBTDC",-(*info));
356: /* **************************************************************************** */
358: /* ...Preprocessing..................................................... */
359: /* Determine the optimal order for merging the subblocks and how much */
360: /* workspace will be needed for the merging (determined by the last */
361: /* merge). Cutpoints for the merging operations are determined and stored */
362: /* in reverse chronological order (starting with the final merging */
363: /* operation). */
365: /* integer workspace requirements for the preprocessing: */
366: /* 4*(NBLKS-1) for merging history */
367: /* at most 3*(NBLKS-1) for stack */
369: start = 1;
370: size = n;
371: blks = nblks;
372: merged = 0;
373: k = 0;
375: /* integer workspace used for the stack is not needed any more after the */
376: /* preprocessing and therefore can use part of the 5*N */
377: /* integer workspace needed later on in the code */
379: istck1 = 0;
380: istck2 = istck1 + nblks;
381: istck3 = istck2 + nblks;
383: /* integer workspace used for storing the order of merges starts AFTER */
384: /* the integer workspace 5*N+3 which is needed later on in the code */
385: /* (5*KMAX+3 for DSYEVD, 4*N in DMERG2) */
387: istrtp = n * 5 + 4;
388: icut = istrtp + nblks - 1;
389: isize = icut + nblks - 1;
390: ilsum = isize + nblks - 1;
392: L200:
394: if (nblks >= 3) {
396: /* Determine the cut point. Note that in the routine CUTLR it is */
397: /* chosen such that it yields the best balanced merging operation */
398: /* among all the rank modifications with minimum rank. */
400: PetscCall(cutlr_(start, size, blks, &ksizes[start-1], &rank[start-1], &cut, &lsum, &lblks, info));
401: PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in cutlr, info = %" PetscBLASInt_FMT,*info);
403: } else {
404: cut = 1;
405: lsum = ksizes[0];
406: lblks = 1;
407: }
409: ++merged;
410: startp = 0;
411: for (i = 0; i < start-1; ++i) startp += ksizes[i];
412: iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
413: iwork[icut + (nblks - 1) - merged-1] = cut;
414: iwork[isize + (nblks - 1) - merged-1] = size;
415: iwork[ilsum + (nblks - 1) - merged-1] = lsum;
417: if (lblks == 2) {
419: /* one merge in left branch, left branch done */
420: ++merged;
421: iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
422: iwork[icut + (nblks - 1) - merged-1] = start;
423: iwork[isize + (nblks - 1) - merged-1] = lsum;
424: iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start-1];
425: }
427: if (lblks == 1 || lblks == 2) {
429: /* left branch done, continue on the right side */
430: start += lblks;
431: size -= lsum;
432: blks -= lblks;
434: PetscCheck(blks>0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in preprocessing, blks = %" PetscBLASInt_FMT,blks);
436: if (blks == 2) {
438: /* one merge in right branch, right branch done */
439: ++merged;
440: startp += lsum;
441: iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
442: iwork[icut + (nblks - 1) - merged-1] = start;
443: iwork[isize + (nblks - 1) - merged-1] = size;
444: iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start-1];
445: }
447: if (blks == 1 || blks == 2) {
449: /* get the next subproblem from the stack or finished */
451: if (k >= 1) {
453: /* something left on the stack */
454: start = iwork[istck1 + k-1];
455: size = iwork[istck2 + k-1];
456: blks = iwork[istck3 + k-1];
457: --k;
458: goto L200;
459: } else {
461: /* nothing left on the stack */
462: PetscCheck(merged==nblks-1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ERROR in preprocessing - not enough merges performed");
464: /* exit preprocessing */
466: }
467: } else {
469: /* BLKS.GE.3, and therefore analyze the right side */
471: goto L200;
472: }
473: } else {
475: /* LBLKS.GE.3, and therefore check the right side and */
476: /* put it on the stack if required */
478: rblks = blks - lblks;
479: if (rblks >= 3) {
480: ++k;
481: iwork[istck1 + k-1] = cut + 1;
482: iwork[istck2 + k-1] = size - lsum;
483: iwork[istck3 + k-1] = rblks;
484: } else if (rblks == 2) {
486: /* one merge in right branch, right branch done */
487: /* (note that nothing needs to be done if RBLKS.EQ.1 !) */
489: ++merged;
490: startp += lsum;
491: iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
492: iwork[icut + (nblks - 1) - merged-1] = start + lblks;
493: iwork[isize + (nblks - 1) - merged-1] = size - lsum;
494: iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start + lblks-1];
495: }
496: PetscCheck(rblks>0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: ERROR in preprocessing - rblks = %" PetscBLASInt_FMT,rblks);
498: /* continue on the left side */
500: size = lsum;
501: blks = lblks;
502: goto L200;
503: }
505: /* SIZE = IWORK(ISIZE+NBLKS-2) */
506: /* MAT1 = IWORK(ILSUM+NBLKS-2) */
508: /* Note: after the dimensions SIZE and MAT1 of the last merging */
509: /* operation have been determined, an upper bound for the workspace */
510: /* requirements which is independent of how much deflation occurs in */
511: /* the last merging operation could be determined as follows */
512: /* (based on (3.15) and (3.19) from UT-CS-00-447): */
514: /* IF(MAT1.LE.N/2) THEN */
515: /* WSPREQ = 3*N + 3/2*(SIZE-MAT1)**2 + N*N/2 + MAT1*MAT1 */
516: /* ELSE */
517: /* WSPREQ = 3*N + 3/2*MAT1*MAT1 + N*N/2 + (SIZE-MAT1)**2 */
518: /* END IF */
520: /* IF(LWORK-WSPREQ.LT.0)THEN */
521: /* not enough work space provided */
522: /* INFO = -200 - (WSPREQ-LWORK) */
523: /* RETURN */
524: /* END IF */
525: /* However, this is not really useful, since the actual check whether */
526: /* enough workspace is provided happens in DMERG2.f ! */
528: /* ************************************************************************* */
530: /* ...Solve subproblems................................... */
532: /* Divide the matrix into NBLKS submatrices using rank-r */
533: /* modifications (cuts) and solve for their eigenvalues and */
534: /* eigenvectors. Initialize index array to sort eigenvalues. */
536: /* first block: ...................................... */
538: /* correction for block 1: D1 - V1 \Sigma1 V1^T */
540: ksk = ksizes[0];
541: rp1 = rank[0];
543: /* initialize the proper part of Z with the diagonal block D1 */
544: /* (the correction will be made in Z and then the call of DSYEVD will */
545: /* overwrite it with the eigenvectors) */
547: for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[i+j*ldz] = d[i+j*l1d];
549: /* copy D1 into WORK (in order to be able to restore it afterwards) */
551: for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+j*l1d];
553: /* copy V1 into the first RANK(1) columns of D1 and then */
554: /* multiply with \Sigma1 */
556: for (i = 0; i < rank[0]; ++i) {
557: PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(rp1 + i+1)*l1e], &one, &d[i*l1d], &one));
558: PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i + rp1*l1e], &d[i*l1d], &one));
559: }
561: /* multiply the first RANK(1) columns of D1 with V1^T and */
562: /* subtract the result from the proper part of Z (previously */
563: /* initialized with D1) */
565: PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, rank, &dmone,
566: d, &l1d, &e[(rank[0]+1)*l1e], &l1e, &done, z, &ldz));
568: /* restore the original D1 from WORK */
570: for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+j*l1d] = work[i+j*ksk];
572: /* eigenanalysis of block 1 (using DSYEVD) */
574: PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, z, &ldz, ev, work, &lwork, info));
575: SlepcCheckLapackInfo("syev",*info);
577: /* EV(1:) contains the eigenvalues in ascending order */
578: /* (they are returned this way by DSYEVD) */
580: for (i = 0; i < ksk; ++i) iwork[i] = i+1;
582: /* intermediate blocks: .............................. */
584: np = ksk;
586: /* remaining number of blocks */
588: if (nblks > 2) {
589: for (k = 1; k < nblks-1; ++k) {
591: /* correction for block K: */
592: /* Dk - U(k-1) \Sigma(k-1) U(k-1)^T - Vk \Sigmak Vk^T */
594: ksk = ksizes[k];
595: rp1 = rank[k];
597: /* initialize the proper part of Z with the diagonal block Dk */
598: /* (the correction will be made in Z and then the call of DSYEVD will */
599: /* overwrite it with the eigenvectors) */
601: for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[np+i+(np+j)*ldz] = d[i+(j+k*l2d)*l1d];
603: /* copy Dk into WORK (in order to be able to restore it afterwards) */
605: for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+(j+k*l2d)*l1d];
607: /* copy U(K-1) into the first RANK(K-1) columns of Dk and then */
608: /* multiply with \Sigma(K-1) */
610: for (i = 0; i < rank[k-1]; ++i) {
611: PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(i+(k-1)*l2e)*l1e], &one, &d[(i+k*l2d)*l1d], &one));
612: PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i+(rank[k-1]+(k-1)*l2e)*l1e], &d[(i+k*l2d)*l1d], &one));
613: }
615: /* multiply the first RANK(K-1) columns of Dk with U(k-1)^T and */
616: /* subtract the result from the proper part of Z (previously */
617: /* initialized with Dk) */
619: PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[k-1],
620: &dmone, &d[k*l1d*l2d],
621: &l1d, &e[(k-1)*l1e*l2e], &l1e, &done, &z[np+np*ldz], &ldz));
623: /* copy Vk into the first RANK(K) columns of Dk and then */
624: /* multiply with \Sigmak */
626: for (i = 0; i < rank[k]; ++i) {
627: PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(rp1+i+1 + k*l2e)*l1e], &one, &d[(i + k*l2d)*l1d], &one));
628: PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i + (rp1 + k*l2e)*l1e], &d[(i + k*l2d)*l1d], &one));
629: }
631: /* multiply the first RANK(K) columns of Dk with Vk^T and */
632: /* subtract the result from the proper part of Z (previously */
633: /* updated with [- U(k-1) \Sigma(k-1) U(k-1)^T]) */
635: PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[k],
636: &dmone, &d[k*l1d*l2d], &l1d,
637: &e[(rank[k]+1 + k*l2e)*l1e], &l1e, &done, &z[np+np*ldz], &ldz));
639: /* restore the original Dk from WORK */
641: for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+(j+k*l2d)*l1d] = work[i+j*ksk];
643: /* eigenanalysis of block K (using dsyevd) */
645: PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, &z[np+np*ldz],
646: &ldz, &ev[np], work, &lwork, info));
647: SlepcCheckLapackInfo("syev",*info);
649: /* EV(NPP1:) contains the eigenvalues in ascending order */
650: /* (they are returned this way by DSYEVD) */
652: for (i = 0; i < ksk; ++i) iwork[np + i] = i+1;
654: /* update NP */
655: np += ksk;
656: }
657: }
659: /* last block: ....................................... */
661: /* correction for block NBLKS: */
662: /* D(nblks) - U(nblks-1) \Sigma(nblks-1) U(nblks-1)^T */
664: ksk = ksizes[nblks-1];
666: /* initialize the proper part of Z with the diagonal block D(nblks) */
667: /* (the correction will be made in Z and then the call of DSYEVD will */
668: /* overwrite it with the eigenvectors) */
670: for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[np+i+(np+j)*ldz] = d[i+(j+(nblks-1)*l2d)*l1d];
672: /* copy D(nblks) into WORK (in order to be able to restore it afterwards) */
674: for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+(j+(nblks-1)*l2d)*l1d];
676: /* copy U(nblks-1) into the first RANK(nblks-1) columns of D(nblks) and then */
677: /* multiply with \Sigma(nblks-1) */
679: for (i = 0; i < rank[nblks-2]; ++i) {
680: PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(i + (nblks-2)*l2e)*l1e],
681: &one, &d[(i + (nblks-1)*l2d)*l1d], &one));
682: PetscCallBLAS("BLASscal",BLASscal_(&ksk,
683: &e[i + (rank[nblks-2] + (nblks-2)*l2e)*l1e],
684: &d[(i + (nblks-1)*l2d)*l1d], &one));
685: }
687: /* multiply the first RANK(nblks-1) columns of D(nblks) with U(nblks-1)^T */
688: /* and subtract the result from the proper part of Z (previously */
689: /* initialized with D(nblks)) */
691: PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[nblks - 2],
692: &dmone, &d[(nblks-1)*l1d*l2d], &l1d,
693: &e[(nblks-2)*l1e*l2e], &l1e, &done, &z[np+np*ldz], &ldz));
695: /* restore the original D(nblks) from WORK */
697: for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+(j+(nblks-1)*l2d)*l1d] = work[i+j*ksk];
699: /* eigenanalysis of block NBLKS (using dsyevd) */
701: PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, &z[np+np*ldz], &ldz, &ev[np], work, &lwork, info));
702: SlepcCheckLapackInfo("syev",*info);
704: /* EV(NPP1:) contains the eigenvalues in ascending order */
705: /* (they are returned this way by DSYEVD) */
707: for (i = 0; i < ksk; ++i) iwork[np + i] = i+1;
709: /* note that from here on the entire workspace is available again */
711: /* Perform all the merging operations. */
713: for (i = 0; i < nblks-1; ++i) {
715: /* MATSIZ = total size of the current rank RANK modification problem */
717: matsiz = iwork[isize + i - 1];
718: np = iwork[istrtp + i - 1];
719: kbrk = iwork[icut + i - 1];
720: mat1 = iwork[ilsum + i - 1];
722: for (j = 0; j < rank[kbrk-1]; ++j) {
724: /* NOTE: The parameter RHO in DMERG2 is modified in DSRTDF */
725: /* (multiplied by 2) ! In order not to change the */
726: /* singular value stored in E(:, RANK(KBRK)+1, KBRK), */
727: /* we do not pass on this variable as an argument to DMERG2, */
728: /* but we assign a separate variable RHO here which is passed */
729: /* on to DMERG2. */
730: /* Alternative solution in F90: */
731: /* pass E(:,RANK(KBRK)+1,KBRK) to an INTENT(IN) parameter */
732: /* in DMERG2. */
734: rho = e[j + (rank[kbrk-1] + (kbrk-1)*l2e)*l1e];
736: /* eigenvectors are accumulated (JOBZ.EQ.'D') */
738: PetscCall(BDC_dmerg2_(jobz, j+1, matsiz, &ev[np-1], &z[np-1+(np-1)*ldz],
739: ldz, &iwork[np-1], &rho, &e[(j + (kbrk-1)*l2e)*l1e],
740: ksizes[kbrk], &e[(rank[kbrk-1]+j+1 + (kbrk-1)*l2e)*l1e],
741: ksizes[kbrk-1], mat1, work, lwork, &iwork[n], tol, info, 1));
742: PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in dmerg2, info = %" PetscBLASInt_FMT,*info);
743: }
745: /* at this point all RANK(KBRK) rank-one modifications corresponding */
746: /* to the current off-diagonal block are finished. */
747: /* Move on to the next off-diagonal block. */
749: }
751: /* Re-merge the eigenvalues/vectors which were deflated at the final */
752: /* merging step by sorting all eigenvalues and eigenvectors according */
753: /* to the permutation stored in IWORK. */
755: /* copy eigenvalues and eigenvectors in ordered form into WORK */
756: /* (eigenvalues into WORK(1:N), eigenvectors into WORK(N+1:N+1+N^2)) */
758: for (i = 0; i < n; ++i) {
759: j = iwork[i];
760: work[i] = ev[j-1];
761: PetscCallBLAS("BLAScopy",BLAScopy_(&n, &z[(j-1)*ldz], &one, &work[n*(i+1)], &one));
762: }
764: /* copy ordered eigenvalues back from WORK(1:N) into EV */
766: PetscCallBLAS("BLAScopy",BLAScopy_(&n, work, &one, ev, &one));
768: /* copy ordered eigenvectors back from WORK(N+1:N+1+N^2) into Z */
770: for (j=0;j<n;j++) for (i=0;i<n;i++) z[i+j*ldz] = work[i+(j+1)*n];
771: PetscFunctionReturn(PETSC_SUCCESS);
772: }