Actual source code: dibtdc.c

slepc-3.21.1 2024-04-26
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    BDC - Block-divide and conquer (see description in README file)
 12: */

 14: #include <slepc/private/dsimpl.h>
 15: #include <slepcblaslapack.h>

 17: static PetscErrorCode cutlr_(PetscBLASInt start,PetscBLASInt n,PetscBLASInt blkct,
 18:         PetscBLASInt *bsizes,PetscBLASInt *ranks,PetscBLASInt *cut,
 19:         PetscBLASInt *lsum,PetscBLASInt *lblks,PetscBLASInt *info)
 20: {
 21: /*  -- Routine written in LAPACK Version 3.0 style -- */
 22: /* *************************************************** */
 23: /*     Written by */
 24: /*     Michael Moldaschl and Wilfried Gansterer */
 25: /*     University of Vienna */
 26: /*     last modification: March 16, 2014 */

 28: /*     Small adaptations of original code written by */
 29: /*     Wilfried Gansterer and Bob Ward, */
 30: /*     Department of Computer Science, University of Tennessee */
 31: /*     see https://doi.org/10.1137/S1064827501399432 */
 32: /* *************************************************** */

 34: /*  Purpose */
 35: /*  ======= */

 37: /*  CUTLR computes the optimal cut in a sequence of BLKCT neighboring */
 38: /*  blocks whose sizes are given by the array BSIZES. */
 39: /*  The sum of all block sizes in the sequence considered is given by N. */
 40: /*  The cut is optimal in the sense that the difference of the sizes of */
 41: /*  the resulting two halves is minimum over all cuts with minimum ranks */
 42: /*  between blocks of the sequence considered. */

 44: /*  Arguments */
 45: /*  ========= */

 47: /*  START  (input) INTEGER */
 48: /*         In the original array KSIZES of the calling routine DIBTDC, */
 49: /*         the position where the sequence considered in this routine starts. */
 50: /*         START >= 1. */

 52: /*  N      (input) INTEGER */
 53: /*         The sum of all the block sizes of the sequence to be cut = */
 54: /*         = sum_{i=1}^{BLKCT} BSIZES(I). */
 55: /*         N >= 3. */

 57: /*  BLKCT  (input) INTEGER */
 58: /*         The number of blocks in the sequence to be cut. */
 59: /*         BLKCT >= 3. */

 61: /*  BSIZES (input) INTEGER array, dimension (BLKCT) */
 62: /*         The dimensions of the (quadratic) blocks of the sequence to be */
 63: /*         cut. sum_{i=1}^{BLKCT} BSIZES(I) = N. */

 65: /*  RANKS  (input) INTEGER array, dimension (BLKCT-1) */
 66: /*         The ranks determining the approximations of the off-diagonal */
 67: /*         blocks in the sequence considered. */

 69: /*  CUT    (output) INTEGER */
 70: /*         After the optimum cut has been determined, the position (in the */
 71: /*         overall problem as worked on in DIBTDC !) of the last block in */
 72: /*         the first half of the sequence to be cut. */
 73: /*         START <= CUT <= START+BLKCT-2. */

 75: /*  LSUM   (output) INTEGER */
 76: /*         After the optimum cut has been determined, the sum of the */
 77: /*         block sizes in the first half of the sequence to be cut. */
 78: /*         LSUM < N. */

 80: /*  LBLKS  (output) INTEGER */
 81: /*         After the optimum cut has been determined, the number of the */
 82: /*         blocks in the first half of the sequence to be cut. */
 83: /*         1 <= LBLKS < BLKCT. */

 85: /*  INFO   (output) INTEGER */
 86: /*          = 0:  successful exit. */
 87: /*          < 0:  illegal arguments. */
 88: /*                if INFO = -i, the i-th (input) argument had an illegal */
 89: /*                value. */
 90: /*          > 0:  illegal results. */
 91: /*                if INFO = i, the i-th (output) argument had an illegal */
 92: /*                value. */

 94: /*  Further Details */
 95: /*  =============== */

 97: /*  Based on code written by */
 98: /*     Wilfried Gansterer and Bob Ward, */
 99: /*     Department of Computer Science, University of Tennessee */

101: /*  ===================================================================== */

103:   PetscBLASInt i, ksk, kchk, ksum, nhalf, deviat, mindev, minrnk, tmpsum;

105:   PetscFunctionBegin;
106:   *info = 0;
107:   *lblks = 1;
108:   *lsum = 1;
109:   *cut = start;

111:   if (start < 1) *info = -1;
112:   else if (n < 3) *info = -2;
113:   else if (blkct < 3) *info = -3;
114:   if (*info == 0) {
115:     ksum = 0;
116:     kchk = 0;
117:     for (i = 0; i < blkct; ++i) {
118:       ksk = bsizes[i];
119:       ksum += ksk;
120:       if (ksk < 1) kchk = 1;
121:     }
122:     if (ksum != n || kchk == 1) *info = -4;
123:   }
124:   PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in CUTLR",-(*info));

126:   /* determine smallest rank in the range considered */

128:   minrnk = n;
129:   for (i = 0; i < blkct-1; ++i) {
130:     if (ranks[i] < minrnk) minrnk = ranks[i];
131:   }

133:   /* determine best cut among those with smallest rank */

135:   nhalf = n / 2;
136:   tmpsum = 0;
137:   mindev = n;
138:   for (i = 0; i < blkct; ++i) {
139:     tmpsum += bsizes[i];
140:     if (ranks[i] == minrnk) {

142:       /* determine deviation from "optimal" cut NHALF */

144:       deviat = tmpsum - nhalf;
145:       if (deviat<0) deviat = -deviat;

147:       /* compare to best deviation so far */

149:       if (deviat < mindev) {
150:         mindev = deviat;
151:         *cut = start + i;
152:         *lblks = i + 1;
153:         *lsum = tmpsum;
154:       }
155:     }
156:   }

158:   if (*cut < start || *cut >= start + blkct - 1) *info = 6;
159:   else if (*lsum < 1 || *lsum >= n) *info = 7;
160:   else if (*lblks < 1 || *lblks >= blkct) *info = 8;
161:   PetscFunctionReturn(PETSC_SUCCESS);
162: }

164: PetscErrorCode BDC_dibtdc_(const char *jobz,PetscBLASInt n,PetscBLASInt nblks,
165:         PetscBLASInt *ksizes,PetscReal *d,PetscBLASInt l1d,PetscBLASInt l2d,
166:         PetscReal *e,PetscBLASInt *rank,PetscBLASInt l1e,PetscBLASInt l2e,
167:         PetscReal tol,PetscReal *ev,PetscReal *z,PetscBLASInt ldz,PetscReal *work,
168:         PetscBLASInt lwork,PetscBLASInt *iwork,PetscBLASInt liwork,
169:         PetscBLASInt *info,PetscBLASInt jobz_len)
170: {
171: /*  -- Routine written in LAPACK Version 3.0 style -- */
172: /* *************************************************** */
173: /*     Written by */
174: /*     Michael Moldaschl and Wilfried Gansterer */
175: /*     University of Vienna */
176: /*     last modification: March 16, 2014 */

178: /*     Small adaptations of original code written by */
179: /*     Wilfried Gansterer and Bob Ward, */
180: /*     Department of Computer Science, University of Tennessee */
181: /*     see https://doi.org/10.1137/S1064827501399432 */
182: /* *************************************************** */

184: /*  Purpose */
185: /*  ======= */

187: /*  DIBTDC computes all eigenvalues and corresponding eigenvectors of a */
188: /*  symmetric irreducible block tridiagonal matrix with rank RANK matrices */
189: /*  as the subdiagonal blocks using a block divide and conquer method. */

191: /*  Arguments */
192: /*  ========= */

194: /*  JOBZ    (input) CHARACTER*1 */
195: /*          = 'N':  Compute eigenvalues only (not implemented); */
196: /*          = 'D':  Compute eigenvalues and eigenvectors. */
197: /*                  Eigenvectors are accumulated in the */
198: /*                  divide-and-conquer process. */

200: /*  N      (input) INTEGER */
201: /*         The dimension of the symmetric irreducible block tridiagonal */
202: /*         matrix.  N >= 2. */

204: /*  NBLKS  (input) INTEGER, 2 <= NBLKS <= N */
205: /*         The number of diagonal blocks in the matrix. */

207: /*  KSIZES (input) INTEGER array, dimension (NBLKS) */
208: /*         The dimension of the square diagonal blocks from top left */
209: /*         to bottom right.  KSIZES(I) >= 1 for all I, and the sum of */
210: /*         KSIZES(I) for I = 1 to NBLKS has to be equal to N. */

212: /*  D      (input) DOUBLE PRECISION array, dimension (L1D,L2D,NBLKS) */
213: /*         The lower triangular elements of the symmetric diagonal */
214: /*         blocks of the block tridiagonal matrix.  Elements of the top */
215: /*         left diagonal block, which is of dimension KSIZES(1), are */
216: /*         contained in D(*,*,1); the elements of the next diagonal */
217: /*         block, which is of dimension KSIZES(2), are contained in */
218: /*         D(*,*,2); etc. */

220: /*  L1D    (input) INTEGER */
221: /*         The leading dimension of the array D.  L1D >= max(3,KMAX), */
222: /*         where KMAX is the dimension of the largest diagonal block. */

224: /*  L2D    (input) INTEGER */
225: /*         The second dimension of the array D.  L2D >= max(3,KMAX), */
226: /*         where KMAX is as stated in L1D above. */

228: /*  E      (input) DOUBLE PRECISION array, dimension (L1E,L2E,NBLKS-1) */
229: /*         Contains the elements of the scalars (singular values) and */
230: /*         vectors (singular vectors) defining the rank RANK subdiagonal */
231: /*         blocks of the matrix. */
232: /*         E(1:RANK(K),RANK(K)+1,K) holds the RANK(K) scalars, */
233: /*         E(:,1:RANK(K),K) holds the RANK(K) column vectors, and */
234: /*         E(:,RANK(K)+2:2*RANK(K)+1,K) holds the row vectors for the K-th */
235: /*         subdiagonal block. */

237: /*  RANK   (input) INTEGER array, dimension (NBLKS-1). */
238: /*         The ranks of all the subdiagonal blocks contained in the array E. */
239: /*         RANK(K) <= MIN(KSIZES(K), KSIZES(K+1)) */

241: /*  L1E    (input) INTEGER */
242: /*         The leading dimension of the array E.  L1E >= max(3,2*KMAX+1), */
243: /*         where KMAX is as stated in L1D above. */

245: /*  L2E    (input) INTEGER */
246: /*         The second dimension of the array E.  L2E >= max(3,2*KMAX+1), */
247: /*         where KMAX is as stated in L1D above. */

249: /*  TOL    (input) DOUBLE PRECISION, TOL <= 1.0D-1 */
250: /*         User specified deflation tolerance for the routine DMERG2. */
251: /*         If (1.0D-1 >= TOL >= 20*EPS) then TOL is used as */
252: /*         the deflation tolerance in DSRTDF. */
253: /*         If (TOL < 20*EPS) then the standard deflation tolerance from */
254: /*         LAPACK is used as the deflation tolerance in DSRTDF. */

256: /*  EV     (output) DOUBLE PRECISION array, dimension (N) */
257: /*         If INFO = 0, then EV contains the eigenvalues of the */
258: /*         symmetric block tridiagonal matrix in ascending order. */

260: /*  Z      (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */
261: /*         On entry, Z will be the identity matrix. */
262: /*         On exit, Z contains the eigenvectors of the block tridiagonal */
263: /*         matrix. */

265: /*  LDZ    (input) INTEGER */
266: /*         The leading dimension of the array Z.  LDZ >= max(1,N). */

268: /*  WORK   (workspace) DOUBLE PRECISION array, dimension (LWORK) */

270: /*  LWORK   (input) INTEGER */
271: /*          The dimension of the array WORK. */
272: /*          In order to guarantee correct results in all cases, */
273: /*          LWORK must be at least (2*N**2 + 3*N). In many cases, */
274: /*          less workspace is required. The absolute minimum required is */
275: /*          (N**2 + 3*N). */
276: /*          If the workspace provided is not sufficient, the routine will */
277: /*          return a corresponding error code and report how much workspace */
278: /*          was missing (see INFO). */

280: /*  IWORK  (workspace) INTEGER array, dimension (LIWORK) */

282: /*  LIWORK  (input) INTEGER */
283: /*          The dimension of the array IWORK. */
284: /*          LIWORK must be at least (5*N + 3 + 4*NBLKS - 4): */
285: /*                 5*KMAX+3 for DSYEVD, 5*N for ????, */
286: /*                 4*NBLKS-4 for the preprocessing (merging order) */
287: /*          Summarizing, the minimum integer workspace needed is */
288: /*          MAX(5*N, 5*KMAX + 3) + 4*NBLKS - 4 */

290: /*  INFO   (output) INTEGER */
291: /*          = 0:  successful exit. */
292: /*          < 0, > -99: illegal arguments. */
293: /*                if INFO = -i, the i-th argument had an illegal value. */
294: /*          = -99: error in the preprocessing (call of CUTLR). */
295: /*          < -200: not enough workspace. Space for ABS(INFO + 200) */
296: /*                numbers is required in addition to the workspace provided, */
297: /*                otherwise some eigenvectors will be incorrect. */
298: /*          > 0:  The algorithm failed to compute an eigenvalue while */
299: /*                working on the submatrix lying in rows and columns */
300: /*                INFO/(N+1) through mod(INFO,N+1). */

302: /*  Further Details */
303: /*  =============== */

305: /*  Based on code written by */
306: /*     Wilfried Gansterer and Bob Ward, */
307: /*     Department of Computer Science, University of Tennessee */

309: /*  This routine is comparable to Dlaed0.f from LAPACK. */

311: /*  ===================================================================== */

313:   PetscBLASInt   i, j, k, np, rp1, ksk, one=1;
314:   PetscBLASInt   cut, mat1, kchk, kbrk, blks, kmax, icut, size, ksum, lsum;
315:   PetscBLASInt   lblks, rblks, isize, lwmin, ilsum;
316:   PetscBLASInt   start, istck1, istck2, istck3, merged;
317:   PetscBLASInt   liwmin, matsiz, startp, istrtp;
318:   PetscReal      rho, done=1.0, dmone=-1.0;

320:   PetscFunctionBegin;
321:   *info = 0;

323:   if (*(unsigned char *)jobz != 'N' && *(unsigned char *)jobz != 'D') *info = -1;
324:   else if (n < 2) *info = -2;
325:   else if (nblks < 2 || nblks > n) *info = -3;
326:   if (*info == 0) {
327:     ksum = 0;
328:     kmax = 0;
329:     kchk = 0;
330:     for (k = 0; k < nblks; ++k) {
331:       ksk = ksizes[k];
332:       ksum += ksk;
333:       if (ksk > kmax) kmax = ksk;
334:       if (ksk < 1) kchk = 1;
335:     }
336:     lwmin = n*n + n * 3;
337:     liwmin = PetscMax(n * 5,kmax * 5 + 3) + 4*nblks - 4;
338:     if (ksum != n || kchk == 1) *info = -4;
339:     else if (l1d < PetscMax(3,kmax)) *info = -6;
340:     else if (l2d < PetscMax(3,kmax)) *info = -7;
341:     else if (l1e < PetscMax(3,2*kmax + 1)) *info = -10;
342:     else if (l2e < PetscMax(3,2*kmax + 1)) *info = -11;
343:     else if (tol > .1) *info = -12;
344:     else if (ldz < PetscMax(1,n)) *info = -15;
345:     else if (lwork < lwmin) *info = -17;
346:     else if (liwork < liwmin) *info = -19;
347:   }
348:   if (*info == 0) {
349:     for (k = 0; k < nblks-1; ++k) {
350:       if (rank[k] > PetscMin(ksizes[k],ksizes[k+1]) || rank[k] < 1) *info = -9;
351:     }
352:   }

354:   PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DIBTDC",-(*info));

356: /* **************************************************************************** */

358:   /* ...Preprocessing..................................................... */
359:   /*    Determine the optimal order for merging the subblocks and how much */
360:   /*    workspace will be needed for the merging (determined by the last */
361:   /*    merge). Cutpoints for the merging operations are determined and stored */
362:   /*    in reverse chronological order (starting with the final merging */
363:   /*    operation). */

365:   /*    integer workspace requirements for the preprocessing: */
366:   /*         4*(NBLKS-1) for merging history */
367:   /*         at most 3*(NBLKS-1) for stack */

369:   start = 1;
370:   size = n;
371:   blks = nblks;
372:   merged = 0;
373:   k = 0;

375:   /* integer workspace used for the stack is not needed any more after the */
376:   /* preprocessing and therefore can use part of the 5*N */
377:   /* integer workspace needed later on in the code */

379:   istck1 = 0;
380:   istck2 = istck1 + nblks;
381:   istck3 = istck2 + nblks;

383:   /* integer workspace used for storing the order of merges starts AFTER */
384:   /* the integer workspace 5*N+3 which is needed later on in the code */
385:   /* (5*KMAX+3 for DSYEVD, 4*N in DMERG2) */

387:   istrtp = n * 5 + 4;
388:   icut = istrtp + nblks - 1;
389:   isize = icut + nblks - 1;
390:   ilsum = isize + nblks - 1;

392: L200:

394:   if (nblks >= 3) {

396:     /* Determine the cut point. Note that in the routine CUTLR it is */
397:     /* chosen such that it yields the best balanced merging operation */
398:     /* among all the rank modifications with minimum rank. */

400:     PetscCall(cutlr_(start, size, blks, &ksizes[start-1], &rank[start-1], &cut, &lsum, &lblks, info));
401:     PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in cutlr, info = %" PetscBLASInt_FMT,*info);

403:   } else {
404:     cut = 1;
405:     lsum = ksizes[0];
406:     lblks = 1;
407:   }

409:   ++merged;
410:   startp = 0;
411:   for (i = 0; i < start-1; ++i) startp += ksizes[i];
412:   iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
413:   iwork[icut + (nblks - 1) - merged-1] = cut;
414:   iwork[isize + (nblks - 1) - merged-1] = size;
415:   iwork[ilsum + (nblks - 1) - merged-1] = lsum;

417:   if (lblks == 2) {

419:     /* one merge in left branch, left branch done */
420:     ++merged;
421:     iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
422:     iwork[icut + (nblks - 1) - merged-1] = start;
423:     iwork[isize + (nblks - 1) - merged-1] = lsum;
424:     iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start-1];
425:   }

427:   if (lblks == 1 || lblks == 2) {

429:     /* left branch done, continue on the right side */
430:     start += lblks;
431:     size -= lsum;
432:     blks -= lblks;

434:     PetscCheck(blks>0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in preprocessing, blks = %" PetscBLASInt_FMT,blks);

436:     if (blks == 2) {

438:       /* one merge in right branch, right branch done */
439:       ++merged;
440:       startp += lsum;
441:       iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
442:       iwork[icut + (nblks - 1) - merged-1] = start;
443:       iwork[isize + (nblks - 1) - merged-1] = size;
444:       iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start-1];
445:     }

447:     if (blks == 1 || blks == 2) {

449:       /* get the next subproblem from the stack or finished */

451:       if (k >= 1) {

453:         /* something left on the stack */
454:         start = iwork[istck1 + k-1];
455:         size = iwork[istck2 + k-1];
456:         blks = iwork[istck3 + k-1];
457:         --k;
458:         goto L200;
459:       } else {

461:         /* nothing left on the stack */
462:         PetscCheck(merged==nblks-1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ERROR in preprocessing - not enough merges performed");

464:         /* exit preprocessing */

466:       }
467:     } else {

469:       /* BLKS.GE.3, and therefore analyze the right side */

471:       goto L200;
472:     }
473:   } else {

475:     /* LBLKS.GE.3, and therefore check the right side and */
476:     /* put it on the stack if required */

478:     rblks = blks - lblks;
479:     if (rblks >= 3) {
480:       ++k;
481:       iwork[istck1 + k-1] = cut + 1;
482:       iwork[istck2 + k-1] = size - lsum;
483:       iwork[istck3 + k-1] = rblks;
484:     } else if (rblks == 2) {

486:       /* one merge in right branch, right branch done */
487:       /* (note that nothing needs to be done if RBLKS.EQ.1 !) */

489:       ++merged;
490:       startp += lsum;
491:       iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
492:       iwork[icut + (nblks - 1) - merged-1] = start + lblks;
493:       iwork[isize + (nblks - 1) - merged-1] = size - lsum;
494:       iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start + lblks-1];
495:     }
496:     PetscCheck(rblks>0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: ERROR in preprocessing - rblks = %" PetscBLASInt_FMT,rblks);

498:     /* continue on the left side */

500:     size = lsum;
501:     blks = lblks;
502:     goto L200;
503:   }

505:   /*  SIZE = IWORK(ISIZE+NBLKS-2) */
506:   /*  MAT1 = IWORK(ILSUM+NBLKS-2) */

508:   /* Note: after the dimensions SIZE and MAT1 of the last merging */
509:   /* operation have been determined, an upper bound for the workspace */
510:   /* requirements which is independent of how much deflation occurs in */
511:   /* the last merging operation could be determined as follows */
512:   /* (based on (3.15) and (3.19) from UT-CS-00-447): */

514:   /*  IF(MAT1.LE.N/2) THEN */
515:   /*     WSPREQ = 3*N + 3/2*(SIZE-MAT1)**2 + N*N/2 + MAT1*MAT1 */
516:   /*  ELSE */
517:   /*     WSPREQ = 3*N + 3/2*MAT1*MAT1 + N*N/2 + (SIZE-MAT1)**2 */
518:   /*  END IF */

520:   /*  IF(LWORK-WSPREQ.LT.0)THEN */
521:   /*          not enough work space provided */
522:   /*     INFO = -200 - (WSPREQ-LWORK) */
523:   /*     RETURN */
524:   /*  END IF */
525:   /*  However, this is not really useful, since the actual check whether */
526:   /*  enough workspace is provided happens in DMERG2.f ! */

528: /* ************************************************************************* */

530:   /* ...Solve subproblems................................... */

532:   /* Divide the matrix into NBLKS submatrices using rank-r */
533:   /* modifications (cuts) and solve for their eigenvalues and */
534:   /* eigenvectors. Initialize index array to sort eigenvalues. */

536:   /* first block: ...................................... */

538:   /*    correction for block 1: D1 - V1 \Sigma1 V1^T */

540:   ksk = ksizes[0];
541:   rp1 = rank[0];

543:   /* initialize the proper part of Z with the diagonal block D1 */
544:   /* (the correction will be made in Z and then the call of DSYEVD will */
545:   /*  overwrite it with the eigenvectors) */

547:   for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[i+j*ldz] = d[i+j*l1d];

549:   /* copy D1 into WORK (in order to be able to restore it afterwards) */

551:   for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+j*l1d];

553:   /* copy V1 into the first RANK(1) columns of D1 and then */
554:   /* multiply with \Sigma1 */

556:   for (i = 0; i < rank[0]; ++i) {
557:     PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(rp1 + i+1)*l1e], &one, &d[i*l1d], &one));
558:     PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i + rp1*l1e], &d[i*l1d], &one));
559:   }

561:   /* multiply the first RANK(1) columns of D1 with V1^T and */
562:   /* subtract the result from the proper part of Z (previously */
563:   /* initialized with D1) */

565:   PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, rank, &dmone,
566:           d, &l1d, &e[(rank[0]+1)*l1e], &l1e, &done, z, &ldz));

568:   /* restore the original D1 from WORK */

570:   for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+j*l1d] = work[i+j*ksk];

572:   /* eigenanalysis of block 1 (using DSYEVD) */

574:   PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, z, &ldz, ev, work, &lwork, info));
575:   SlepcCheckLapackInfo("syev",*info);

577:   /* EV(1:) contains the eigenvalues in ascending order */
578:   /* (they are returned this way by DSYEVD) */

580:   for (i = 0; i < ksk; ++i) iwork[i] = i+1;

582:     /* intermediate blocks: .............................. */

584:     np = ksk;

586:     /* remaining number of blocks */

588:     if (nblks > 2) {
589:       for (k = 1; k < nblks-1; ++k) {

591:       /* correction for block K: */
592:       /* Dk - U(k-1) \Sigma(k-1) U(k-1)^T - Vk \Sigmak Vk^T */

594:       ksk = ksizes[k];
595:       rp1 = rank[k];

597:       /* initialize the proper part of Z with the diagonal block Dk */
598:       /* (the correction will be made in Z and then the call of DSYEVD will */
599:       /*  overwrite it with the eigenvectors) */

601:       for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[np+i+(np+j)*ldz] = d[i+(j+k*l2d)*l1d];

603:       /* copy Dk into WORK (in order to be able to restore it afterwards) */

605:       for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+(j+k*l2d)*l1d];

607:       /* copy U(K-1) into the first RANK(K-1) columns of Dk and then */
608:       /* multiply with \Sigma(K-1) */

610:       for (i = 0; i < rank[k-1]; ++i) {
611:         PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(i+(k-1)*l2e)*l1e], &one, &d[(i+k*l2d)*l1d], &one));
612:         PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i+(rank[k-1]+(k-1)*l2e)*l1e], &d[(i+k*l2d)*l1d], &one));
613:       }

615:       /* multiply the first RANK(K-1) columns of Dk with U(k-1)^T and */
616:       /* subtract the result from the proper part of Z (previously */
617:       /* initialized with Dk) */

619:       PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[k-1],
620:                     &dmone, &d[k*l1d*l2d],
621:                     &l1d, &e[(k-1)*l1e*l2e], &l1e, &done, &z[np+np*ldz], &ldz));

623:       /* copy Vk into the first RANK(K) columns of Dk and then */
624:       /* multiply with \Sigmak */

626:       for (i = 0; i < rank[k]; ++i) {
627:         PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(rp1+i+1 + k*l2e)*l1e], &one, &d[(i + k*l2d)*l1d], &one));
628:         PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i + (rp1 + k*l2e)*l1e], &d[(i + k*l2d)*l1d], &one));
629:       }

631:       /* multiply the first RANK(K) columns of Dk with Vk^T and */
632:       /* subtract the result from the proper part of Z (previously */
633:       /* updated with [- U(k-1) \Sigma(k-1) U(k-1)^T]) */

635:       PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[k],
636:                     &dmone, &d[k*l1d*l2d], &l1d,
637:                     &e[(rank[k]+1 + k*l2e)*l1e], &l1e, &done, &z[np+np*ldz], &ldz));

639:       /* restore the original Dk from WORK */

641:       for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+(j+k*l2d)*l1d] = work[i+j*ksk];

643:       /* eigenanalysis of block K (using dsyevd) */

645:       PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, &z[np+np*ldz],
646:                      &ldz, &ev[np], work, &lwork, info));
647:       SlepcCheckLapackInfo("syev",*info);

649:       /* EV(NPP1:) contains the eigenvalues in ascending order */
650:       /* (they are returned this way by DSYEVD) */

652:       for (i = 0; i < ksk; ++i) iwork[np + i] = i+1;

654:       /* update NP */
655:       np += ksk;
656:     }
657:   }

659:   /* last block: ....................................... */

661:   /*    correction for block NBLKS: */
662:   /*    D(nblks) - U(nblks-1) \Sigma(nblks-1) U(nblks-1)^T */

664:   ksk = ksizes[nblks-1];

666:   /* initialize the proper part of Z with the diagonal block D(nblks) */
667:   /* (the correction will be made in Z and then the call of DSYEVD will */
668:   /* overwrite it with the eigenvectors) */

670:   for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[np+i+(np+j)*ldz] = d[i+(j+(nblks-1)*l2d)*l1d];

672:   /* copy D(nblks) into WORK (in order to be able to restore it afterwards) */

674:   for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+(j+(nblks-1)*l2d)*l1d];

676:   /* copy U(nblks-1) into the first RANK(nblks-1) columns of D(nblks) and then */
677:   /* multiply with \Sigma(nblks-1) */

679:   for (i = 0; i < rank[nblks-2]; ++i) {
680:     PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(i + (nblks-2)*l2e)*l1e],
681:               &one, &d[(i + (nblks-1)*l2d)*l1d], &one));
682:     PetscCallBLAS("BLASscal",BLASscal_(&ksk,
683:               &e[i + (rank[nblks-2] + (nblks-2)*l2e)*l1e],
684:               &d[(i + (nblks-1)*l2d)*l1d], &one));
685:   }

687:   /* multiply the first RANK(nblks-1) columns of D(nblks) with U(nblks-1)^T */
688:   /* and subtract the result from the proper part of Z (previously */
689:   /* initialized with D(nblks)) */

691:   PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[nblks - 2],
692:           &dmone, &d[(nblks-1)*l1d*l2d], &l1d,
693:           &e[(nblks-2)*l1e*l2e], &l1e, &done, &z[np+np*ldz], &ldz));

695:   /* restore the original D(nblks) from WORK */

697:   for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+(j+(nblks-1)*l2d)*l1d] = work[i+j*ksk];

699:   /* eigenanalysis of block NBLKS (using dsyevd) */

701:   PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, &z[np+np*ldz], &ldz, &ev[np], work, &lwork, info));
702:   SlepcCheckLapackInfo("syev",*info);

704:   /* EV(NPP1:) contains the eigenvalues in ascending order */
705:   /* (they are returned this way by DSYEVD) */

707:   for (i = 0; i < ksk; ++i) iwork[np + i] = i+1;

709:   /* note that from here on the entire workspace is available again */

711:   /* Perform all the merging operations. */

713:   for (i = 0; i < nblks-1; ++i) {

715:     /* MATSIZ = total size of the current rank RANK modification problem */

717:     matsiz = iwork[isize + i - 1];
718:     np = iwork[istrtp + i - 1];
719:     kbrk = iwork[icut + i - 1];
720:     mat1 = iwork[ilsum + i - 1];

722:     for (j = 0; j < rank[kbrk-1]; ++j) {

724:       /* NOTE: The parameter RHO in DMERG2 is modified in DSRTDF */
725:       /*       (multiplied by 2) ! In order not to change the */
726:       /*       singular value stored in E(:, RANK(KBRK)+1, KBRK), */
727:       /*       we do not pass on this variable as an argument to DMERG2, */
728:       /*       but we assign a separate variable RHO here which is passed */
729:       /*       on to DMERG2. */
730:       /*       Alternative solution in F90: */
731:       /*       pass E(:,RANK(KBRK)+1,KBRK) to an INTENT(IN) parameter */
732:       /*       in DMERG2. */

734:       rho = e[j + (rank[kbrk-1] + (kbrk-1)*l2e)*l1e];

736:       /* eigenvectors are accumulated (JOBZ.EQ.'D') */

738:       PetscCall(BDC_dmerg2_(jobz, j+1, matsiz, &ev[np-1], &z[np-1+(np-1)*ldz],
739:                     ldz, &iwork[np-1], &rho, &e[(j + (kbrk-1)*l2e)*l1e],
740:                     ksizes[kbrk], &e[(rank[kbrk-1]+j+1 + (kbrk-1)*l2e)*l1e],
741:                     ksizes[kbrk-1], mat1, work, lwork, &iwork[n], tol, info, 1));
742:       PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in dmerg2, info = %" PetscBLASInt_FMT,*info);
743:     }

745:     /* at this point all RANK(KBRK) rank-one modifications corresponding */
746:     /* to the current off-diagonal block are finished. */
747:     /* Move on to the next off-diagonal block. */

749:   }

751:   /* Re-merge the eigenvalues/vectors which were deflated at the final */
752:   /* merging step by sorting all eigenvalues and eigenvectors according */
753:   /* to the permutation stored in IWORK. */

755:   /* copy eigenvalues and eigenvectors in ordered form into WORK */
756:   /* (eigenvalues into WORK(1:N), eigenvectors into WORK(N+1:N+1+N^2)) */

758:   for (i = 0; i < n; ++i) {
759:     j = iwork[i];
760:     work[i] = ev[j-1];
761:     PetscCallBLAS("BLAScopy",BLAScopy_(&n, &z[(j-1)*ldz], &one, &work[n*(i+1)], &one));
762:   }

764:   /* copy ordered eigenvalues back from WORK(1:N) into EV */

766:   PetscCallBLAS("BLAScopy",BLAScopy_(&n, work, &one, ev, &one));

768:   /* copy ordered eigenvectors back from WORK(N+1:N+1+N^2) into Z */

770:   for (j=0;j<n;j++) for (i=0;i<n;i++) z[i+j*ldz] = work[i+(j+1)*n];
771:   PetscFunctionReturn(PETSC_SUCCESS);
772: }