LCOV - code coverage report
Current view: top level - sys/classes/ds/impls/hep/bdc - dibtdc.c (source / functions) Hit Total Coverage
Test: SLEPc Lines: 187 198 94.4 %
Date: 2024-12-18 00:42:09 Functions: 2 2 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       3             :    SLEPc - Scalable Library for Eigenvalue Problem Computations
       4             :    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
       5             : 
       6             :    This file is part of SLEPc.
       7             :    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
       8             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       9             : */
      10             : /*
      11             :    BDC - Block-divide and conquer (see description in README file)
      12             : */
      13             : 
      14             : #include <slepc/private/dsimpl.h>
      15             : #include <slepcblaslapack.h>
      16             : 
      17           3 : static PetscErrorCode cutlr_(PetscBLASInt start,PetscBLASInt n,PetscBLASInt blkct,
      18             :         PetscBLASInt *bsizes,PetscBLASInt *ranks,PetscBLASInt *cut,
      19             :         PetscBLASInt *lsum,PetscBLASInt *lblks,PetscBLASInt *info)
      20             : {
      21             : /*  -- Routine written in LAPACK Version 3.0 style -- */
      22             : /* *************************************************** */
      23             : /*     Written by */
      24             : /*     Michael Moldaschl and Wilfried Gansterer */
      25             : /*     University of Vienna */
      26             : /*     last modification: March 16, 2014 */
      27             : 
      28             : /*     Small adaptations of original code written by */
      29             : /*     Wilfried Gansterer and Bob Ward, */
      30             : /*     Department of Computer Science, University of Tennessee */
      31             : /*     see https://doi.org/10.1137/S1064827501399432 */
      32             : /* *************************************************** */
      33             : 
      34             : /*  Purpose */
      35             : /*  ======= */
      36             : 
      37             : /*  CUTLR computes the optimal cut in a sequence of BLKCT neighboring */
      38             : /*  blocks whose sizes are given by the array BSIZES. */
      39             : /*  The sum of all block sizes in the sequence considered is given by N. */
      40             : /*  The cut is optimal in the sense that the difference of the sizes of */
      41             : /*  the resulting two halves is minimum over all cuts with minimum ranks */
      42             : /*  between blocks of the sequence considered. */
      43             : 
      44             : /*  Arguments */
      45             : /*  ========= */
      46             : 
      47             : /*  START  (input) INTEGER */
      48             : /*         In the original array KSIZES of the calling routine DIBTDC, */
      49             : /*         the position where the sequence considered in this routine starts. */
      50             : /*         START >= 1. */
      51             : 
      52             : /*  N      (input) INTEGER */
      53             : /*         The sum of all the block sizes of the sequence to be cut = */
      54             : /*         = sum_{i=1}^{BLKCT} BSIZES(I). */
      55             : /*         N >= 3. */
      56             : 
      57             : /*  BLKCT  (input) INTEGER */
      58             : /*         The number of blocks in the sequence to be cut. */
      59             : /*         BLKCT >= 3. */
      60             : 
      61             : /*  BSIZES (input) INTEGER array, dimension (BLKCT) */
      62             : /*         The dimensions of the (quadratic) blocks of the sequence to be */
      63             : /*         cut. sum_{i=1}^{BLKCT} BSIZES(I) = N. */
      64             : 
      65             : /*  RANKS  (input) INTEGER array, dimension (BLKCT-1) */
      66             : /*         The ranks determining the approximations of the off-diagonal */
      67             : /*         blocks in the sequence considered. */
      68             : 
      69             : /*  CUT    (output) INTEGER */
      70             : /*         After the optimum cut has been determined, the position (in the */
      71             : /*         overall problem as worked on in DIBTDC !) of the last block in */
      72             : /*         the first half of the sequence to be cut. */
      73             : /*         START <= CUT <= START+BLKCT-2. */
      74             : 
      75             : /*  LSUM   (output) INTEGER */
      76             : /*         After the optimum cut has been determined, the sum of the */
      77             : /*         block sizes in the first half of the sequence to be cut. */
      78             : /*         LSUM < N. */
      79             : 
      80             : /*  LBLKS  (output) INTEGER */
      81             : /*         After the optimum cut has been determined, the number of the */
      82             : /*         blocks in the first half of the sequence to be cut. */
      83             : /*         1 <= LBLKS < BLKCT. */
      84             : 
      85             : /*  INFO   (output) INTEGER */
      86             : /*          = 0:  successful exit. */
      87             : /*          < 0:  illegal arguments. */
      88             : /*                if INFO = -i, the i-th (input) argument had an illegal */
      89             : /*                value. */
      90             : /*          > 0:  illegal results. */
      91             : /*                if INFO = i, the i-th (output) argument had an illegal */
      92             : /*                value. */
      93             : 
      94             : /*  Further Details */
      95             : /*  =============== */
      96             : 
      97             : /*  Based on code written by */
      98             : /*     Wilfried Gansterer and Bob Ward, */
      99             : /*     Department of Computer Science, University of Tennessee */
     100             : 
     101             : /*  ===================================================================== */
     102             : 
     103           3 :   PetscBLASInt i, ksk, kchk, ksum, nhalf, deviat, mindev, minrnk, tmpsum;
     104             : 
     105           3 :   PetscFunctionBegin;
     106           3 :   *info = 0;
     107           3 :   *lblks = 1;
     108           3 :   *lsum = 1;
     109           3 :   *cut = start;
     110             : 
     111           3 :   if (start < 1) *info = -1;
     112           3 :   else if (n < 3) *info = -2;
     113           3 :   else if (blkct < 3) *info = -3;
     114           3 :   if (*info == 0) {
     115             :     ksum = 0;
     116             :     kchk = 0;
     117          19 :     for (i = 0; i < blkct; ++i) {
     118          16 :       ksk = bsizes[i];
     119          16 :       ksum += ksk;
     120          16 :       if (ksk < 1) kchk = 1;
     121             :     }
     122           3 :     if (ksum != n || kchk == 1) *info = -4;
     123             :   }
     124           3 :   PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in CUTLR",-(*info));
     125             : 
     126             :   /* determine smallest rank in the range considered */
     127             : 
     128             :   minrnk = n;
     129          16 :   for (i = 0; i < blkct-1; ++i) {
     130          13 :     if (ranks[i] < minrnk) minrnk = ranks[i];
     131             :   }
     132             : 
     133             :   /* determine best cut among those with smallest rank */
     134             : 
     135           3 :   nhalf = n / 2;
     136           3 :   tmpsum = 0;
     137           3 :   mindev = n;
     138          19 :   for (i = 0; i < blkct; ++i) {
     139          16 :     tmpsum += bsizes[i];
     140          16 :     if (ranks[i] == minrnk) {
     141             : 
     142             :       /* determine deviation from "optimal" cut NHALF */
     143             : 
     144          14 :       deviat = tmpsum - nhalf;
     145          14 :       if (deviat<0) deviat = -deviat;
     146             : 
     147             :       /* compare to best deviation so far */
     148             : 
     149          14 :       if (deviat < mindev) {
     150           8 :         mindev = deviat;
     151           8 :         *cut = start + i;
     152           8 :         *lblks = i + 1;
     153           8 :         *lsum = tmpsum;
     154             :       }
     155             :     }
     156             :   }
     157             : 
     158           3 :   if (*cut < start || *cut >= start + blkct - 1) *info = 6;
     159           3 :   else if (*lsum < 1 || *lsum >= n) *info = 7;
     160           3 :   else if (*lblks < 1 || *lblks >= blkct) *info = 8;
     161           3 :   PetscFunctionReturn(PETSC_SUCCESS);
     162             : }
     163             : 
     164           1 : PetscErrorCode BDC_dibtdc_(const char *jobz,PetscBLASInt n,PetscBLASInt nblks,
     165             :         PetscBLASInt *ksizes,PetscReal *d,PetscBLASInt l1d,PetscBLASInt l2d,
     166             :         PetscReal *e,PetscBLASInt *rank,PetscBLASInt l1e,PetscBLASInt l2e,
     167             :         PetscReal tol,PetscReal *ev,PetscReal *z,PetscBLASInt ldz,PetscReal *work,
     168             :         PetscBLASInt lwork,PetscBLASInt *iwork,PetscBLASInt liwork,
     169             :         PetscBLASInt *info,PetscBLASInt jobz_len)
     170             : {
     171             : /*  -- Routine written in LAPACK Version 3.0 style -- */
     172             : /* *************************************************** */
     173             : /*     Written by */
     174             : /*     Michael Moldaschl and Wilfried Gansterer */
     175             : /*     University of Vienna */
     176             : /*     last modification: March 16, 2014 */
     177             : 
     178             : /*     Small adaptations of original code written by */
     179             : /*     Wilfried Gansterer and Bob Ward, */
     180             : /*     Department of Computer Science, University of Tennessee */
     181             : /*     see https://doi.org/10.1137/S1064827501399432 */
     182             : /* *************************************************** */
     183             : 
     184             : /*  Purpose */
     185             : /*  ======= */
     186             : 
     187             : /*  DIBTDC computes all eigenvalues and corresponding eigenvectors of a */
     188             : /*  symmetric irreducible block tridiagonal matrix with rank RANK matrices */
     189             : /*  as the subdiagonal blocks using a block divide and conquer method. */
     190             : 
     191             : /*  Arguments */
     192             : /*  ========= */
     193             : 
     194             : /*  JOBZ    (input) CHARACTER*1 */
     195             : /*          = 'N':  Compute eigenvalues only (not implemented); */
     196             : /*          = 'D':  Compute eigenvalues and eigenvectors. */
     197             : /*                  Eigenvectors are accumulated in the */
     198             : /*                  divide-and-conquer process. */
     199             : 
     200             : /*  N      (input) INTEGER */
     201             : /*         The dimension of the symmetric irreducible block tridiagonal */
     202             : /*         matrix.  N >= 2. */
     203             : 
     204             : /*  NBLKS  (input) INTEGER, 2 <= NBLKS <= N */
     205             : /*         The number of diagonal blocks in the matrix. */
     206             : 
     207             : /*  KSIZES (input) INTEGER array, dimension (NBLKS) */
     208             : /*         The dimension of the square diagonal blocks from top left */
     209             : /*         to bottom right.  KSIZES(I) >= 1 for all I, and the sum of */
     210             : /*         KSIZES(I) for I = 1 to NBLKS has to be equal to N. */
     211             : 
     212             : /*  D      (input) DOUBLE PRECISION array, dimension (L1D,L2D,NBLKS) */
     213             : /*         The lower triangular elements of the symmetric diagonal */
     214             : /*         blocks of the block tridiagonal matrix.  Elements of the top */
     215             : /*         left diagonal block, which is of dimension KSIZES(1), are */
     216             : /*         contained in D(*,*,1); the elements of the next diagonal */
     217             : /*         block, which is of dimension KSIZES(2), are contained in */
     218             : /*         D(*,*,2); etc. */
     219             : 
     220             : /*  L1D    (input) INTEGER */
     221             : /*         The leading dimension of the array D.  L1D >= max(3,KMAX), */
     222             : /*         where KMAX is the dimension of the largest diagonal block. */
     223             : 
     224             : /*  L2D    (input) INTEGER */
     225             : /*         The second dimension of the array D.  L2D >= max(3,KMAX), */
     226             : /*         where KMAX is as stated in L1D above. */
     227             : 
     228             : /*  E      (input) DOUBLE PRECISION array, dimension (L1E,L2E,NBLKS-1) */
     229             : /*         Contains the elements of the scalars (singular values) and */
     230             : /*         vectors (singular vectors) defining the rank RANK subdiagonal */
     231             : /*         blocks of the matrix. */
     232             : /*         E(1:RANK(K),RANK(K)+1,K) holds the RANK(K) scalars, */
     233             : /*         E(:,1:RANK(K),K) holds the RANK(K) column vectors, and */
     234             : /*         E(:,RANK(K)+2:2*RANK(K)+1,K) holds the row vectors for the K-th */
     235             : /*         subdiagonal block. */
     236             : 
     237             : /*  RANK   (input) INTEGER array, dimension (NBLKS-1). */
     238             : /*         The ranks of all the subdiagonal blocks contained in the array E. */
     239             : /*         RANK(K) <= MIN(KSIZES(K), KSIZES(K+1)) */
     240             : 
     241             : /*  L1E    (input) INTEGER */
     242             : /*         The leading dimension of the array E.  L1E >= max(3,2*KMAX+1), */
     243             : /*         where KMAX is as stated in L1D above. */
     244             : 
     245             : /*  L2E    (input) INTEGER */
     246             : /*         The second dimension of the array E.  L2E >= max(3,2*KMAX+1), */
     247             : /*         where KMAX is as stated in L1D above. */
     248             : 
     249             : /*  TOL    (input) DOUBLE PRECISION, TOL <= 1.0D-1 */
     250             : /*         User specified deflation tolerance for the routine DMERG2. */
     251             : /*         If (1.0D-1 >= TOL >= 20*EPS) then TOL is used as */
     252             : /*         the deflation tolerance in DSRTDF. */
     253             : /*         If (TOL < 20*EPS) then the standard deflation tolerance from */
     254             : /*         LAPACK is used as the deflation tolerance in DSRTDF. */
     255             : 
     256             : /*  EV     (output) DOUBLE PRECISION array, dimension (N) */
     257             : /*         If INFO = 0, then EV contains the eigenvalues of the */
     258             : /*         symmetric block tridiagonal matrix in ascending order. */
     259             : 
     260             : /*  Z      (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */
     261             : /*         On entry, Z will be the identity matrix. */
     262             : /*         On exit, Z contains the eigenvectors of the block tridiagonal */
     263             : /*         matrix. */
     264             : 
     265             : /*  LDZ    (input) INTEGER */
     266             : /*         The leading dimension of the array Z.  LDZ >= max(1,N). */
     267             : 
     268             : /*  WORK   (workspace) DOUBLE PRECISION array, dimension (LWORK) */
     269             : 
     270             : /*  LWORK   (input) INTEGER */
     271             : /*          The dimension of the array WORK. */
     272             : /*          In order to guarantee correct results in all cases, */
     273             : /*          LWORK must be at least (2*N**2 + 3*N). In many cases, */
     274             : /*          less workspace is required. The absolute minimum required is */
     275             : /*          (N**2 + 3*N). */
     276             : /*          If the workspace provided is not sufficient, the routine will */
     277             : /*          return a corresponding error code and report how much workspace */
     278             : /*          was missing (see INFO). */
     279             : 
     280             : /*  IWORK  (workspace) INTEGER array, dimension (LIWORK) */
     281             : 
     282             : /*  LIWORK  (input) INTEGER */
     283             : /*          The dimension of the array IWORK. */
     284             : /*          LIWORK must be at least (5*N + 3 + 4*NBLKS - 4): */
     285             : /*                 5*KMAX+3 for DSYEVD, 5*N for ????, */
     286             : /*                 4*NBLKS-4 for the preprocessing (merging order) */
     287             : /*          Summarizing, the minimum integer workspace needed is */
     288             : /*          MAX(5*N, 5*KMAX + 3) + 4*NBLKS - 4 */
     289             : 
     290             : /*  INFO   (output) INTEGER */
     291             : /*          = 0:  successful exit. */
     292             : /*          < 0, > -99: illegal arguments. */
     293             : /*                if INFO = -i, the i-th argument had an illegal value. */
     294             : /*          = -99: error in the preprocessing (call of CUTLR). */
     295             : /*          < -200: not enough workspace. Space for ABS(INFO + 200) */
     296             : /*                numbers is required in addition to the workspace provided, */
     297             : /*                otherwise some eigenvectors will be incorrect. */
     298             : /*          > 0:  The algorithm failed to compute an eigenvalue while */
     299             : /*                working on the submatrix lying in rows and columns */
     300             : /*                INFO/(N+1) through mod(INFO,N+1). */
     301             : 
     302             : /*  Further Details */
     303             : /*  =============== */
     304             : 
     305             : /*  Based on code written by */
     306             : /*     Wilfried Gansterer and Bob Ward, */
     307             : /*     Department of Computer Science, University of Tennessee */
     308             : 
     309             : /*  This routine is comparable to Dlaed0.f from LAPACK. */
     310             : 
     311             : /*  ===================================================================== */
     312             : 
     313           1 :   PetscBLASInt   i, j, k, np, rp1, ksk, one=1;
     314           1 :   PetscBLASInt   cut, mat1, kchk, kbrk, blks, kmax, icut, size, ksum, lsum;
     315           1 :   PetscBLASInt   lblks, rblks, isize, lwmin, ilsum;
     316           1 :   PetscBLASInt   start, istck1, istck2, istck3, merged;
     317           1 :   PetscBLASInt   liwmin, matsiz, startp, istrtp;
     318           1 :   PetscReal      rho, done=1.0, dmone=-1.0;
     319             : 
     320           1 :   PetscFunctionBegin;
     321           1 :   *info = 0;
     322             : 
     323           1 :   if (*(unsigned char *)jobz != 'N' && *(unsigned char *)jobz != 'D') *info = -1;
     324           1 :   else if (n < 2) *info = -2;
     325           1 :   else if (nblks < 2 || nblks > n) *info = -3;
     326           1 :   if (*info == 0) {
     327             :     ksum = 0;
     328             :     kmax = 0;
     329             :     kchk = 0;
     330           9 :     for (k = 0; k < nblks; ++k) {
     331           8 :       ksk = ksizes[k];
     332           8 :       ksum += ksk;
     333           8 :       if (ksk > kmax) kmax = ksk;
     334           8 :       if (ksk < 1) kchk = 1;
     335             :     }
     336           1 :     lwmin = n*n + n * 3;
     337           1 :     liwmin = PetscMax(n * 5,kmax * 5 + 3) + 4*nblks - 4;
     338           1 :     if (ksum != n || kchk == 1) *info = -4;
     339           1 :     else if (l1d < PetscMax(3,kmax)) *info = -6;
     340           1 :     else if (l2d < PetscMax(3,kmax)) *info = -7;
     341           1 :     else if (l1e < PetscMax(3,2*kmax + 1)) *info = -10;
     342           1 :     else if (l2e < PetscMax(3,2*kmax + 1)) *info = -11;
     343           1 :     else if (tol > .1) *info = -12;
     344           1 :     else if (ldz < PetscMax(1,n)) *info = -15;
     345           1 :     else if (lwork < lwmin) *info = -17;
     346           1 :     else if (liwork < liwmin) *info = -19;
     347             :   }
     348           1 :   if (*info == 0) {
     349           8 :     for (k = 0; k < nblks-1; ++k) {
     350           7 :       if (rank[k] > PetscMin(ksizes[k],ksizes[k+1]) || rank[k] < 1) *info = -9;
     351             :     }
     352             :   }
     353             : 
     354           1 :   PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DIBTDC",-(*info));
     355             : 
     356             : /* **************************************************************************** */
     357             : 
     358             :   /* ...Preprocessing..................................................... */
     359             :   /*    Determine the optimal order for merging the subblocks and how much */
     360             :   /*    workspace will be needed for the merging (determined by the last */
     361             :   /*    merge). Cutpoints for the merging operations are determined and stored */
     362             :   /*    in reverse chronological order (starting with the final merging */
     363             :   /*    operation). */
     364             : 
     365             :   /*    integer workspace requirements for the preprocessing: */
     366             :   /*         4*(NBLKS-1) for merging history */
     367             :   /*         at most 3*(NBLKS-1) for stack */
     368             : 
     369           1 :   start = 1;
     370           1 :   size = n;
     371           1 :   blks = nblks;
     372           1 :   merged = 0;
     373           1 :   k = 0;
     374             : 
     375             :   /* integer workspace used for the stack is not needed any more after the */
     376             :   /* preprocessing and therefore can use part of the 5*N */
     377             :   /* integer workspace needed later on in the code */
     378             : 
     379           1 :   istck1 = 0;
     380           1 :   istck2 = istck1 + nblks;
     381           1 :   istck3 = istck2 + nblks;
     382             : 
     383             :   /* integer workspace used for storing the order of merges starts AFTER */
     384             :   /* the integer workspace 5*N+3 which is needed later on in the code */
     385             :   /* (5*KMAX+3 for DSYEVD, 4*N in DMERG2) */
     386             : 
     387           1 :   istrtp = n * 5 + 4;
     388           1 :   icut = istrtp + nblks - 1;
     389           1 :   isize = icut + nblks - 1;
     390           1 :   ilsum = isize + nblks - 1;
     391             : 
     392           2 : L200:
     393             : 
     394           3 :   if (nblks >= 3) {
     395             : 
     396             :     /* Determine the cut point. Note that in the routine CUTLR it is */
     397             :     /* chosen such that it yields the best balanced merging operation */
     398             :     /* among all the rank modifications with minimum rank. */
     399             : 
     400           3 :     PetscCall(cutlr_(start, size, blks, &ksizes[start-1], &rank[start-1], &cut, &lsum, &lblks, info));
     401           3 :     PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in cutlr, info = %" PetscBLASInt_FMT,*info);
     402             : 
     403             :   } else {
     404           0 :     cut = 1;
     405           0 :     lsum = ksizes[0];
     406           0 :     lblks = 1;
     407             :   }
     408             : 
     409           3 :   ++merged;
     410           3 :   startp = 0;
     411           7 :   for (i = 0; i < start-1; ++i) startp += ksizes[i];
     412           3 :   iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
     413           3 :   iwork[icut + (nblks - 1) - merged-1] = cut;
     414           3 :   iwork[isize + (nblks - 1) - merged-1] = size;
     415           3 :   iwork[ilsum + (nblks - 1) - merged-1] = lsum;
     416             : 
     417           3 :   if (lblks == 2) {
     418             : 
     419             :     /* one merge in left branch, left branch done */
     420           2 :     ++merged;
     421           2 :     iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
     422           2 :     iwork[icut + (nblks - 1) - merged-1] = start;
     423           2 :     iwork[isize + (nblks - 1) - merged-1] = lsum;
     424           2 :     iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start-1];
     425             :   }
     426             : 
     427           3 :   if (lblks == 1 || lblks == 2) {
     428             : 
     429             :     /* left branch done, continue on the right side */
     430           2 :     start += lblks;
     431           2 :     size -= lsum;
     432           2 :     blks -= lblks;
     433             : 
     434           2 :     PetscCheck(blks>0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in preprocessing, blks = %" PetscBLASInt_FMT,blks);
     435             : 
     436           2 :     if (blks == 2) {
     437             : 
     438             :       /* one merge in right branch, right branch done */
     439           2 :       ++merged;
     440           2 :       startp += lsum;
     441           2 :       iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
     442           2 :       iwork[icut + (nblks - 1) - merged-1] = start;
     443           2 :       iwork[isize + (nblks - 1) - merged-1] = size;
     444           2 :       iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start-1];
     445             :     }
     446             : 
     447           2 :     if (blks == 1 || blks == 2) {
     448             : 
     449             :       /* get the next subproblem from the stack or finished */
     450             : 
     451           2 :       if (k >= 1) {
     452             : 
     453             :         /* something left on the stack */
     454           1 :         start = iwork[istck1 + k-1];
     455           1 :         size = iwork[istck2 + k-1];
     456           1 :         blks = iwork[istck3 + k-1];
     457           1 :         --k;
     458           1 :         goto L200;
     459             :       } else {
     460             : 
     461             :         /* nothing left on the stack */
     462           1 :         PetscCheck(merged==nblks-1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ERROR in preprocessing - not enough merges performed");
     463             : 
     464             :         /* exit preprocessing */
     465             : 
     466             :       }
     467             :     } else {
     468             : 
     469             :       /* BLKS.GE.3, and therefore analyze the right side */
     470             : 
     471           0 :       goto L200;
     472             :     }
     473             :   } else {
     474             : 
     475             :     /* LBLKS.GE.3, and therefore check the right side and */
     476             :     /* put it on the stack if required */
     477             : 
     478           1 :     rblks = blks - lblks;
     479           1 :     if (rblks >= 3) {
     480           1 :       ++k;
     481           1 :       iwork[istck1 + k-1] = cut + 1;
     482           1 :       iwork[istck2 + k-1] = size - lsum;
     483           1 :       iwork[istck3 + k-1] = rblks;
     484           0 :     } else if (rblks == 2) {
     485             : 
     486             :       /* one merge in right branch, right branch done */
     487             :       /* (note that nothing needs to be done if RBLKS.EQ.1 !) */
     488             : 
     489           0 :       ++merged;
     490           0 :       startp += lsum;
     491           0 :       iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
     492           0 :       iwork[icut + (nblks - 1) - merged-1] = start + lblks;
     493           0 :       iwork[isize + (nblks - 1) - merged-1] = size - lsum;
     494           0 :       iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start + lblks-1];
     495             :     }
     496           1 :     PetscCheck(rblks>0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: ERROR in preprocessing - rblks = %" PetscBLASInt_FMT,rblks);
     497             : 
     498             :     /* continue on the left side */
     499             : 
     500           1 :     size = lsum;
     501           1 :     blks = lblks;
     502           1 :     goto L200;
     503             :   }
     504             : 
     505             :   /*  SIZE = IWORK(ISIZE+NBLKS-2) */
     506             :   /*  MAT1 = IWORK(ILSUM+NBLKS-2) */
     507             : 
     508             :   /* Note: after the dimensions SIZE and MAT1 of the last merging */
     509             :   /* operation have been determined, an upper bound for the workspace */
     510             :   /* requirements which is independent of how much deflation occurs in */
     511             :   /* the last merging operation could be determined as follows */
     512             :   /* (based on (3.15) and (3.19) from UT-CS-00-447): */
     513             : 
     514             :   /*  IF(MAT1.LE.N/2) THEN */
     515             :   /*     WSPREQ = 3*N + 3/2*(SIZE-MAT1)**2 + N*N/2 + MAT1*MAT1 */
     516             :   /*  ELSE */
     517             :   /*     WSPREQ = 3*N + 3/2*MAT1*MAT1 + N*N/2 + (SIZE-MAT1)**2 */
     518             :   /*  END IF */
     519             : 
     520             :   /*  IF(LWORK-WSPREQ.LT.0)THEN */
     521             :   /*          not enough work space provided */
     522             :   /*     INFO = -200 - (WSPREQ-LWORK) */
     523             :   /*     RETURN */
     524             :   /*  END IF */
     525             :   /*  However, this is not really useful, since the actual check whether */
     526             :   /*  enough workspace is provided happens in DMERG2.f ! */
     527             : 
     528             : /* ************************************************************************* */
     529             : 
     530             :   /* ...Solve subproblems................................... */
     531             : 
     532             :   /* Divide the matrix into NBLKS submatrices using rank-r */
     533             :   /* modifications (cuts) and solve for their eigenvalues and */
     534             :   /* eigenvectors. Initialize index array to sort eigenvalues. */
     535             : 
     536             :   /* first block: ...................................... */
     537             : 
     538             :   /*    correction for block 1: D1 - V1 \Sigma1 V1^T */
     539             : 
     540           1 :   ksk = ksizes[0];
     541           1 :   rp1 = rank[0];
     542             : 
     543             :   /* initialize the proper part of Z with the diagonal block D1 */
     544             :   /* (the correction will be made in Z and then the call of DSYEVD will */
     545             :   /*  overwrite it with the eigenvectors) */
     546             : 
     547          10 :   for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[i+j*ldz] = d[i+j*l1d];
     548             : 
     549             :   /* copy D1 into WORK (in order to be able to restore it afterwards) */
     550             : 
     551          10 :   for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+j*l1d];
     552             : 
     553             :   /* copy V1 into the first RANK(1) columns of D1 and then */
     554             :   /* multiply with \Sigma1 */
     555             : 
     556           4 :   for (i = 0; i < rank[0]; ++i) {
     557           3 :     PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(rp1 + i+1)*l1e], &one, &d[i*l1d], &one));
     558           3 :     PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i + rp1*l1e], &d[i*l1d], &one));
     559             :   }
     560             : 
     561             :   /* multiply the first RANK(1) columns of D1 with V1^T and */
     562             :   /* subtract the result from the proper part of Z (previously */
     563             :   /* initialized with D1) */
     564             : 
     565           1 :   PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, rank, &dmone,
     566             :           d, &l1d, &e[(rank[0]+1)*l1e], &l1e, &done, z, &ldz));
     567             : 
     568             :   /* restore the original D1 from WORK */
     569             : 
     570          10 :   for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+j*l1d] = work[i+j*ksk];
     571             : 
     572             :   /* eigenanalysis of block 1 (using DSYEVD) */
     573             : 
     574           1 :   PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, z, &ldz, ev, work, &lwork, info));
     575           1 :   SlepcCheckLapackInfo("syev",*info);
     576             : 
     577             :   /* EV(1:) contains the eigenvalues in ascending order */
     578             :   /* (they are returned this way by DSYEVD) */
     579             : 
     580           4 :   for (i = 0; i < ksk; ++i) iwork[i] = i+1;
     581             : 
     582             :     /* intermediate blocks: .............................. */
     583             : 
     584           1 :     np = ksk;
     585             : 
     586             :     /* remaining number of blocks */
     587             : 
     588           1 :     if (nblks > 2) {
     589           7 :       for (k = 1; k < nblks-1; ++k) {
     590             : 
     591             :       /* correction for block K: */
     592             :       /* Dk - U(k-1) \Sigma(k-1) U(k-1)^T - Vk \Sigmak Vk^T */
     593             : 
     594           6 :       ksk = ksizes[k];
     595           6 :       rp1 = rank[k];
     596             : 
     597             :       /* initialize the proper part of Z with the diagonal block Dk */
     598             :       /* (the correction will be made in Z and then the call of DSYEVD will */
     599             :       /*  overwrite it with the eigenvectors) */
     600             : 
     601          60 :       for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[np+i+(np+j)*ldz] = d[i+(j+k*l2d)*l1d];
     602             : 
     603             :       /* copy Dk into WORK (in order to be able to restore it afterwards) */
     604             : 
     605          60 :       for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+(j+k*l2d)*l1d];
     606             : 
     607             :       /* copy U(K-1) into the first RANK(K-1) columns of Dk and then */
     608             :       /* multiply with \Sigma(K-1) */
     609             : 
     610          24 :       for (i = 0; i < rank[k-1]; ++i) {
     611          18 :         PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(i+(k-1)*l2e)*l1e], &one, &d[(i+k*l2d)*l1d], &one));
     612          18 :         PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i+(rank[k-1]+(k-1)*l2e)*l1e], &d[(i+k*l2d)*l1d], &one));
     613             :       }
     614             : 
     615             :       /* multiply the first RANK(K-1) columns of Dk with U(k-1)^T and */
     616             :       /* subtract the result from the proper part of Z (previously */
     617             :       /* initialized with Dk) */
     618             : 
     619           6 :       PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[k-1],
     620             :                     &dmone, &d[k*l1d*l2d],
     621             :                     &l1d, &e[(k-1)*l1e*l2e], &l1e, &done, &z[np+np*ldz], &ldz));
     622             : 
     623             :       /* copy Vk into the first RANK(K) columns of Dk and then */
     624             :       /* multiply with \Sigmak */
     625             : 
     626          24 :       for (i = 0; i < rank[k]; ++i) {
     627          18 :         PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(rp1+i+1 + k*l2e)*l1e], &one, &d[(i + k*l2d)*l1d], &one));
     628          18 :         PetscCallBLAS("BLASscal",BLASscal_(&ksk, &e[i + (rp1 + k*l2e)*l1e], &d[(i + k*l2d)*l1d], &one));
     629             :       }
     630             : 
     631             :       /* multiply the first RANK(K) columns of Dk with Vk^T and */
     632             :       /* subtract the result from the proper part of Z (previously */
     633             :       /* updated with [- U(k-1) \Sigma(k-1) U(k-1)^T]) */
     634             : 
     635           6 :       PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[k],
     636             :                     &dmone, &d[k*l1d*l2d], &l1d,
     637             :                     &e[(rank[k]+1 + k*l2e)*l1e], &l1e, &done, &z[np+np*ldz], &ldz));
     638             : 
     639             :       /* restore the original Dk from WORK */
     640             : 
     641          60 :       for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+(j+k*l2d)*l1d] = work[i+j*ksk];
     642             : 
     643             :       /* eigenanalysis of block K (using dsyevd) */
     644             : 
     645           6 :       PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, &z[np+np*ldz],
     646             :                      &ldz, &ev[np], work, &lwork, info));
     647           6 :       SlepcCheckLapackInfo("syev",*info);
     648             : 
     649             :       /* EV(NPP1:) contains the eigenvalues in ascending order */
     650             :       /* (they are returned this way by DSYEVD) */
     651             : 
     652          24 :       for (i = 0; i < ksk; ++i) iwork[np + i] = i+1;
     653             : 
     654             :       /* update NP */
     655           6 :       np += ksk;
     656             :     }
     657             :   }
     658             : 
     659             :   /* last block: ....................................... */
     660             : 
     661             :   /*    correction for block NBLKS: */
     662             :   /*    D(nblks) - U(nblks-1) \Sigma(nblks-1) U(nblks-1)^T */
     663             : 
     664           1 :   ksk = ksizes[nblks-1];
     665             : 
     666             :   /* initialize the proper part of Z with the diagonal block D(nblks) */
     667             :   /* (the correction will be made in Z and then the call of DSYEVD will */
     668             :   /* overwrite it with the eigenvectors) */
     669             : 
     670          10 :   for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) z[np+i+(np+j)*ldz] = d[i+(j+(nblks-1)*l2d)*l1d];
     671             : 
     672             :   /* copy D(nblks) into WORK (in order to be able to restore it afterwards) */
     673             : 
     674          10 :   for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) work[i+j*ksk] = d[i+(j+(nblks-1)*l2d)*l1d];
     675             : 
     676             :   /* copy U(nblks-1) into the first RANK(nblks-1) columns of D(nblks) and then */
     677             :   /* multiply with \Sigma(nblks-1) */
     678             : 
     679           4 :   for (i = 0; i < rank[nblks-2]; ++i) {
     680           3 :     PetscCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(i + (nblks-2)*l2e)*l1e],
     681             :               &one, &d[(i + (nblks-1)*l2d)*l1d], &one));
     682           3 :     PetscCallBLAS("BLASscal",BLASscal_(&ksk,
     683             :               &e[i + (rank[nblks-2] + (nblks-2)*l2e)*l1e],
     684             :               &d[(i + (nblks-1)*l2d)*l1d], &one));
     685             :   }
     686             : 
     687             :   /* multiply the first RANK(nblks-1) columns of D(nblks) with U(nblks-1)^T */
     688             :   /* and subtract the result from the proper part of Z (previously */
     689             :   /* initialized with D(nblks)) */
     690             : 
     691           1 :   PetscCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[nblks - 2],
     692             :           &dmone, &d[(nblks-1)*l1d*l2d], &l1d,
     693             :           &e[(nblks-2)*l1e*l2e], &l1e, &done, &z[np+np*ldz], &ldz));
     694             : 
     695             :   /* restore the original D(nblks) from WORK */
     696             : 
     697          10 :   for (j=0;j<ksk;j++) for (i=j;i<ksk;i++) d[i+(j+(nblks-1)*l2d)*l1d] = work[i+j*ksk];
     698             : 
     699             :   /* eigenanalysis of block NBLKS (using dsyevd) */
     700             : 
     701           1 :   PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, &z[np+np*ldz], &ldz, &ev[np], work, &lwork, info));
     702           1 :   SlepcCheckLapackInfo("syev",*info);
     703             : 
     704             :   /* EV(NPP1:) contains the eigenvalues in ascending order */
     705             :   /* (they are returned this way by DSYEVD) */
     706             : 
     707           4 :   for (i = 0; i < ksk; ++i) iwork[np + i] = i+1;
     708             : 
     709             :   /* note that from here on the entire workspace is available again */
     710             : 
     711             :   /* Perform all the merging operations. */
     712             : 
     713           8 :   for (i = 0; i < nblks-1; ++i) {
     714             : 
     715             :     /* MATSIZ = total size of the current rank RANK modification problem */
     716             : 
     717           7 :     matsiz = iwork[isize + i - 1];
     718           7 :     np = iwork[istrtp + i - 1];
     719           7 :     kbrk = iwork[icut + i - 1];
     720           7 :     mat1 = iwork[ilsum + i - 1];
     721             : 
     722          28 :     for (j = 0; j < rank[kbrk-1]; ++j) {
     723             : 
     724             :       /* NOTE: The parameter RHO in DMERG2 is modified in DSRTDF */
     725             :       /*       (multiplied by 2) ! In order not to change the */
     726             :       /*       singular value stored in E(:, RANK(KBRK)+1, KBRK), */
     727             :       /*       we do not pass on this variable as an argument to DMERG2, */
     728             :       /*       but we assign a separate variable RHO here which is passed */
     729             :       /*       on to DMERG2. */
     730             :       /*       Alternative solution in F90: */
     731             :       /*       pass E(:,RANK(KBRK)+1,KBRK) to an INTENT(IN) parameter */
     732             :       /*       in DMERG2. */
     733             : 
     734          21 :       rho = e[j + (rank[kbrk-1] + (kbrk-1)*l2e)*l1e];
     735             : 
     736             :       /* eigenvectors are accumulated (JOBZ.EQ.'D') */
     737             : 
     738          21 :       PetscCall(BDC_dmerg2_(jobz, j+1, matsiz, &ev[np-1], &z[np-1+(np-1)*ldz],
     739             :                     ldz, &iwork[np-1], &rho, &e[(j + (kbrk-1)*l2e)*l1e],
     740             :                     ksizes[kbrk], &e[(rank[kbrk-1]+j+1 + (kbrk-1)*l2e)*l1e],
     741             :                     ksizes[kbrk-1], mat1, work, lwork, &iwork[n], tol, info, 1));
     742          21 :       PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_PLIB,"dibtdc: Error in dmerg2, info = %" PetscBLASInt_FMT,*info);
     743             :     }
     744             : 
     745             :     /* at this point all RANK(KBRK) rank-one modifications corresponding */
     746             :     /* to the current off-diagonal block are finished. */
     747             :     /* Move on to the next off-diagonal block. */
     748             : 
     749             :   }
     750             : 
     751             :   /* Re-merge the eigenvalues/vectors which were deflated at the final */
     752             :   /* merging step by sorting all eigenvalues and eigenvectors according */
     753             :   /* to the permutation stored in IWORK. */
     754             : 
     755             :   /* copy eigenvalues and eigenvectors in ordered form into WORK */
     756             :   /* (eigenvalues into WORK(1:N), eigenvectors into WORK(N+1:N+1+N^2)) */
     757             : 
     758          25 :   for (i = 0; i < n; ++i) {
     759          24 :     j = iwork[i];
     760          24 :     work[i] = ev[j-1];
     761          24 :     PetscCallBLAS("BLAScopy",BLAScopy_(&n, &z[(j-1)*ldz], &one, &work[n*(i+1)], &one));
     762             :   }
     763             : 
     764             :   /* copy ordered eigenvalues back from WORK(1:N) into EV */
     765             : 
     766           1 :   PetscCallBLAS("BLAScopy",BLAScopy_(&n, work, &one, ev, &one));
     767             : 
     768             :   /* copy ordered eigenvectors back from WORK(N+1:N+1+N^2) into Z */
     769             : 
     770         601 :   for (j=0;j<n;j++) for (i=0;i<n;i++) z[i+j*ldz] = work[i+(j+1)*n];
     771           1 :   PetscFunctionReturn(PETSC_SUCCESS);
     772             : }

Generated by: LCOV version 1.14