LCOV - code coverage report
Current view: top level - sys/classes/ds/impls/hep/bdc - dsbtdc.c (source / functions) Hit Total Coverage
Test: SLEPc Lines: 136 160 85.0 %
Date: 2024-12-18 00:42:09 Functions: 1 1 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       3             :    SLEPc - Scalable Library for Eigenvalue Problem Computations
       4             :    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
       5             : 
       6             :    This file is part of SLEPc.
       7             :    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
       8             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       9             : */
      10             : /*
      11             :    BDC - Block-divide and conquer (see description in README file)
      12             : */
      13             : 
      14             : #include <slepc/private/dsimpl.h>
      15             : #include <slepcblaslapack.h>
      16             : 
      17           2 : PetscErrorCode BDC_dsbtdc_(const char *jobz,const char *jobacc,PetscBLASInt n,
      18             :         PetscBLASInt nblks,PetscBLASInt *ksizes,PetscReal *d,PetscBLASInt l1d,
      19             :         PetscBLASInt l2d,PetscReal *e,PetscBLASInt l1e,PetscBLASInt l2e,PetscReal tol,
      20             :         PetscReal tau1,PetscReal tau2,PetscReal *ev,PetscReal *z,PetscBLASInt ldz,
      21             :         PetscReal *work,PetscBLASInt lwork,PetscBLASInt *iwork,PetscBLASInt liwork,
      22             :         PetscReal *mingap,PetscBLASInt *mingapi,PetscBLASInt *info,
      23             :         PetscBLASInt jobz_len,PetscBLASInt jobacc_len)
      24             : {
      25             : /*  -- Routine written in LAPACK Version 3.0 style -- */
      26             : /* *************************************************** */
      27             : /*     Written by */
      28             : /*     Michael Moldaschl and Wilfried Gansterer */
      29             : /*     University of Vienna */
      30             : /*     last modification: March 28, 2014 */
      31             : 
      32             : /*     Small adaptations of original code written by */
      33             : /*     Wilfried Gansterer and Bob Ward, */
      34             : /*     Department of Computer Science, University of Tennessee */
      35             : /*     see https://doi.org/10.1137/S1064827501399432 */
      36             : /* *************************************************** */
      37             : 
      38             : /*  Purpose */
      39             : /*  ======= */
      40             : 
      41             : /*  DSBTDC computes approximations to all eigenvalues and eigenvectors */
      42             : /*  of a symmetric block tridiagonal matrix using the divide and */
      43             : /*  conquer method with lower rank approximations to the subdiagonal blocks. */
      44             : 
      45             : /*  This code makes very mild assumptions about floating point */
      46             : /*  arithmetic. It will work on machines with a guard digit in */
      47             : /*  add/subtract, or on those binary machines without guard digits */
      48             : /*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */
      49             : /*  It could conceivably fail on hexadecimal or decimal machines */
      50             : /*  without guard digits, but we know of none.  See DLAED3M for details. */
      51             : 
      52             : /*  Arguments */
      53             : /*  ========= */
      54             : 
      55             : /*  JOBZ    (input) CHARACTER*1 */
      56             : /*          = 'N':  Compute eigenvalues only (not implemented); */
      57             : /*          = 'D':  Compute eigenvalues and eigenvectors. Eigenvectors */
      58             : /*                  are accumulated in the divide-and-conquer process. */
      59             : 
      60             : /*  JOBACC  (input) CHARACTER*1 */
      61             : /*          = 'A' ("automatic"): The accuracy parameters TAU1 and TAU2 */
      62             : /*                               are determined automatically from the */
      63             : /*                               parameter TOL according to the analytical */
      64             : /*                               bounds. In that case the input values of */
      65             : /*                               TAU1 and TAU2 are irrelevant (ignored). */
      66             : /*          = 'M' ("manual"): The input values of the accuracy parameters */
      67             : /*                            TAU1 and TAU2 are used. In that case the input */
      68             : /*                            value of the parameter TOL is irrelevant */
      69             : /*                            (ignored). */
      70             : 
      71             : /*  N       (input) INTEGER */
      72             : /*          The dimension of the symmetric block tridiagonal matrix. */
      73             : /*          N >= 1. */
      74             : 
      75             : /*  NBLKS   (input) INTEGER, 1 <= NBLKS <= N */
      76             : /*          The number of diagonal blocks in the matrix. */
      77             : 
      78             : /*  KSIZES  (input) INTEGER array, dimension (NBLKS) */
      79             : /*          The dimensions of the square diagonal blocks from top left */
      80             : /*          to bottom right.  KSIZES(I) >= 1 for all I, and the sum of */
      81             : /*          KSIZES(I) for I = 1 to NBLKS has to be equal to N. */
      82             : 
      83             : /*  D       (input) DOUBLE PRECISION array, dimension (L1D,L2D,NBLKS) */
      84             : /*          The lower triangular elements of the symmetric diagonal */
      85             : /*          blocks of the block tridiagonal matrix. The elements of the top */
      86             : /*          left diagonal block, which is of dimension KSIZES(1), have to */
      87             : /*          be placed in D(*,*,1); the elements of the next diagonal */
      88             : /*          block, which is of dimension KSIZES(2), have to be placed in */
      89             : /*          D(*,*,2); etc. */
      90             : 
      91             : /*  L1D     (input) INTEGER */
      92             : /*          The leading dimension of the array D.  L1D >= max(3,KMAX), */
      93             : /*          where KMAX is the dimension of the largest diagonal block, */
      94             : /*          i.e.,  KMAX = max_I (KSIZES(I)). */
      95             : 
      96             : /*  L2D     (input) INTEGER */
      97             : /*          The second dimension of the array D.  L2D >= max(3,KMAX), */
      98             : /*          where KMAX is as stated in L1D above. */
      99             : 
     100             : /*  E       (input) DOUBLE PRECISION array, dimension (L1E,L2E,NBLKS-1) */
     101             : /*          The elements of the subdiagonal blocks of the */
     102             : /*          block tridiagonal matrix. The elements of the top left */
     103             : /*          subdiagonal block, which is KSIZES(2) x KSIZES(1), have to be */
     104             : /*          placed in E(*,*,1); the elements of the next subdiagonal block, */
     105             : /*          which is KSIZES(3) x KSIZES(2), have to be placed in E(*,*,2); etc. */
     106             : /*          During runtime, the original contents of E(*,*,K) is */
     107             : /*          overwritten by the singular vectors and singular values of */
     108             : /*          the lower rank representation. */
     109             : 
     110             : /*  L1E     (input) INTEGER */
     111             : /*          The leading dimension of the array E.  L1E >= max(3,2*KMAX+1), */
     112             : /*          where KMAX is as stated in L1D above. The size of L1E enables */
     113             : /*          the storage of ALL singular vectors and singular values for */
     114             : /*          the corresponding off-diagonal block in E(*,*,K) and therefore */
     115             : /*          there are no restrictions on the rank of the approximation */
     116             : /*          (only the "natural" restriction */
     117             : /*          RANK(K) .LE. MIN(KSIZES(K),KSIZES(K+1))). */
     118             : 
     119             : /*  L2E     (input) INTEGER */
     120             : /*          The second dimension of the array E.  L2E >= max(3,2*KMAX+1), */
     121             : /*          where KMAX is as stated in L1D above. The size of L2E enables */
     122             : /*          the storage of ALL singular vectors and singular values for */
     123             : /*          the corresponding off-diagonal block in E(*,*,K) and therefore */
     124             : /*          there are no restrictions on the rank of the approximation */
     125             : /*          (only the "natural" restriction */
     126             : /*          RANK(K) .LE. MIN(KSIZES(K),KSIZES(K+1))). */
     127             : 
     128             : /*  TOL     (input) DOUBLE PRECISION, TOL.LE.TOLMAX */
     129             : /*          User specified tolerance for the residuals of the computed */
     130             : /*          eigenpairs. If (JOBACC.EQ.'A') then it is used to determine */
     131             : /*          TAU1 and TAU2; ignored otherwise. */
     132             : /*          If (TOL.LT.40*EPS .AND. JOBACC.EQ.'A') then TAU1 is set to machine */
     133             : /*          epsilon and TAU2 is set to the standard deflation tolerance from */
     134             : /*          LAPACK. */
     135             : 
     136             : /*  TAU1    (input) DOUBLE PRECISION, TAU1.LE.TOLMAX/2 */
     137             : /*          User specified tolerance for determining the rank of the */
     138             : /*          lower rank approximations to the off-diagonal blocks. */
     139             : /*          The rank for each off-diagonal block is determined such that */
     140             : /*          the resulting absolute eigenvalue error is less than or equal */
     141             : /*          to TAU1. */
     142             : /*          If (JOBACC.EQ.'A') then TAU1 is determined automatically from */
     143             : /*             TOL and the input value is ignored. */
     144             : /*          If (JOBACC.EQ.'M' .AND. TAU1.LT.20*EPS) then TAU1 is set to */
     145             : /*             machine epsilon. */
     146             : 
     147             : /*  TAU2    (input) DOUBLE PRECISION, TAU2.LE.TOLMAX/2 */
     148             : /*          User specified deflation tolerance for the routine DIBTDC. */
     149             : /*          If (1.0D-1.GT.TAU2.GT.20*EPS) then TAU2 is used as */
     150             : /*          the deflation tolerance in DSRTDF (EPS is the machine epsilon). */
     151             : /*          If (TAU2.LE.20*EPS) then the standard deflation tolerance from */
     152             : /*          LAPACK is used as the deflation tolerance in DSRTDF. */
     153             : /*          If (JOBACC.EQ.'A') then TAU2 is determined automatically from */
     154             : /*             TOL and the input value is ignored. */
     155             : /*          If (JOBACC.EQ.'M' .AND. TAU2.LT.20*EPS) then TAU2 is set to */
     156             : /*             the standard deflation tolerance from LAPACK. */
     157             : 
     158             : /*  EV      (output) DOUBLE PRECISION array, dimension (N) */
     159             : /*          If INFO = 0, then EV contains the computed eigenvalues of the */
     160             : /*          symmetric block tridiagonal matrix in ascending order. */
     161             : 
     162             : /*  Z       (output) DOUBLE PRECISION array, dimension (LDZ,N) */
     163             : /*          If (JOBZ.EQ.'D' .AND. INFO = 0) */
     164             : /*          then Z contains the orthonormal eigenvectors of the symmetric */
     165             : /*          block tridiagonal matrix computed by the routine DIBTDC */
     166             : /*          (accumulated in the divide-and-conquer process). */
     167             : /*          If (-199 < INFO < -99) then Z contains the orthonormal */
     168             : /*          eigenvectors of the symmetric block tridiagonal matrix, */
     169             : /*          computed without divide-and-conquer (quick returns). */
     170             : /*          Otherwise not referenced. */
     171             : 
     172             : /*  LDZ     (input) INTEGER */
     173             : /*          The leading dimension of the array Z.  LDZ >= max(1,N). */
     174             : 
     175             : /*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */
     176             : 
     177             : /*  LWORK   (input) INTEGER */
     178             : /*          The dimension of the array WORK. */
     179             : /*          If NBLKS.EQ.1, then LWORK has to be at least 2N^2+6N+1 */
     180             : /*          (for the call of DSYEVD). */
     181             : /*          If NBLKS.GE.2 and (JOBZ.EQ.'D') then the absolute minimum */
     182             : /*             required for DIBTDC is (N**2 + 3*N). This will not always */
     183             : /*             suffice, though, the routine will return a corresponding */
     184             : /*             error code and report how much work space was missing (see */
     185             : /*             INFO). */
     186             : /*          In order to guarantee correct results in all cases where */
     187             : /*          NBLKS.GE.2, LWORK must be at least (2*N**2 + 3*N). */
     188             : 
     189             : /*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK) */
     190             : 
     191             : /*  LIWORK  (input) INTEGER */
     192             : /*          The dimension of the array IWORK. */
     193             : /*          LIWORK must be at least (5*N + 5*NBLKS - 1) (for DIBTDC) */
     194             : /*          Note that this should also suffice for the call of DSYEVD on a */
     195             : /*          diagonal block which requires (5*KMAX + 3). */
     196             : 
     197             : /*  MINGAP  (output) DOUBLE PRECISION */
     198             : /*          The minimum "gap" between the approximate eigenvalues */
     199             : /*          computed, i.e., MIN( ABS(EV(I+1)-EV(I)) for I=1,2,..., N-1 */
     200             : /*          IF (MINGAP.LE.TOL/10) THEN a warning flag is returned in INFO, */
     201             : /*          because the computed eigenvectors may be unreliable individually */
     202             : /*          (only the subspaces spanned are approximated reliably). */
     203             : 
     204             : /*  MINGAPI (output) INTEGER */
     205             : /*          Index I where the minimum gap in the spectrum occurred. */
     206             : 
     207             : /*  INFO    (output) INTEGER */
     208             : /*          = 0:  successful exit, no special cases occurred. */
     209             : /*          < -200: not enough workspace. Space for ABS(INFO + 200) */
     210             : /*                numbers is required in addition to the workspace provided, */
     211             : /*                otherwise some of the computed eigenvectors will be incorrect. */
     212             : /*          < -99, > -199: successful exit, but quick returns. */
     213             : /*                if INFO = -100, successful exit, but the input matrix */
     214             : /*                                was the zero matrix and no */
     215             : /*                                divide-and-conquer was performed */
     216             : /*                if INFO = -101, successful exit, but N was 1 and no */
     217             : /*                                divide-and-conquer was performed */
     218             : /*                if INFO = -102, successful exit, but only a single */
     219             : /*                                dense block. Standard dense solver */
     220             : /*                                was called, no divide-and-conquer was */
     221             : /*                                performed */
     222             : /*                if INFO = -103, successful exit, but warning that */
     223             : /*                                MINGAP.LE.TOL/10 and therefore the */
     224             : /*                                eigenvectors corresponding to close */
     225             : /*                                approximate eigenvalues may individually */
     226             : /*                                be unreliable (although taken together they */
     227             : /*                                do approximate the corresponding subspace to */
     228             : /*                                the desired accuracy) */
     229             : /*          = -99: error in the preprocessing in DIBTDC (when determining */
     230             : /*                 the merging order). */
     231             : /*          < 0, > -99: illegal arguments. */
     232             : /*                if INFO = -i, the i-th argument had an illegal value. */
     233             : /*          > 0:  The algorithm failed to compute an eigenvalue while */
     234             : /*                working on the submatrix lying in rows and columns */
     235             : /*                INFO/(N+1) through mod(INFO,N+1). */
     236             : 
     237             : /*  Further Details */
     238             : /*  =============== */
     239             : 
     240             : /*  Small modifications of code written by */
     241             : /*     Wilfried Gansterer and Bob Ward, */
     242             : /*     Department of Computer Science, University of Tennessee */
     243             : /*     see https://doi.org/10.1137/S1064827501399432 */
     244             : 
     245             : /*  Based on the design of the LAPACK code sstedc.f written by Jeff */
     246             : /*  Rutter, Computer Science Division, University of California at */
     247             : /*  Berkeley, and modified by Francoise Tisseur, University of Tennessee. */
     248             : 
     249             : /*  ===================================================================== */
     250             : 
     251             : /*     .. Parameters .. */
     252             : 
     253             : #define TOLMAX 0.1
     254             : 
     255             : /*        TOLMAX       .... upper bound for tolerances TOL, TAU1, TAU2 */
     256             : /*                          NOTE: in the routine DIBTDC, the value */
     257             : /*                                1.D-1 is hardcoded for TOLMAX ! */
     258             : 
     259           2 :   PetscBLASInt   i, j, k, i1, iwspc, lwmin, start;
     260           2 :   PetscBLASInt   ii, ip, nk, rk, np, iu, rp1, ldu;
     261           2 :   PetscBLASInt   ksk, ivt, iend, kchk=0, kmax=0, one=1, zero=0;
     262           2 :   PetscBLASInt   ldvt, ksum=0, kskp1, spneed, nrblks, liwmin, isvals;
     263           2 :   PetscReal      p, d2, eps, dmax, emax, done = 1.0;
     264           2 :   PetscReal      dnrm, tiny, anorm, exdnrm=0, dropsv, absdiff;
     265             : 
     266           2 :   PetscFunctionBegin;
     267             :   /* Determine machine epsilon. */
     268           2 :   eps = LAPACKlamch_("Epsilon");
     269             : 
     270           2 :   *info = 0;
     271             : 
     272           2 :   if (*(unsigned char *)jobz != 'N' && *(unsigned char *)jobz != 'D') *info = -1;
     273           2 :   else if (*(unsigned char *)jobacc != 'A' && *(unsigned char *)jobacc != 'M') *info = -2;
     274           2 :   else if (n < 1) *info = -3;
     275           2 :   else if (nblks < 1 || nblks > n) *info = -4;
     276           2 :   if (*info == 0) {
     277          11 :     for (k = 0; k < nblks; ++k) {
     278           9 :       ksk = ksizes[k];
     279           9 :       ksum += ksk;
     280           9 :       if (ksk > kmax) kmax = ksk;
     281           9 :       if (ksk < 1) kchk = 1;
     282             :     }
     283           2 :     if (nblks == 1) lwmin = 2*n*n + n*6 + 1;
     284           1 :     else lwmin = n*n + n*3;
     285           2 :     liwmin = n * 5 + nblks * 5 - 4;
     286           2 :     if (ksum != n || kchk == 1) *info = -5;
     287           2 :     else if (l1d < PetscMax(3,kmax)) *info = -7;
     288           2 :     else if (l2d < PetscMax(3,kmax)) *info = -8;
     289           2 :     else if (l1e < PetscMax(3,2*kmax+1)) *info = -10;
     290           2 :     else if (l2e < PetscMax(3,2*kmax+1)) *info = -11;
     291           2 :     else if (*(unsigned char *)jobacc == 'A' && tol > TOLMAX) *info = -12;
     292           2 :     else if (*(unsigned char *)jobacc == 'M' && tau1 > TOLMAX/2) *info = -13;
     293           2 :     else if (*(unsigned char *)jobacc == 'M' && tau2 > TOLMAX/2) *info = -14;
     294           2 :     else if (ldz < PetscMax(1,n)) *info = -17;
     295           2 :     else if (lwork < lwmin) *info = -19;
     296           2 :     else if (liwork < liwmin) *info = -21;
     297             :   }
     298             : 
     299           2 :   PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DSBTDC",-(*info));
     300             : 
     301             :   /* Quick return if possible */
     302             : 
     303           2 :   if (n == 1) {
     304           0 :     ev[0] = d[0]; z[0] = 1.;
     305           0 :     *info = -101;
     306           0 :     PetscFunctionReturn(PETSC_SUCCESS);
     307             :   }
     308             : 
     309             :   /* If NBLKS is equal to 1, then solve the problem with standard */
     310             :   /* dense solver (in this case KSIZES(1) = N). */
     311             : 
     312           2 :   if (nblks == 1) {
     313           4 :     for (i = 0; i < n; ++i) {
     314           9 :       for (j = 0; j <= i; ++j) {
     315           6 :         z[i + j*ldz] = d[i + j*l1d];
     316             :       }
     317             :     }
     318           1 :     PetscCallBLAS("LAPACKsyevd",LAPACKsyevd_("V", "L", &n, z, &ldz, ev, work, &lwork, iwork, &liwork, info));
     319           1 :     SlepcCheckLapackInfo("syevd",*info);
     320           1 :     *info = -102;
     321           1 :     PetscFunctionReturn(PETSC_SUCCESS);
     322             :   }
     323             : 
     324             :   /* determine the accuracy parameters (if requested) */
     325             : 
     326           1 :   if (*(unsigned char *)jobacc == 'A') {
     327           1 :     tau1 = tol / 2;
     328           1 :     if (tau1 < eps * 20) tau1 = eps;
     329             :     tau2 = tol / 2;
     330             :   }
     331             : 
     332             :   /* Initialize Z as the identity matrix */
     333             : 
     334           1 :   if (*(unsigned char *)jobz == 'D') {
     335         601 :     for (j=0;j<n;j++) for (i=0;i<n;i++) z[i+j*ldz] = 0.0;
     336          25 :     for (i=0;i<n;i++) z[i+i*ldz] = 1.0;
     337             :   }
     338             : 
     339             :   /* Determine the off-diagonal ranks, form and store the lower rank */
     340             :   /* approximations based on the tolerance parameters, the */
     341             :   /* RANK(K) largest singular values and the associated singular */
     342             :   /* vectors of each subdiagonal block. Also find the maximum norm of */
     343             :   /* the subdiagonal blocks (in EMAX). */
     344             : 
     345             :   /* Compute SVDs of the subdiagonal blocks.... */
     346             : 
     347             :   /* EMAX .... maximum norm of the off-diagonal blocks */
     348             : 
     349             :   emax = 0.;
     350           8 :   for (k = 0; k < nblks-1; ++k) {
     351           7 :     ksk = ksizes[k];
     352           7 :     kskp1 = ksizes[k+1];
     353           7 :     isvals = 0;
     354             : 
     355             :     /* Note that min(KSKP1,KSK).LE.N/2 (equality possible for */
     356             :     /* NBLKS=2), and therefore storing the singular values requires */
     357             :     /* at most N/2 entries of the *        array WORK. */
     358             : 
     359           7 :     iu = isvals + n / 2;
     360           7 :     ivt = isvals + n / 2;
     361             : 
     362             :     /* Call of DGESVD: The space for U is not referenced, since */
     363             :     /* JOBU='O' and therefore this portion of the array WORK */
     364             :     /* is not referenced for U. */
     365             : 
     366           7 :     ldu = kskp1;
     367           7 :     ldvt = PetscMin(kskp1,ksk);
     368           7 :     iwspc = ivt + n * n / 2;
     369             : 
     370             :     /* Note that the minimum workspace required for this call */
     371             :     /* of DGESVD is: N/2 for storing the singular values + N**2/2 for */
     372             :     /* storing V^T + 5*N/2 workspace =  N**2/2 + 3*N. */
     373             : 
     374           7 :     i1 = lwork - iwspc;
     375           7 :     PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O", "S", &kskp1, &ksk,
     376             :             &e[k*l1e*l2e], &l1e, &work[isvals],
     377             :             &work[iu], &ldu, &work[ivt], &ldvt, &work[iwspc], &i1, info));
     378           7 :     SlepcCheckLapackInfo("gesvd",*info);
     379             : 
     380             :     /* Note that after the return from DGESVD U is stored in */
     381             :     /* E(*,*,K), and V^{\top} is stored in WORK(IVT, IVT+1, ....) */
     382             : 
     383             :     /* determine the ranks RANK() for the approximations */
     384             : 
     385           7 :     rk = PetscMin(ksk,kskp1);
     386           7 : L8:
     387           7 :     dropsv = work[isvals - 1 + rk];
     388             : 
     389           7 :     if (dropsv * 2. <= tau1) {
     390             : 
     391             :       /* the error caused by dropping singular value RK is */
     392             :       /* small enough, try to reduce the rank by one more */
     393             : 
     394           0 :       if (--rk > 0) goto L8;
     395           0 :       else iwork[k] = 0;
     396             :     } else {
     397             : 
     398             :       /* the error caused by dropping singular value RK is */
     399             :       /* too large already, RK is the rank required to achieve the */
     400             :       /* desired accuracy */
     401             : 
     402           7 :       iwork[k] = rk;
     403             :     }
     404             : 
     405             : /* ************************************************************************** */
     406             : 
     407             :     /* Store the first RANK(K) terms of the SVD of the current */
     408             :     /* off-diagonal block. */
     409             :     /* NOTE that here it is required that L1E, L2E >= 2*KMAX+1 in order */
     410             :     /* to have enough space for storing singular vectors and values up */
     411             :     /* to the full SVD of an off-diagonal block !!!! */
     412             : 
     413             :     /* u1-u_RANK(K) is already contained in E(:,1:RANK(K),K) (as a */
     414             :     /* result of the call of DGESVD !), the sigma1-sigmaK are to be */
     415             :     /* stored in E(1:RANK(K),RANK(K)+1,K),  and v1-v_RANK(K) are to be */
     416             :     /* stored in E(:,RANK(K)+2:2*RANK(K)+1,K) */
     417             : 
     418           7 :     rp1 = iwork[k];
     419          28 :     for (j = 0; j < iwork[k]; ++j) {
     420             : 
     421             :       /* store sigma_J in E(J,RANK(K)+1,K) */
     422             : 
     423          21 :       e[j + (rp1 + k*l2e)* l1e] = work[isvals + j];
     424             : 
     425             :       /* update maximum norm of subdiagonal blocks */
     426             : 
     427          21 :       if (e[j + (rp1 + k*l2e)*l1e] > emax) {
     428           1 :         emax = e[j + (rp1 + k*l2e)*l1e];
     429             :       }
     430             : 
     431             :       /* store v_J in E(:,RANK(K)+1+J,K) */
     432             :       /* (note that WORK contains V^{\top} and therefore */
     433             :       /* we need to read rowwise !) */
     434             : 
     435          84 :       for (i = 1; i <= ksk; ++i) {
     436          63 :         e[i-1 + (rp1+j+1 + k*l2e)*l1e] = work[ivt+j + (i-1)*ldvt];
     437             :       }
     438             :     }
     439             : 
     440             :   }
     441             : 
     442             :   /* Compute the maximum norm of diagonal blocks and store the norm */
     443             :   /* of each diagonal block in E(RP1,RP1,K) (after the singular values); */
     444             :   /* store the norm of the last diagonal block in EXDNRM. */
     445             : 
     446             :   /* DMAX .... maximum one-norm of the diagonal blocks */
     447             : 
     448             :   dmax = 0.;
     449           9 :   for (k = 0; k < nblks; ++k) {
     450           8 :     rp1 = iwork[k];
     451             : 
     452             :     /* compute the one-norm of diagonal block K */
     453             : 
     454           8 :     dnrm = LAPACKlansy_("1", "L", &ksizes[k], &d[k*l1d*l2d], &l1d, work);
     455           8 :     if (k+1 == nblks) exdnrm = dnrm;
     456           7 :     else e[rp1 + (rp1 + k*l2e)*l1e] = dnrm;
     457           8 :     if (dnrm > dmax) dmax = dnrm;
     458             :   }
     459             : 
     460             :   /* Check for zero matrix. */
     461             : 
     462           1 :   if (emax == 0. && dmax == 0.) {
     463           0 :     for (i = 0; i < n; ++i) ev[i] = 0.;
     464           0 :     *info = -100;
     465           0 :     PetscFunctionReturn(PETSC_SUCCESS);
     466             :   }
     467             : 
     468             : /* **************************************************************** */
     469             : 
     470             :   /* ....Identify irreducible parts of the block tridiagonal matrix */
     471             :   /* [while (START <= NBLKS)].... */
     472             : 
     473             :   start = 0;
     474             :   np = 0;
     475           1 : L10:
     476           2 :   if (start < nblks) {
     477             : 
     478             :     /* Let IEND be the number of the next subdiagonal block such that */
     479             :     /* its RANK is 0 or IEND = NBLKS if no such subdiagonal exists. */
     480             :     /* The matrix identified by the elements between the diagonal block START */
     481             :     /* and the diagonal block IEND constitutes an independent (irreducible) */
     482             :     /* sub-problem. */
     483             : 
     484             :     iend = start;
     485             : 
     486           8 : L20:
     487           8 :     if (iend < nblks) {
     488           8 :       rk = iwork[iend];
     489             : 
     490             :       /* NOTE: if RANK(IEND).EQ.0 then decoupling happens due to */
     491             :       /*       reduced accuracy requirements ! (because in this case */
     492             :       /*       we would not merge the corresponding two diagonal blocks) */
     493             : 
     494             :       /* NOTE: seems like any combination may potentially happen: */
     495             :       /*       (i) RANK = 0 but no decoupling due to small norm of */
     496             :       /*           off-diagonal block (corresponding diagonal blocks */
     497             :       /*           also have small norm) as well as */
     498             :       /*       (ii) RANK > 0 but decoupling due to small norm of */
     499             :       /*           off-diagonal block (corresponding diagonal blocks */
     500             :       /*           have very large norm) */
     501             :       /*       case (i) is ruled out by checking for RANK = 0 above */
     502             :       /*       (we decide to decouple all the time when the rank */
     503             :       /*       of an off-diagonal block is zero, independently of */
     504             :       /*       the norms of the corresponding diagonal blocks. */
     505             : 
     506           8 :       if (rk > 0) {
     507             : 
     508             :         /* check for decoupling due to small norm of off-diagonal block */
     509             :         /* (relative to the norms of the corresponding diagonal blocks) */
     510             : 
     511           7 :         if (iend == nblks-2) {
     512           1 :           d2 = PetscSqrtReal(exdnrm);
     513             :         } else {
     514           6 :           d2 = PetscSqrtReal(e[iwork[iend+1] + (iwork[iend+1] + (iend+1)*l2e)*l1e]);
     515             :         }
     516             : 
     517             :         /* this definition of TINY is analogous to the definition */
     518             :         /* in the tridiagonal divide&conquer (dstedc) */
     519             : 
     520           7 :         tiny = eps * PetscSqrtReal(e[iwork[iend] + (iwork[iend] + iend*l2e)*l1e])*d2;
     521           7 :         if (e[(iwork[iend] + iend*l2e)*l1e] > tiny) {
     522             : 
     523             :           /* no decoupling due to small norm of off-diagonal block */
     524             : 
     525           7 :           ++iend;
     526           7 :           goto L20;
     527             :         }
     528             :       }
     529             :     }
     530             : 
     531             :     /* ....(Sub) Problem determined: between diagonal blocks */
     532             :     /*     START and IEND. Compute its size and solve it.... */
     533             : 
     534           1 :     nrblks = iend - start + 1;
     535           1 :     if (nrblks == 1) {
     536             : 
     537             :       /* Isolated problem is a single diagonal block */
     538             : 
     539           0 :       nk = ksizes[start];
     540             : 
     541             :       /* copy this isolated block into Z */
     542             : 
     543           0 :       for (i = 0; i < nk; ++i) {
     544           0 :         ip = np + i + 1;
     545           0 :         for (j = 0; j <= i; ++j) z[ip + (np+j+1)*ldz] = d[i + (j + start*l2d)*l1d];
     546             :       }
     547             : 
     548             :       /* check whether there is enough workspace */
     549             : 
     550           0 :       spneed = 2*nk*nk + nk * 6 + 1;
     551           0 :       PetscCheck(spneed<=lwork,PETSC_COMM_SELF,PETSC_ERR_MEM,"dsbtdc: not enough workspace for DSYEVD, info = %" PetscBLASInt_FMT,lwork - 200 - spneed);
     552             : 
     553           0 :       PetscCallBLAS("LAPACKsyevd",LAPACKsyevd_("V", "L", &nk,
     554             :                     &z[np + np*ldz], &ldz, &ev[np],
     555             :                     work, &lwork, &iwork[nblks-1], &liwork, info));
     556           0 :       SlepcCheckLapackInfo("syevd",*info);
     557           0 :       start = iend + 1;
     558           0 :       np += nk;
     559             : 
     560             :       /* go to the next irreducible subproblem */
     561             : 
     562           0 :       goto L10;
     563             :     }
     564             : 
     565             :     /* ....Isolated problem consists of more than one diagonal block. */
     566             :     /*     Start the divide and conquer algorithm.... */
     567             : 
     568             :     /* Scale: Divide by the maximum of all norms of diagonal blocks */
     569             :     /*        and singular values of the subdiagonal blocks */
     570             : 
     571             :     /* ....determine maximum of the norms of all diagonal and subdiagonal */
     572             :     /*     blocks.... */
     573             : 
     574           1 :     if (iend == nblks-1) anorm = exdnrm;
     575           0 :     else anorm = e[iwork[iend] + (iwork[iend] + iend*l2e)*l1e];
     576           8 :     for (k = start; k < iend; ++k) {
     577           7 :       rp1 = iwork[k];
     578             : 
     579             :       /* norm of diagonal block */
     580           7 :       anorm = PetscMax(anorm,e[rp1 + (rp1 + k*l2e)*l1e]);
     581             : 
     582             :       /* singular value of subdiagonal block */
     583          14 :       anorm = PetscMax(anorm,e[(rp1 + k*l2e)*l1e]);
     584             :     }
     585             : 
     586           1 :     nk = 0;
     587           9 :     for (k = start; k < iend+1; ++k) {
     588           8 :       ksk = ksizes[k];
     589           8 :       nk += ksk;
     590             : 
     591             :       /* scale the diagonal block */
     592           8 :       PetscCallBLAS("LAPACKlascl",LAPACKlascl_("L", &zero, &zero,
     593             :                     &anorm, &done, &ksk, &ksk, &d[k*l2d*l1d], &l1d, info));
     594           8 :       SlepcCheckLapackInfo("lascl",*info);
     595             : 
     596             :       /* scale the (approximated) off-diagonal block by dividing its */
     597             :       /* singular values */
     598             : 
     599           8 :       if (k != iend) {
     600             : 
     601             :         /* the last subdiagonal block has index IEND-1 !!!! */
     602          28 :         for (i = 0; i < iwork[k]; ++i) {
     603          21 :           e[i + (iwork[k] + k*l2e)*l1e] /= anorm;
     604             :         }
     605             :       }
     606             :     }
     607             : 
     608             :     /* call the block-tridiagonal divide-and-conquer on the */
     609             :     /* irreducible subproblem which has been identified */
     610             : 
     611           1 :     PetscCall(BDC_dibtdc_(jobz, nk, nrblks, &ksizes[start], &d[start*l1d*l2d], l1d, l2d,
     612             :                 &e[start*l2e*l1e], &iwork[start], l1e, l2e, tau2, &ev[np],
     613             :                 &z[np + np*ldz], ldz, work, lwork, &iwork[nblks-1], liwork, info, 1));
     614           1 :     PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_LIB,"dsbtdc: Error in DIBTDC, info = %" PetscBLASInt_FMT,*info);
     615             : 
     616             : /* ************************************************************************** */
     617             : 
     618             :     /* Scale back the computed eigenvalues. */
     619             : 
     620           1 :     PetscCallBLAS("LAPACKlascl",LAPACKlascl_("G", &zero, &zero, &done,
     621             :             &anorm, &nk, &one, &ev[np], &nk, info));
     622           1 :     SlepcCheckLapackInfo("lascl",*info);
     623             : 
     624           1 :     start = iend + 1;
     625           1 :     np += nk;
     626             : 
     627             :     /* Go to the next irreducible subproblem. */
     628             : 
     629           1 :     goto L10;
     630             :   }
     631             : 
     632             :   /* ....If the problem split any number of times, then the eigenvalues */
     633             :   /* will not be properly ordered. Here we permute the eigenvalues */
     634             :   /* (and the associated eigenvectors) across the irreducible parts */
     635             :   /* into ascending order.... */
     636             : 
     637             :   /*  IF(NRBLKS.LT.NBLKS)THEN */
     638             : 
     639             :   /*    Use Selection Sort to minimize swaps of eigenvectors */
     640             : 
     641          24 :   for (ii = 1; ii < n; ++ii) {
     642          23 :     i = ii;
     643          23 :     k = i;
     644          23 :     p = ev[i];
     645         299 :     for (j = ii; j < n; ++j) {
     646         276 :       if (ev[j] < p) {
     647           0 :         k = j;
     648           0 :         p = ev[j];
     649             :       }
     650             :     }
     651          23 :     if (k != i) {
     652           0 :       ev[k] = ev[i];
     653           0 :       ev[i] = p;
     654          23 :       PetscCallBLAS("BLASswap",BLASswap_(&n, &z[i*ldz], &one, &z[k*ldz], &one));
     655             :     }
     656             :   }
     657             : 
     658             :   /* ...Compute MINGAP (minimum difference between neighboring eigenvalue */
     659             :   /*    approximations).............................................. */
     660             : 
     661           1 :   *mingap = ev[1] - ev[0];
     662           1 :   PetscCheck(*mingap>=0.,PETSC_COMM_SELF,PETSC_ERR_LIB,"dsbtdc: Eigenvalue approximations are not ordered properly. Approximation 1 is larger than approximation 2.");
     663           1 :   *mingapi = 1;
     664          23 :   for (i = 2; i < n; ++i) {
     665          22 :     absdiff = ev[i] - ev[i-1];
     666          22 :     PetscCheck(absdiff>=0.,PETSC_COMM_SELF,PETSC_ERR_LIB,"dsbtdc: Eigenvalue approximations are not ordered properly. Approximation %" PetscBLASInt_FMT " is larger than approximation %" PetscBLASInt_FMT ".",i,i+1);
     667          22 :     if (absdiff < *mingap) {
     668           1 :       *mingap = absdiff;
     669           1 :       *mingapi = i;
     670             :     }
     671             :   }
     672             : 
     673             :   /* check whether the minimum gap between eigenvalue approximations */
     674             :   /* may indicate severe inaccuracies in the eigenvector approximations */
     675             : 
     676           1 :   if (*mingap <= tol / 10) *info = -103;
     677           1 :   PetscFunctionReturn(PETSC_SUCCESS);
     678             : }

Generated by: LCOV version 1.14