LCOV - code coverage report
Current view: top level - sys/classes/ds/impls/hep/bdc - dmerg2.c (source / functions) Hit Total Coverage
Test: SLEPc Lines: 53 54 98.1 %
Date: 2024-12-18 00:42:09 Functions: 1 1 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       3             :    SLEPc - Scalable Library for Eigenvalue Problem Computations
       4             :    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
       5             : 
       6             :    This file is part of SLEPc.
       7             :    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
       8             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       9             : */
      10             : /*
      11             :    BDC - Block-divide and conquer (see description in README file)
      12             : */
      13             : 
      14             : #include <slepc/private/dsimpl.h>
      15             : #include <slepcblaslapack.h>
      16             : 
      17          21 : PetscErrorCode BDC_dmerg2_(const char *jobz,PetscBLASInt rkct,PetscBLASInt n,
      18             :         PetscReal *ev,PetscReal *q,PetscBLASInt ldq,PetscBLASInt *indxq,
      19             :         PetscReal *rho,PetscReal *u,PetscBLASInt sbrkp1,PetscReal *v,
      20             :         PetscBLASInt sbrk,PetscBLASInt cutpnt,PetscReal *work,PetscBLASInt lwork,
      21             :         PetscBLASInt *iwork,PetscReal tol,PetscBLASInt *info,PetscBLASInt jobz_len)
      22             : {
      23             : /*  -- Routine written in LAPACK Version 3.0 style -- */
      24             : /* *************************************************** */
      25             : /*     Written by */
      26             : /*     Michael Moldaschl and Wilfried Gansterer */
      27             : /*     University of Vienna */
      28             : /*     last modification: March 16, 2014 */
      29             : 
      30             : /*     Small adaptations of original code written by */
      31             : /*     Wilfried Gansterer and Bob Ward, */
      32             : /*     Department of Computer Science, University of Tennessee */
      33             : /*     see https://doi.org/10.1137/S1064827501399432 */
      34             : /* *************************************************** */
      35             : 
      36             : /*  Purpose */
      37             : /*  ======= */
      38             : 
      39             : /*  DMERG2 computes the updated eigensystem of a diagonal matrix after */
      40             : /*  modification by a rank-one symmetric matrix.  The diagonal matrix */
      41             : /*  consists of two diagonal submatrices, and the vectors defining the */
      42             : /*  rank-1 matrix similarly have two underlying subvectors each. */
      43             : /*  The dimension of the first subproblem is CUTPNT, the dimension of */
      44             : /*  the second subproblem is N-CUTPNT. */
      45             : 
      46             : /*  T = Q(in) (EV(in) + RHO * Z*Z') Q'(in) = Q(out) * EV(out) * Q'(out) */
      47             : 
      48             : /*     where Z = Q'[V U']', where V is a row vector and U is a column */
      49             : /*     vector with dimensions corresponding to the two underlying */
      50             : /*     subproblems. */
      51             : 
      52             : /*     The eigenvectors of the original matrix are stored in Q, and the */
      53             : /*     eigenvalues in EV.  The algorithm consists of three stages: */
      54             : 
      55             : /*        The first stage consists of deflating the size of the problem */
      56             : /*        when there are multiple eigenvalues or if there is a zero in */
      57             : /*        the Z vector.  For each such occurrence the dimension of the */
      58             : /*        secular equation problem is reduced by one.  This stage is */
      59             : /*        performed by the routine DSRTDF. */
      60             : 
      61             : /*        The second stage consists of calculating the updated */
      62             : /*        eigenvalues. This is done by finding the roots of the secular */
      63             : /*        equation via the routine DLAED4 (as called by DLAED3M). */
      64             : /*        This routine also calculates the eigenvectors of the current */
      65             : /*        problem. */
      66             : 
      67             : /*        If(JOBZ.EQ.'D') then the final stage consists */
      68             : /*        of computing the updated eigenvectors directly using the updated */
      69             : /*        eigenvalues. The eigenvectors for the current problem are multiplied */
      70             : /*        with the eigenvectors from the overall problem. */
      71             : 
      72             : /*  Arguments */
      73             : /*  ========= */
      74             : 
      75             : /*  JOBZ   (input) CHARACTER*1 */
      76             : /*          = 'N': Compute eigenvalues only (not implemented); */
      77             : /*          = 'D': Compute eigenvalues and eigenvectors. */
      78             : /*                 Eigenvectors are accumulated in the divide-and-conquer */
      79             : /*                 process. */
      80             : 
      81             : /*  RKCT   (input) INTEGER */
      82             : /*         The number of the rank modification which is accounted for */
      83             : /*         (RKCT >= 1). Required parameter, because the update operation of the */
      84             : /*         modification vector can be performed much more efficiently */
      85             : /*         if RKCT.EQ.1. In that case, the eigenvector matrix is still */
      86             : /*         block-diagonal. For RKCT.GE.2 the eigenvector matrix for the update */
      87             : /*         operation has filled up and is a full matrix. */
      88             : 
      89             : /*  N      (input) INTEGER */
      90             : /*         The dimension of the symmetric block tridiagonal matrix. */
      91             : /*         N >= 0. */
      92             : 
      93             : /*  EV     (input/output) DOUBLE PRECISION array, dimension (N) */
      94             : /*         On entry, the eigenvalues (=diagonal values) of the */
      95             : /*         rank-1-perturbed matrix. */
      96             : /*         On exit, the eigenvalues of the repaired matrix. */
      97             : 
      98             : /*  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */
      99             : /*         On entry, the eigenvectors of the rank-1-perturbed matrix. */
     100             : /*         On exit, the eigenvectors of the repaired tridiagonal matrix. */
     101             : 
     102             : /*  LDQ    (input) INTEGER */
     103             : /*         The leading dimension of the array Q.  LDQ >= max(1,N). */
     104             : 
     105             : /*  INDXQ  (input/output) INTEGER array, dimension (N) */
     106             : /*         On entry, the permutation which separately sorts the two */
     107             : /*         subproblems in EV into ascending order. */
     108             : /*         On exit, the permutation which will reintegrate the */
     109             : /*         subproblems back into sorted order, */
     110             : /*         i.e. EV(INDXQ(I = 1, N)) will be in ascending order. */
     111             : 
     112             : /*  RHO    (input/output) DOUBLE PRECISION */
     113             : /*         The scalar in the rank-1 perturbation. Modified (multiplied */
     114             : /*         by 2) in DSRTDF. */
     115             : 
     116             : /*  U      (input) DOUBLE PRECISION array; dimension (SBRKP1), where SBRKP1 */
     117             : /*         is the size of the first (original) block after CUTPNT. */
     118             : /*         The column vector of the rank-1 subdiagonal connecting the */
     119             : /*         two diagonal subproblems. */
     120             : /*         Theoretically, zero entries might have to be appended after U */
     121             : /*         in order to make it have dimension (N-CUTPNT). However, this */
     122             : /*         is not required because it can be accounted for in the */
     123             : /*         matrix-vector product using the argument SBRKP1. */
     124             : 
     125             : /*  SBRKP1 (input) INTEGER */
     126             : /*         Dimension of the relevant (non-zero) part of the vector U. */
     127             : /*         Equal to the size of the first original block after the */
     128             : /*         breakpoint. */
     129             : 
     130             : /*  V      (input) DOUBLE PRECISION array; dimension (SBRK), where SBRK */
     131             : /*         is the size of the last (original) block before CUTPNT. */
     132             : /*         The row vector of the rank-1 subdiagonal connecting the two */
     133             : /*         diagonal subproblems. */
     134             : /*         Theoretically, zero entries might have to be inserted in front */
     135             : /*         of V in order to make it have dimension (CUTPNT). However, this */
     136             : /*         is not required because it can be accounted for in the */
     137             : /*         matrix-vector product using the argument SBRK. */
     138             : 
     139             : /*  SBRK   (input) INTEGER */
     140             : /*         Dimension of the relevant (non-zero) part of the vector V. */
     141             : /*         Equal to the size of the last original block before the */
     142             : /*         breakpoint. */
     143             : 
     144             : /*  CUTPNT (input) INTEGER */
     145             : /*         The location of the last eigenvalue of the leading diagonal */
     146             : /*         block.  min(1,N) <= CUTPNT <= max(1,N). */
     147             : 
     148             : /*  WORK   (workspace) DOUBLE PRECISION array, dimension (LWORK) */
     149             : 
     150             : /*  LWORK  (input) INTEGER */
     151             : /*         The dimension of the array WORK. */
     152             : /*         In order to guarantee correct results in all cases, */
     153             : /*         LWORK must be at least (2*N**2 + 3*N). In many cases, */
     154             : /*         less workspace is required. The absolute minimum required is */
     155             : /*         (N**2 + 3*N). */
     156             : /*         If the workspace provided is not sufficient, the routine will */
     157             : /*         return a corresponding error code and report how much workspace */
     158             : /*         was missing (see INFO). */
     159             : /*         NOTE: This parameter is needed for determining whether enough */
     160             : /*               workspace is provided, and, if not, for computing how */
     161             : /*               much workspace is needed. */
     162             : 
     163             : /*  IWORK  (workspace) INTEGER array, dimension (4*N) */
     164             : 
     165             : /*  TOL    (input) DOUBLE PRECISION */
     166             : /*         User specified deflation tolerance for the routine DSRTDF. */
     167             : 
     168             : /*  INFO   (output) INTEGER */
     169             : /*          = 0:  successful exit. */
     170             : /*          < -200: not enough workspace */
     171             : /*                ABS(INFO + 200) numbers have to be stored in addition */
     172             : /*                to the workspace provided, otherwise some eigenvectors */
     173             : /*                will be incorrect. */
     174             : /*          < 0, > -99:  if INFO.EQ.-i, the i-th argument had an */
     175             : /*                       illegal value. */
     176             : /*          > 0:  if INFO.EQ.1, an eigenvalue did not converge */
     177             : /*                if INFO.EQ.2, the deflation counters DZ and DE do not sum */
     178             : /*                              up to the total number N-K of components */
     179             : /*                              deflated */
     180             : 
     181             : /*  Further Details */
     182             : /*  =============== */
     183             : 
     184             : /*  Based on code written by */
     185             : /*     Wilfried Gansterer and Bob Ward, */
     186             : /*     Department of Computer Science, University of Tennessee */
     187             : 
     188             : /*  Based on the design of the LAPACK code Dlaed1.f written by Jeff */
     189             : /*  Rutter, Computer Science Division, University of California at */
     190             : /*  Berkeley, and modified by Francoise Tisseur, University of Tennessee. */
     191             : 
     192             : /*  ===================================================================== */
     193             : 
     194          21 :   PetscBLASInt   i, k, n1, n2, de, is, dz, iw, iz, iq2, nmc, cpp1;
     195          21 :   PetscBLASInt   indx, indxc, indxp, lwmin, idlmda;
     196          21 :   PetscBLASInt   spneed, coltyp, tmpcut, i__1, i__2, one=1, mone=-1;
     197          21 :   char           defl[1];
     198          21 :   PetscReal      done = 1.0, dzero = 0.0;
     199             : 
     200          21 :   PetscFunctionBegin;
     201          21 :   *info = 0;
     202          21 :   lwmin = n*n + n * 3;
     203             : 
     204          21 :   if (n < 0) *info = -3;
     205          21 :   else if (ldq < PetscMax(1,n)) *info = -6;
     206          21 :   else if (cutpnt < PetscMin(1,n) || cutpnt > PetscMax(1,n)) *info = -13;
     207          21 :   else if (lwork < lwmin) *info = -15;
     208          21 :   PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DMERG2",-(*info));
     209             : 
     210             : /* **************************************************************************** */
     211             : 
     212             :   /* Quick return if possible */
     213             : 
     214          21 :   if (n == 0) PetscFunctionReturn(PETSC_SUCCESS);
     215             : 
     216             : /* **************************************************************************** */
     217             : 
     218             :   /* The following values are integer pointers which indicate */
     219             :   /* the portion of the workspace used by a particular array in DSRTDF */
     220             :   /* and DLAED3M. */
     221             : 
     222          21 :   iz = 0;
     223          21 :   idlmda = iz + n;
     224          21 :   iw = idlmda + n;
     225          21 :   iq2 = iw + n;
     226          21 :   is = iq2 + n * n;
     227             : 
     228             :   /* After the pointer IS the matrix S is stored and read in WORK */
     229             :   /* in the routine DLAED3M. */
     230             : 
     231          21 :   indx = 0;
     232          21 :   indxc = indx + n;
     233          21 :   coltyp = indxc + n;
     234          21 :   indxp = coltyp + n;
     235             : 
     236             :   /* If eigenvectors are to be accumulated in the divide-and-conquer */
     237             :   /* process (JOBZ.EQ.'D') form the z-vector which consists of */
     238             :   /* Q_1^T * V and Q_2^T * U. */
     239             : 
     240          21 :   cpp1 = cutpnt + 1;
     241          21 :   if (rkct == 1) {
     242             : 
     243             :     /* for the first rank modification the eigenvector matrix has */
     244             :     /* special block-diagonal structure and therefore Q_1^T * V and */
     245             :     /* Q_2^T * U can be formed separately. */
     246             : 
     247           7 :     PetscCallBLAS("BLASgemv",BLASgemv_("T", &sbrk, &cutpnt, &done,
     248             :               &q[cutpnt - sbrk], &ldq, v, &one, &dzero, &work[iz], &one));
     249           7 :     nmc = n - cutpnt;
     250           7 :     PetscCallBLAS("BLASgemv",BLASgemv_("T", &sbrkp1, &nmc, &done,
     251             :               &q[cpp1-1 + (cpp1-1)*ldq], &ldq, u,
     252             :               &one, &dzero, &work[iz + cutpnt], &one));
     253             : 
     254             :   } else {
     255             : 
     256             :     /* for the higher rank modifications, the vectors V and U */
     257             :     /* have to be multiplied with the full eigenvector matrix */
     258             : 
     259          14 :     PetscCallBLAS("BLASgemv",BLASgemv_("T", &sbrk, &n, &done,
     260             :               &q[cutpnt - sbrk], &ldq, v, &one, &dzero, &work[iz], &one));
     261          14 :     PetscCallBLAS("BLASgemv",BLASgemv_("T", &sbrkp1, &n, &done, &q[cpp1-1],
     262             :               &ldq, u, &one, &done, &work[iz], &one));
     263             : 
     264             :   }
     265             : 
     266             : /* **************************************************************************** */
     267             : 
     268             :   /* Deflate eigenvalues. */
     269             : 
     270          21 :   if (rkct == 1) {
     271             : 
     272             :     /* for the first rank modification we need the actual cutpoint */
     273             : 
     274           7 :     n1 = cutpnt;
     275           7 :     tmpcut = cutpnt;
     276             :   } else {
     277             : 
     278             :     /* for the later rank modifications there is no actual cutpoint any more */
     279             : 
     280          14 :     n1 = n;
     281             : 
     282             :     /* The original value of CUTPNT has to be preserved for the next time */
     283             :     /* this subroutine is called (therefore, CUTPNT is an INPUT parameter */
     284             :     /* and not to be changed). Thus, assign N to TMPCUT and use the local */
     285             :     /* variable TMPCUT from now on for the cut point. */
     286             : 
     287          14 :     tmpcut = n;
     288             :   }
     289             : 
     290             :   /* initialize the flag DEFL (indicates whether deflation occurred - */
     291             :   /* this information is needed later in DLAED3M) */
     292             : 
     293          21 :   *(unsigned char *)defl = '0';
     294             : 
     295             :   /* call DSRTDF for deflation */
     296             : 
     297          21 :   PetscCall(BDC_dsrtdf_(&k, n, n1, ev, q, ldq, indxq, rho, &work[iz],
     298             :           &work[idlmda], &work[iw], &work[iq2], &iwork[indx],
     299             :           &iwork[indxc], &iwork[indxp], &iwork[coltyp], tol, &dz, &de, info));
     300          21 :   PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_LIB,"dmerg2: error in dsrtdf, info = %" PetscBLASInt_FMT,*info);
     301             : 
     302          21 :   if (k < n) {
     303             : 
     304             :    /* ....some deflation occurred in dsrtdf, set the flag DEFL */
     305             :    /*     (needed in DLAED3M.f, since Givens rotations need to be */
     306             :    /*     applied to the eigenvector matrix only if some deflation */
     307             :    /*     happened) */
     308             : 
     309           9 :     *(unsigned char *)defl = '1';
     310             :   }
     311             : 
     312             : /* **************************************************************************** */
     313             : 
     314             :   /* Solve the Secular Equation. */
     315             : 
     316          21 :   if (k != 0) {
     317             : 
     318             :     /* ....not everything was deflated. */
     319             : 
     320             :     /* ....check whether enough workspace is available: */
     321             : 
     322             :     /* Note that the following (upper) bound SPNEED for the workspace */
     323             :     /* requirements should also hold in the extreme case TMPCUT=N, */
     324             :     /* which happens for every rank modification after the first one. */
     325             : 
     326          21 :     i__1 = (iwork[coltyp] + iwork[coltyp+1]) * k;
     327          21 :     i__2 = (iwork[coltyp+1] + iwork[coltyp + 2]) * k;
     328          21 :     spneed = is + PetscMax(i__1,i__2) - 1;
     329             : 
     330          21 :     PetscCheck(spneed<=lwork,PETSC_COMM_SELF,PETSC_ERR_MEM,"dmerg2: Workspace needed exceeds the workspace provided by %" PetscBLASInt_FMT " numbers",spneed-lwork);
     331             : 
     332             :     /* calling DLAED3M for solving the secular equation. */
     333             : 
     334          21 :     PetscCall(BDC_dlaed3m_(jobz, defl, k, n, tmpcut, ev, q, ldq,
     335             :                 *rho, &work[idlmda], &work[iq2], &iwork[indxc], &iwork[coltyp],
     336             :                 &work[iw], &work[is], info, 1, 1));
     337          21 :     PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_LIB,"dmerg2: error in dlaed3m, info = %" PetscBLASInt_FMT,*info);
     338             : 
     339             :     /* Prepare the INDXQ sorting permutation. */
     340             : 
     341          21 :     n1 = k;
     342          21 :     n2 = n - k;
     343          21 :     PetscCallBLAS("LAPACKlamrg",LAPACKlamrg_(&n1, &n2, ev, &one, &mone, indxq));
     344          21 :     if (k == 0) for (i = 0; i < n; ++i) indxq[i] = i+1;
     345             : 
     346             :   } else {
     347             : 
     348             :     /* ....everything was deflated (K.EQ.0) */
     349             : 
     350           0 :     for (i = 0; i < n; ++i) indxq[i] = i+1;
     351             :   }
     352          21 :   PetscFunctionReturn(PETSC_SUCCESS);
     353             : }

Generated by: LCOV version 1.14