LCOV - code coverage report
Current view: top level - sys/classes/ds/impls/hep/bdc - dlaed3m.c (source / functions) Hit Total Coverage
Test: SLEPc Lines: 63 64 98.4 %
Date: 2024-12-18 00:42:09 Functions: 1 1 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       3             :    SLEPc - Scalable Library for Eigenvalue Problem Computations
       4             :    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
       5             : 
       6             :    This file is part of SLEPc.
       7             :    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
       8             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       9             : */
      10             : /*
      11             :    BDC - Block-divide and conquer (see description in README file)
      12             : */
      13             : 
      14             : #include <slepc/private/dsimpl.h>
      15             : #include <slepcblaslapack.h>
      16             : 
      17          21 : PetscErrorCode BDC_dlaed3m_(const char *jobz,const char *defl,PetscBLASInt k,PetscBLASInt n,
      18             :         PetscBLASInt n1,PetscReal *d,PetscReal *q,PetscBLASInt ldq,
      19             :         PetscReal rho,PetscReal *dlamda,PetscReal *q2,PetscBLASInt *indx,
      20             :         PetscBLASInt *ctot,PetscReal *w,PetscReal *s,PetscBLASInt *info,
      21             :         PetscBLASInt jobz_len,PetscBLASInt defl_len)
      22             : {
      23             : /*  -- Routine written in LAPACK version 3.0 style -- */
      24             : /* *************************************************** */
      25             : /*     Written by */
      26             : /*     Michael Moldaschl and Wilfried Gansterer */
      27             : /*     University of Vienna */
      28             : /*     last modification: March 16, 2014 */
      29             : 
      30             : /*     Small adaptations of original code written by */
      31             : /*     Wilfried Gansterer and Bob Ward, */
      32             : /*     Department of Computer Science, University of Tennessee */
      33             : /*     see https://doi.org/10.1137/S1064827501399432 */
      34             : /* *************************************************** */
      35             : 
      36             : /*  Purpose */
      37             : /*  ======= */
      38             : 
      39             : /*  DLAED3M finds the roots of the secular equation, as defined by the */
      40             : /*  values in D, W, and RHO, between 1 and K.  It makes the */
      41             : /*  appropriate calls to DLAED4 and then updates the eigenvectors by */
      42             : /*  multiplying the matrix of eigenvectors of the pair of eigensystems */
      43             : /*  being combined by the matrix of eigenvectors of the K-by-K system */
      44             : /*  which is solved here. */
      45             : 
      46             : /*  This code makes very mild assumptions about floating point */
      47             : /*  arithmetic. It will work on machines with a guard digit in */
      48             : /*  add/subtract, or on those binary machines without guard digits */
      49             : /*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */
      50             : /*  It could conceivably fail on hexadecimal or decimal machines */
      51             : /*  without guard digits, but we know of none. */
      52             : 
      53             : /*  Arguments */
      54             : /*  ========= */
      55             : 
      56             : /*  JOBZ    (input) CHARACTER*1 */
      57             : /*          = 'N':  Do not accumulate eigenvectors (not implemented); */
      58             : /*          = 'D':  Do accumulate eigenvectors in the divide-and-conquer */
      59             : /*                  process. */
      60             : 
      61             : /*  DEFL    (input) CHARACTER*1 */
      62             : /*          = '0':  No deflation happened in DSRTDF */
      63             : /*          = '1':  Some deflation happened in DSRTDF (and therefore some */
      64             : /*                  Givens rotations need to be applied to the computed */
      65             : /*                  eigenvector matrix Q) */
      66             : 
      67             : /*  K       (input) INTEGER */
      68             : /*          The number of terms in the rational function to be solved by */
      69             : /*          DLAED4. 0 <= K <= N. */
      70             : 
      71             : /*  N       (input) INTEGER */
      72             : /*          The number of rows and columns in the Q matrix. */
      73             : /*          N >= K (deflation may result in N>K). */
      74             : 
      75             : /*  N1      (input) INTEGER */
      76             : /*          The location of the last eigenvalue in the leading submatrix. */
      77             : /*          min(1,N) <= N1 <= max(1,N-1). */
      78             : 
      79             : /*  D       (output) DOUBLE PRECISION array, dimension (N) */
      80             : /*          D(I) contains the updated eigenvalues for */
      81             : /*          1 <= I <= K. */
      82             : 
      83             : /*  Q       (output) DOUBLE PRECISION array, dimension (LDQ,N) */
      84             : /*          Initially the first K columns are used as workspace. */
      85             : /*          On output the columns 1 to K contain */
      86             : /*          the updated eigenvectors. */
      87             : 
      88             : /*  LDQ     (input) INTEGER */
      89             : /*          The leading dimension of the array Q.  LDQ >= max(1,N). */
      90             : 
      91             : /*  RHO     (input) DOUBLE PRECISION */
      92             : /*          The value of the parameter in the rank one update equation. */
      93             : /*          RHO >= 0 required. */
      94             : 
      95             : /*  DLAMDA  (input/output) DOUBLE PRECISION array, dimension (K) */
      96             : /*          The first K elements of this array contain the old roots */
      97             : /*          of the deflated updating problem.  These are the poles */
      98             : /*          of the secular equation. May be changed on output by */
      99             : /*          having lowest order bit set to zero on Cray X-MP, Cray Y-MP, */
     100             : /*          Cray-2, or Cray C-90, as described above. */
     101             : 
     102             : /*  Q2      (input) DOUBLE PRECISION array, dimension (LDQ2, N) */
     103             : /*          The first K columns of this matrix contain the non-deflated */
     104             : /*          eigenvectors for the split problem. */
     105             : 
     106             : /*  INDX    (input) INTEGER array, dimension (N) */
     107             : /*          The permutation used to arrange the columns of the deflated */
     108             : /*          Q matrix into three groups (see DLAED2). */
     109             : /*          The rows of the eigenvectors found by DLAED4 must be likewise */
     110             : /*          permuted before the matrix multiply can take place. */
     111             : 
     112             : /*  CTOT    (input) INTEGER array, dimension (4) */
     113             : /*          A count of the total number of the various types of columns */
     114             : /*          in Q, as described in INDX.  The fourth column type is any */
     115             : /*          column which has been deflated. */
     116             : 
     117             : /*  W       (input/output) DOUBLE PRECISION array, dimension (K) */
     118             : /*          The first K elements of this array contain the components */
     119             : /*          of the deflation-adjusted updating vector. Destroyed on */
     120             : /*          output. */
     121             : 
     122             : /*  S       (workspace) DOUBLE PRECISION array, dimension */
     123             : /*          (MAX(CTOT(1)+CTOT(2),CTOT(2)+CTOT(3)) + 1)*K */
     124             : /*          Will contain parts of the eigenvectors of the repaired matrix */
     125             : /*          which will be multiplied by the previously accumulated */
     126             : /*          eigenvectors to update the system. This array is a major */
     127             : /*          source of workspace requirements ! */
     128             : 
     129             : /*  INFO    (output) INTEGER */
     130             : /*          = 0:  successful exit. */
     131             : /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
     132             : /*          > 0:  if INFO = i, eigenpair i was not computed successfully */
     133             : 
     134             : /*  Further Details */
     135             : /*  =============== */
     136             : 
     137             : /*  Based on code written by */
     138             : /*     Wilfried Gansterer and Bob Ward, */
     139             : /*     Department of Computer Science, University of Tennessee */
     140             : /*  Based on the design of the LAPACK code DLAED3 with small modifications */
     141             : /*  (Note that in contrast to the original DLAED3, this routine */
     142             : /*  DOES NOT require that N1 <= N/2) */
     143             : 
     144             : /*  Based on contributions by */
     145             : /*     Jeff Rutter, Computer Science Division, University of California */
     146             : /*     at Berkeley, USA */
     147             : /*  Modified by Francoise Tisseur, University of Tennessee. */
     148             : 
     149             : /*  ===================================================================== */
     150             : 
     151          21 :   PetscReal    temp, done = 1.0, dzero = 0.0;
     152          21 :   PetscBLASInt i, j, n2, n12, ii, n23, iq2, i1, one=1;
     153             : 
     154          21 :   PetscFunctionBegin;
     155          21 :   *info = 0;
     156             : 
     157          21 :   if (k < 0) *info = -3;
     158          21 :   else if (n < k) *info = -4;
     159          21 :   else if (n1 < PetscMin(1,n) || n1 > PetscMax(1,n)) *info = -5;
     160          21 :   else if (ldq < PetscMax(1,n)) *info = -8;
     161          21 :   else if (rho < 0.) *info = -9;
     162          21 :   PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DLAED3M",-(*info));
     163             : 
     164             :   /* Quick return if possible */
     165             : 
     166          21 :   if (k == 0) PetscFunctionReturn(PETSC_SUCCESS);
     167             : 
     168             :   /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */
     169             :   /* be computed with high relative accuracy (barring over/underflow). */
     170             :   /* This is a problem on machines without a guard digit in */
     171             :   /* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */
     172             :   /* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), */
     173             :   /* which on any of these machines zeros out the bottommost */
     174             :   /* bit of DLAMDA(I) if it is 1; this makes the subsequent */
     175             :   /* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */
     176             :   /* occurs. On binary machines with a guard digit (almost all */
     177             :   /* machines) it does not change DLAMDA(I) at all. On hexadecimal */
     178             :   /* and decimal machines with a guard digit, it slightly */
     179             :   /* changes the bottommost bits of DLAMDA(I). It does not account */
     180             :   /* for hexadecimal or decimal machines without guard digits */
     181             :   /* (we know of none). We use a subroutine call to compute */
     182             :   /* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating */
     183             :   /* this code. */
     184             : 
     185         181 :   for (i = 0; i < k; ++i) {
     186         160 :     dlamda[i] = LAPACKlamc3_(&dlamda[i], &dlamda[i]) - dlamda[i];
     187             :   }
     188             : 
     189         181 :   for (j = 1; j <= k; ++j) {
     190             : 
     191             :     /* ....calling DLAED4 for eigenpair J.... */
     192             : 
     193         160 :     PetscCallBLAS("LAPACKlaed4",LAPACKlaed4_(&k, &j, dlamda, w, &q[(j-1)*ldq], &rho, &d[j-1], info));
     194         160 :     SlepcCheckLapackInfo("laed4",*info);
     195             : 
     196         160 :     if (j < k) {
     197             : 
     198             :       /* If the zero finder terminated properly, but the computed */
     199             :       /* eigenvalues are not ordered, issue an error statement */
     200             :       /* but continue computation. */
     201             : 
     202         139 :       PetscCheck(dlamda[j-1]<dlamda[j],PETSC_COMM_SELF,PETSC_ERR_FP,"DLAMDA(%" PetscBLASInt_FMT ") is greater or equal than DLAMDA(%" PetscBLASInt_FMT ")", j, j+1);
     203         160 :       PetscCheck(d[j-1]>=dlamda[j-1] && d[j-1]<=dlamda[j],PETSC_COMM_SELF,PETSC_ERR_FP,"DLAMDA(%" PetscBLASInt_FMT ") = %g D(%" PetscBLASInt_FMT ") = %g DLAMDA(%" PetscBLASInt_FMT ") = %g", j, (double)dlamda[j-1], j, (double)d[j-1], j+1, (double)dlamda[j]);
     204             :     }
     205             :   }
     206             : 
     207          21 :   if (k == 1) goto L110;
     208             : 
     209          21 :   if (k == 2) {
     210             : 
     211             :     /* permute the components of Q(:,J) (the information returned by DLAED4 */
     212             :     /* necessary to construct the eigenvectors) according to the permutation */
     213             :     /* stored in INDX, resulting from deflation */
     214             : 
     215           6 :     for (j = 0; j < k; ++j) {
     216           4 :       w[0] = q[0+j*ldq];
     217           4 :       w[1] = q[1+j*ldq];
     218           4 :       ii = indx[0];
     219           4 :       q[0+j*ldq] = w[ii-1];
     220           4 :       ii = indx[1];
     221           4 :       q[1+j*ldq] = w[ii-1];
     222             :     }
     223           2 :     goto L110;
     224             :   }
     225             : 
     226             :   /* ....K.GE.3.... */
     227             :   /* Compute updated W (used for computing the eigenvectors corresponding */
     228             :   /* to the previously computed eigenvalues). */
     229             : 
     230          19 :   PetscCallBLAS("BLAScopy",BLAScopy_(&k, w, &one, s, &one));
     231             : 
     232             :   /* Initialize W(I) = Q(I,I) */
     233             : 
     234          19 :   i1 = ldq + 1;
     235          19 :   PetscCallBLAS("BLAScopy",BLAScopy_(&k, q, &i1, w, &one));
     236         175 :   for (j = 0; j < k; ++j) {
     237         852 :     for (i = 0; i < j; ++i) {
     238         696 :       w[i] *= q[i+j*ldq] / (dlamda[i] - dlamda[j]);
     239             :     }
     240         852 :     for (i = j + 1; i < k; ++i) {
     241         696 :       w[i] *= q[i+j*ldq] / (dlamda[i] - dlamda[j]);
     242             :     }
     243             :   }
     244         175 :   for (i = 0; i < k; ++i) {
     245         156 :     temp = PetscSqrtReal(-w[i]);
     246         156 :     if (temp<0) temp = -temp;
     247         156 :     w[i] =  (s[i] >= 0) ? temp : -temp;
     248             :   }
     249             : 
     250             :   /* Compute eigenvectors of the modified rank-1 modification (using the */
     251             :   /* vector W). */
     252             : 
     253         175 :   for (j = 0; j < k; ++j) {
     254        1704 :     for (i = 0; i < k; ++i) {
     255        1548 :       s[i] = w[i] / q[i+j*ldq];
     256             :     }
     257         156 :     temp = BLASnrm2_(&k, s, &one);
     258        1860 :     for (i = 0; i < k; ++i) {
     259             : 
     260             :       /* apply the permutation resulting from deflation as stored */
     261             :       /* in INDX */
     262             : 
     263        1548 :       ii = indx[i];
     264        1548 :       q[i+j*ldq] = s[ii-1] / temp;
     265             :     }
     266             :   }
     267             : 
     268             : /* ************************************************************************** */
     269             : 
     270             :   /* ....updating the eigenvectors.... */
     271             : 
     272          19 : L110:
     273             : 
     274          21 :   n2 = n - n1;
     275          21 :   n12 = ctot[0] + ctot[1];
     276          21 :   n23 = ctot[1] + ctot[2];
     277          21 :   if (*(unsigned char *)jobz == 'D') {
     278             : 
     279             :     /* Compute the updated eigenvectors. (NOTE that every call of */
     280             :     /* DGEMM requires three DISTINCT arrays) */
     281             : 
     282             :     /* copy Q(CTOT(1)+1:K,1:K) to S */
     283             : 
     284         523 :     for (j=0;j<k;j++) for (i=0;i<n23;i++) s[i+j*n23] = q[ctot[0]+i+j*ldq];
     285          21 :     iq2 = n1 * n12 + 1;
     286             : 
     287          21 :     if (n23 != 0) {
     288             : 
     289             :       /* multiply the second part of Q2 (the eigenvectors of the */
     290             :       /* lower block) with S and write the result into the lower part of */
     291             :       /* Q, i.e., Q(N1+1:N,1:K) */
     292             : 
     293           7 :       PetscCallBLAS("BLASgemm",BLASgemm_("N", "N", &n2, &k, &n23, &done,
     294             :                   &q2[iq2-1], &n2, s, &n23, &dzero, &q[n1], &ldq));
     295             :     } else {
     296         120 :       for (j=0;j<k;j++) for (i=0;i<n2;i++) q[n1+i+j*ldq] = 0.0;
     297             :     }
     298             : 
     299             :     /* copy Q(1:CTOT(1)+CTOT(2),1:K) to S */
     300             : 
     301        1557 :     for (j=0;j<k;j++) for (i=0;i<n12;i++) s[i+j*n12] = q[i+j*ldq];
     302             : 
     303          21 :     if (n12 != 0) {
     304             : 
     305             :       /* multiply the first part of Q2 (the eigenvectors of the */
     306             :       /* upper block) with S and write the result into the upper part of */
     307             :       /* Q, i.e., Q(1:N1,1:K) */
     308             : 
     309          21 :       PetscCallBLAS("BLASgemm",BLASgemm_("N", "N", &n1, &k, &n12, &done,
     310             :                   q2, &n1, s, &n12, &dzero, q, &ldq));
     311             :     } else {
     312           0 :       for (j=0;j<k;j++) for (i=0;i<n1;i++) q[i+j*ldq] = 0.0;
     313             :     }
     314             :   }
     315          21 :   PetscFunctionReturn(PETSC_SUCCESS);
     316             : }

Generated by: LCOV version 1.14