GCC Code Coverage Report

Directory:	./
File:	src/sys/classes/ds/impls/hep/bdc/dlaed3m.c
Date:	2025-12-10 04:20:18

	Exec	Total	Coverage
Lines:	63	64	98.4%
Functions:	1	1	100.0%
Branches:	113	210	53.8%

  
      Line
      Branch
      Exec
      Source
    
      /*
    
         - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    
         SLEPc - Scalable Library for Eigenvalue Problem Computations
    
         Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
    
         This file is part of SLEPc.
    
         SLEPc is distributed under a 2-clause BSD license (see LICENSE).
    
         - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    
      */
    
      /*
    
         BDC - Block-divide and conquer (see description in README file)
    
      */
    
      #include <slepc/private/dsimpl.h>
    
      #include <slepcblaslapack.h>
    
      105
      PetscErrorCode BDC_dlaed3m_(const char *jobz,const char *defl,PetscBLASInt k,PetscBLASInt n,
    
              PetscBLASInt n1,PetscReal *d,PetscReal *q,PetscBLASInt ldq,
    
              PetscReal rho,PetscReal *dlamda,PetscReal *q2,PetscBLASInt *indx,
    
              PetscBLASInt *ctot,PetscReal *w,PetscReal *s,PetscBLASInt *info,
    
              PetscBLASInt jobz_len,PetscBLASInt defl_len)
    
      {
    
      /*  -- Routine written in LAPACK version 3.0 style -- */
    
      /* *************************************************** */
    
      /*     Written by */
    
      /*     Michael Moldaschl and Wilfried Gansterer */
    
      /*     University of Vienna */
    
      /*     last modification: March 16, 2014 */
    
      /*     Small adaptations of original code written by */
    
      /*     Wilfried Gansterer and Bob Ward, */
    
      /*     Department of Computer Science, University of Tennessee */
    
      /*     see https://doi.org/10.1137/S1064827501399432 */
    
      /* *************************************************** */
    
      /*  Purpose */
    
      /*  ======= */
    
      /*  DLAED3M finds the roots of the secular equation, as defined by the */
    
      /*  values in D, W, and RHO, between 1 and K.  It makes the */
    
      /*  appropriate calls to DLAED4 and then updates the eigenvectors by */
    
      /*  multiplying the matrix of eigenvectors of the pair of eigensystems */
    
      /*  being combined by the matrix of eigenvectors of the K-by-K system */
    
      /*  which is solved here. */
    
      /*  This code makes very mild assumptions about floating point */
    
      /*  arithmetic. It will work on machines with a guard digit in */
    
      /*  add/subtract, or on those binary machines without guard digits */
    
      /*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */
    
      /*  It could conceivably fail on hexadecimal or decimal machines */
    
      /*  without guard digits, but we know of none. */
    
      /*  Arguments */
    
      /*  ========= */
    
      /*  JOBZ    (input) CHARACTER*1 */
    
      /*          = 'N':  Do not accumulate eigenvectors (not implemented); */
    
      /*          = 'D':  Do accumulate eigenvectors in the divide-and-conquer */
    
      /*                  process. */
    
      /*  DEFL    (input) CHARACTER*1 */
    
      /*          = '0':  No deflation happened in DSRTDF */
    
      /*          = '1':  Some deflation happened in DSRTDF (and therefore some */
    
      /*                  Givens rotations need to be applied to the computed */
    
      /*                  eigenvector matrix Q) */
    
      /*  K       (input) INTEGER */
    
      /*          The number of terms in the rational function to be solved by */
    
      /*          DLAED4. 0 <= K <= N. */
    
      /*  N       (input) INTEGER */
    
      /*          The number of rows and columns in the Q matrix. */
    
      /*          N >= K (deflation may result in N>K). */
    
      /*  N1      (input) INTEGER */
    
      /*          The location of the last eigenvalue in the leading submatrix. */
    
      /*          min(1,N) <= N1 <= max(1,N-1). */
    
      /*  D       (output) DOUBLE PRECISION array, dimension (N) */
    
      /*          D(I) contains the updated eigenvalues for */
    
      /*          1 <= I <= K. */
    
      /*  Q       (output) DOUBLE PRECISION array, dimension (LDQ,N) */
    
      /*          Initially the first K columns are used as workspace. */
    
      /*          On output the columns 1 to K contain */
    
      /*          the updated eigenvectors. */
    
      /*  LDQ     (input) INTEGER */
    
      /*          The leading dimension of the array Q.  LDQ >= max(1,N). */
    
      /*  RHO     (input) DOUBLE PRECISION */
    
      /*          The value of the parameter in the rank one update equation. */
    
      /*          RHO >= 0 required. */
    
      /*  DLAMDA  (input/output) DOUBLE PRECISION array, dimension (K) */
    
      /*          The first K elements of this array contain the old roots */
    
      /*          of the deflated updating problem.  These are the poles */
    
      /*          of the secular equation. May be changed on output by */
    
      /*          having lowest order bit set to zero on Cray X-MP, Cray Y-MP, */
    
      /*          Cray-2, or Cray C-90, as described above. */
    
      /*  Q2      (input) DOUBLE PRECISION array, dimension (LDQ2, N) */
    
      /*          The first K columns of this matrix contain the non-deflated */
    
      /*          eigenvectors for the split problem. */
    
      /*  INDX    (input) INTEGER array, dimension (N) */
    
      /*          The permutation used to arrange the columns of the deflated */
    
      /*          Q matrix into three groups (see DLAED2). */
    
      /*          The rows of the eigenvectors found by DLAED4 must be likewise */
    
      /*          permuted before the matrix multiply can take place. */
    
      /*  CTOT    (input) INTEGER array, dimension (4) */
    
      /*          A count of the total number of the various types of columns */
    
      /*          in Q, as described in INDX.  The fourth column type is any */
    
      /*          column which has been deflated. */
    
      /*  W       (input/output) DOUBLE PRECISION array, dimension (K) */
    
      /*          The first K elements of this array contain the components */
    
      /*          of the deflation-adjusted updating vector. Destroyed on */
    
      /*          output. */
    
      /*  S       (workspace) DOUBLE PRECISION array, dimension */
    
      /*          (MAX(CTOT(1)+CTOT(2),CTOT(2)+CTOT(3)) + 1)*K */
    
      /*          Will contain parts of the eigenvectors of the repaired matrix */
    
      /*          which will be multiplied by the previously accumulated */
    
      /*          eigenvectors to update the system. This array is a major */
    
      /*          source of workspace requirements ! */
    
      /*  INFO    (output) INTEGER */
    
      /*          = 0:  successful exit. */
    
      /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
    
      /*          > 0:  if INFO = i, eigenpair i was not computed successfully */
    
      /*  Further Details */
    
      /*  =============== */
    
      /*  Based on code written by */
    
      /*     Wilfried Gansterer and Bob Ward, */
    
      /*     Department of Computer Science, University of Tennessee */
    
      /*  Based on the design of the LAPACK code DLAED3 with small modifications */
    
      /*  (Note that in contrast to the original DLAED3, this routine */
    
      /*  DOES NOT require that N1 <= N/2) */
    
      /*  Based on contributions by */
    
      /*     Jeff Rutter, Computer Science Division, University of California */
    
      /*     at Berkeley, USA */
    
      /*  Modified by Francoise Tisseur, University of Tennessee. */
    
      /*  ===================================================================== */
    
      105
        PetscReal    temp, done = 1.0, dzero = 0.0;
    
      105
        PetscBLASInt i, j, n2, n12, ii, n23, iq2, i1, one=1;
    
        1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.

      105
        PetscFunctionBegin;
    
      105
        *info = 0;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      105
        if (k < 0) *info = -3;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      105
        else if (n < k) *info = -4;
    
        2/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.

      105
        else if (n1 < PetscMin(1,n) || n1 > PetscMax(1,n)) *info = -5;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      105
        else if (ldq < PetscMax(1,n)) *info = -8;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      105
        else if (rho < 0.) *info = -9;
    
        1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.

      105
        PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DLAED3M",-(*info));
    
        /* Quick return if possible */
    
        2/14✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.

      105
        if (k == 0) PetscFunctionReturn(PETSC_SUCCESS);
    
        /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */
    
        /* be computed with high relative accuracy (barring over/underflow). */
    
        /* This is a problem on machines without a guard digit in */
    
        /* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */
    
        /* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), */
    
        /* which on any of these machines zeros out the bottommost */
    
        /* bit of DLAMDA(I) if it is 1; this makes the subsequent */
    
        /* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */
    
        /* occurs. On binary machines with a guard digit (almost all */
    
        /* machines) it does not change DLAMDA(I) at all. On hexadecimal */
    
        /* and decimal machines with a guard digit, it slightly */
    
        /* changes the bottommost bits of DLAMDA(I). It does not account */
    
        /* for hexadecimal or decimal machines without guard digits */
    
        /* (we know of none). We use a subroutine call to compute */
    
        /* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating */
    
        /* this code. */
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      905
        for (i = 0; i < k; ++i) {
    
      800
          dlamda[i] = LAPACKlamc3_(&dlamda[i], &dlamda[i]) - dlamda[i];
    
        }
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      905
        for (j = 1; j <= k; ++j) {
    
          /* ....calling DLAED4 for eigenpair J.... */
    
        10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.

      800
          PetscCallBLAS("LAPACKlaed4",LAPACKlaed4_(&k, &j, dlamda, w, &q[(j-1)*ldq], &rho, &d[j-1], info));
    
        1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.

      800
          SlepcCheckLapackInfo("laed4",*info);
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      800
          if (j < k) {
    
            /* If the zero finder terminated properly, but the computed */
    
            /* eigenvalues are not ordered, issue an error statement */
    
            /* but continue computation. */
    
        1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.

      695
            PetscCheck(dlamda[j-1]<dlamda[j],PETSC_COMM_SELF,PETSC_ERR_FP,"DLAMDA(%" PetscBLASInt_FMT ") is greater or equal than DLAMDA(%" PetscBLASInt_FMT ")", j, j+1);
    
        2/6✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.

      800
            PetscCheck(d[j-1]>=dlamda[j-1] && d[j-1]<=dlamda[j],PETSC_COMM_SELF,PETSC_ERR_FP,"DLAMDA(%" PetscBLASInt_FMT ") = %g D(%" PetscBLASInt_FMT ") = %g DLAMDA(%" PetscBLASInt_FMT ") = %g", j, (double)dlamda[j-1], j, (double)d[j-1], j+1, (double)dlamda[j]);
    
          }
    
        }
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      105
        if (k == 1) goto L110;
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      105
        if (k == 2) {
    
          /* permute the components of Q(:,J) (the information returned by DLAED4 */
    
          /* necessary to construct the eigenvectors) according to the permutation */
    
          /* stored in INDX, resulting from deflation */
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      30
          for (j = 0; j < k; ++j) {
    
      20
            w[0] = q[0+j*ldq];
    
      20
            w[1] = q[1+j*ldq];
    
      20
            ii = indx[0];
    
      20
            q[0+j*ldq] = w[ii-1];
    
      20
            ii = indx[1];
    
      20
            q[1+j*ldq] = w[ii-1];
    
          }
    
      10
          goto L110;
    
        }
    
        /* ....K.GE.3.... */
    
        /* Compute updated W (used for computing the eigenvectors corresponding */
    
        /* to the previously computed eigenvalues). */
    
        10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.

      95
        PetscCallBLAS("BLAScopy",BLAScopy_(&k, w, &one, s, &one));
    
        /* Initialize W(I) = Q(I,I) */
    
      95
        i1 = ldq + 1;
    
        10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.

      95
        PetscCallBLAS("BLAScopy",BLAScopy_(&k, q, &i1, w, &one));
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      875
        for (j = 0; j < k; ++j) {
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      4260
          for (i = 0; i < j; ++i) {
    
      3480
            w[i] *= q[i+j*ldq] / (dlamda[i] - dlamda[j]);
    
          }
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      4260
          for (i = j + 1; i < k; ++i) {
    
      3480
            w[i] *= q[i+j*ldq] / (dlamda[i] - dlamda[j]);
    
          }
    
        }
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      875
        for (i = 0; i < k; ++i) {
    
      780
          temp = PetscSqrtReal(-w[i]);
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      780
          if (temp<0) temp = -temp;
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      780
          w[i] =  (s[i] >= 0) ? temp : -temp;
    
        }
    
        /* Compute eigenvectors of the modified rank-1 modification (using the */
    
        /* vector W). */
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      875
        for (j = 0; j < k; ++j) {
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      8520
          for (i = 0; i < k; ++i) {
    
      7740
            s[i] = w[i] / q[i+j*ldq];
    
          }
    
      780
          temp = BLASnrm2_(&k, s, &one);
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      9300
          for (i = 0; i < k; ++i) {
    
            /* apply the permutation resulting from deflation as stored */
    
            /* in INDX */
    
      7740
            ii = indx[i];
    
      7740
            q[i+j*ldq] = s[ii-1] / temp;
    
          }
    
        }
    
      /* ************************************************************************** */
    
        /* ....updating the eigenvectors.... */
    
      95
      L110:
    
      105
        n2 = n - n1;
    
      105
        n12 = ctot[0] + ctot[1];
    
      105
        n23 = ctot[1] + ctot[2];
    
        1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.

      105
        if (*(unsigned char *)jobz == 'D') {
    
          /* Compute the updated eigenvectors. (NOTE that every call of */
    
          /* DGEMM requires three DISTINCT arrays) */
    
          /* copy Q(CTOT(1)+1:K,1:K) to S */
    
        4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.

      2615
          for (j=0;j<k;j++) for (i=0;i<n23;i++) s[i+j*n23] = q[ctot[0]+i+j*ldq];
    
      105
          iq2 = n1 * n12 + 1;
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      105
          if (n23 != 0) {
    
            /* multiply the second part of Q2 (the eigenvectors of the */
    
            /* lower block) with S and write the result into the lower part of */
    
            /* Q, i.e., Q(N1+1:N,1:K) */
    
        10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.

      35
            PetscCallBLAS("BLASgemm",BLASgemm_("N", "N", &n2, &k, &n23, &done,
    
                        &q2[iq2-1], &n2, s, &n23, &dzero, &q[n1], &ldq));
    
          } else {
    
        3/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.

      600
            for (j=0;j<k;j++) for (i=0;i<n2;i++) q[n1+i+j*ldq] = 0.0;
    
          }
    
          /* copy Q(1:CTOT(1)+CTOT(2),1:K) to S */
    
        4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.

      7785
          for (j=0;j<k;j++) for (i=0;i<n12;i++) s[i+j*n12] = q[i+j*ldq];
    
        1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.

      105
          if (n12 != 0) {
    
            /* multiply the first part of Q2 (the eigenvectors of the */
    
            /* upper block) with S and write the result into the upper part of */
    
            /* Q, i.e., Q(1:N1,1:K) */
    
        10/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.

      105
            PetscCallBLAS("BLASgemm",BLASgemm_("N", "N", &n1, &k, &n12, &done,
    
                        q2, &n1, s, &n12, &dzero, q, &ldq));
    
          } else {
    
      ✗
            for (j=0;j<k;j++) for (i=0;i<n1;i++) q[i+j*ldq] = 0.0;
    
          }
    
        }
    
        6/12✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.

      21
        PetscFunctionReturn(PETSC_SUCCESS);
    
      }

Line	Branch	Exec	Source
1			/*
2			- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3			SLEPc - Scalable Library for Eigenvalue Problem Computations
4			Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5
6			This file is part of SLEPc.
7			SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8			- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9			*/
10			/*
11			BDC - Block-divide and conquer (see description in README file)
12			*/
13
14			#include <slepc/private/dsimpl.h>
15			#include <slepcblaslapack.h>
16
17		105	PetscErrorCode BDC_dlaed3m_(const char jobz,const char defl,PetscBLASInt k,PetscBLASInt n,
18			PetscBLASInt n1,PetscReal d,PetscReal q,PetscBLASInt ldq,
19			PetscReal rho,PetscReal dlamda,PetscReal q2,PetscBLASInt *indx,
20			PetscBLASInt ctot,PetscReal w,PetscReal s,PetscBLASInt info,
21			PetscBLASInt jobz_len,PetscBLASInt defl_len)
22			{
23			/* -- Routine written in LAPACK version 3.0 style -- */
24			/* *************************************************** */
25			/* Written by */
26			/* Michael Moldaschl and Wilfried Gansterer */
27			/* University of Vienna */
28			/* last modification: March 16, 2014 */
29
30			/* Small adaptations of original code written by */
31			/* Wilfried Gansterer and Bob Ward, */
32			/* Department of Computer Science, University of Tennessee */
33			/* see https://doi.org/10.1137/S1064827501399432 */
34			/* *************************************************** */
35
36			/* Purpose */
37			/* ======= */
38
39			/* DLAED3M finds the roots of the secular equation, as defined by the */
40			/* values in D, W, and RHO, between 1 and K. It makes the */
41			/* appropriate calls to DLAED4 and then updates the eigenvectors by */
42			/* multiplying the matrix of eigenvectors of the pair of eigensystems */
43			/* being combined by the matrix of eigenvectors of the K-by-K system */
44			/* which is solved here. */
45
46			/* This code makes very mild assumptions about floating point */
47			/* arithmetic. It will work on machines with a guard digit in */
48			/* add/subtract, or on those binary machines without guard digits */
49			/* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */
50			/* It could conceivably fail on hexadecimal or decimal machines */
51			/* without guard digits, but we know of none. */
52
53			/* Arguments */
54			/* ========= */
55
56			/* JOBZ (input) CHARACTER1 /
57			/* = 'N': Do not accumulate eigenvectors (not implemented); */
58			/* = 'D': Do accumulate eigenvectors in the divide-and-conquer */
59			/* process. */
60
61			/* DEFL (input) CHARACTER1 /
62			/* = '0': No deflation happened in DSRTDF */
63			/* = '1': Some deflation happened in DSRTDF (and therefore some */
64			/* Givens rotations need to be applied to the computed */
65			/* eigenvector matrix Q) */
66
67			/* K (input) INTEGER */
68			/* The number of terms in the rational function to be solved by */
69			/* DLAED4. 0 <= K <= N. */
70
71			/* N (input) INTEGER */
72			/* The number of rows and columns in the Q matrix. */
73			/* N >= K (deflation may result in N>K). */
74
75			/* N1 (input) INTEGER */
76			/* The location of the last eigenvalue in the leading submatrix. */
77			/* min(1,N) <= N1 <= max(1,N-1). */
78
79			/* D (output) DOUBLE PRECISION array, dimension (N) */
80			/* D(I) contains the updated eigenvalues for */
81			/* 1 <= I <= K. */
82
83			/* Q (output) DOUBLE PRECISION array, dimension (LDQ,N) */
84			/* Initially the first K columns are used as workspace. */
85			/* On output the columns 1 to K contain */
86			/* the updated eigenvectors. */
87
88			/* LDQ (input) INTEGER */
89			/* The leading dimension of the array Q. LDQ >= max(1,N). */
90
91			/* RHO (input) DOUBLE PRECISION */
92			/* The value of the parameter in the rank one update equation. */
93			/* RHO >= 0 required. */
94
95			/* DLAMDA (input/output) DOUBLE PRECISION array, dimension (K) */
96			/* The first K elements of this array contain the old roots */
97			/* of the deflated updating problem. These are the poles */
98			/* of the secular equation. May be changed on output by */
99			/* having lowest order bit set to zero on Cray X-MP, Cray Y-MP, */
100			/* Cray-2, or Cray C-90, as described above. */
101
102			/* Q2 (input) DOUBLE PRECISION array, dimension (LDQ2, N) */
103			/* The first K columns of this matrix contain the non-deflated */
104			/* eigenvectors for the split problem. */
105
106			/* INDX (input) INTEGER array, dimension (N) */
107			/* The permutation used to arrange the columns of the deflated */
108			/* Q matrix into three groups (see DLAED2). */
109			/* The rows of the eigenvectors found by DLAED4 must be likewise */
110			/* permuted before the matrix multiply can take place. */
111
112			/* CTOT (input) INTEGER array, dimension (4) */
113			/* A count of the total number of the various types of columns */
114			/* in Q, as described in INDX. The fourth column type is any */
115			/* column which has been deflated. */
116
117			/* W (input/output) DOUBLE PRECISION array, dimension (K) */
118			/* The first K elements of this array contain the components */
119			/* of the deflation-adjusted updating vector. Destroyed on */
120			/* output. */
121
122			/* S (workspace) DOUBLE PRECISION array, dimension */
123			/* (MAX(CTOT(1)+CTOT(2),CTOT(2)+CTOT(3)) + 1)K /
124			/* Will contain parts of the eigenvectors of the repaired matrix */
125			/* which will be multiplied by the previously accumulated */
126			/* eigenvectors to update the system. This array is a major */
127			/* source of workspace requirements ! */
128
129			/* INFO (output) INTEGER */
130			/* = 0: successful exit. */
131			/* < 0: if INFO = -i, the i-th argument had an illegal value. */
132			/* > 0: if INFO = i, eigenpair i was not computed successfully */
133
134			/* Further Details */
135			/* =============== */
136
137			/* Based on code written by */
138			/* Wilfried Gansterer and Bob Ward, */
139			/* Department of Computer Science, University of Tennessee */
140			/* Based on the design of the LAPACK code DLAED3 with small modifications */
141			/* (Note that in contrast to the original DLAED3, this routine */
142			/* DOES NOT require that N1 <= N/2) */
143
144			/* Based on contributions by */
145			/* Jeff Rutter, Computer Science Division, University of California */
146			/* at Berkeley, USA */
147			/* Modified by Francoise Tisseur, University of Tennessee. */
148
149			/* ===================================================================== */
150
151		105	PetscReal temp, done = 1.0, dzero = 0.0;
152		105	PetscBLASInt i, j, n2, n12, ii, n23, iq2, i1, one=1;
153
154	1/2 ✓ Branch 0 taken 1 times. ✗ Branch 1 not taken.	105	PetscFunctionBegin;
155		105	*info = 0;
156
157	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	105	if (k < 0) *info = -3;
158	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	105	else if (n < k) *info = -4;
159	2/4 ✓ Branch 0 taken 5 times. ✗ Branch 1 not taken. ✗ Branch 2 not taken. ✓ Branch 3 taken 5 times.	105	else if (n1 < PetscMin(1,n) \|\| n1 > PetscMax(1,n)) *info = -5;
160	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	105	else if (ldq < PetscMax(1,n)) *info = -8;
161	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	105	else if (rho < 0.) *info = -9;
162	1/4 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken.	105	PetscCheck(!info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DLAED3M",-(info));
163
164			/* Quick return if possible */
165
166	2/14 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 1 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken. ✗ Branch 5 not taken. ✗ Branch 6 not taken. ✗ Branch 7 not taken. ✗ Branch 8 not taken. ✗ Branch 9 not taken. ✗ Branch 10 not taken. ✗ Branch 11 not taken. ✗ Branch 12 not taken. ✗ Branch 13 not taken.	105	if (k == 0) PetscFunctionReturn(PETSC_SUCCESS);
167
168			/* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */
169			/* be computed with high relative accuracy (barring over/underflow). */
170			/* This is a problem on machines without a guard digit in */
171			/* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */
172			/* The following code replaces DLAMDA(I) by 2DLAMDA(I)-DLAMDA(I), /
173			/* which on any of these machines zeros out the bottommost */
174			/* bit of DLAMDA(I) if it is 1; this makes the subsequent */
175			/* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */
176			/* occurs. On binary machines with a guard digit (almost all */
177			/* machines) it does not change DLAMDA(I) at all. On hexadecimal */
178			/* and decimal machines with a guard digit, it slightly */
179			/* changes the bottommost bits of DLAMDA(I). It does not account */
180			/* for hexadecimal or decimal machines without guard digits */
181			/* (we know of none). We use a subroutine call to compute */
182			/* 2DLAMBDA(I) to prevent optimizing compilers from eliminating /
183			/* this code. */
184
185	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	905	for (i = 0; i < k; ++i) {
186		800	dlamda[i] = LAPACKlamc3_(&dlamda[i], &dlamda[i]) - dlamda[i];
187			}
188
189	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	905	for (j = 1; j <= k; ++j) {
190
191			/* ....calling DLAED4 for eigenpair J.... */
192
193	10/20 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 4 times. ✓ Branch 2 taken 1 times. ✗ Branch 3 not taken. ✓ Branch 4 taken 1 times. ✗ Branch 5 not taken. ✗ Branch 6 not taken. ✓ Branch 7 taken 1 times. ✓ Branch 8 taken 1 times. ✗ Branch 9 not taken. ✗ Branch 10 not taken. ✓ Branch 11 taken 1 times. ✓ Branch 12 taken 1 times. ✗ Branch 13 not taken. ✓ Branch 14 taken 1 times. ✗ Branch 15 not taken. ✗ Branch 16 not taken. ✓ Branch 17 taken 1 times. ✗ Branch 18 not taken. ✗ Branch 19 not taken.	800	PetscCallBLAS("LAPACKlaed4",LAPACKlaed4_(&k, &j, dlamda, w, &q[(j-1)*ldq], &rho, &d[j-1], info));
194	1/4 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken.	800	SlepcCheckLapackInfo("laed4",*info);
195
196	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	800	if (j < k) {
197
198			/* If the zero finder terminated properly, but the computed */
199			/* eigenvalues are not ordered, issue an error statement */
200			/* but continue computation. */
201
202	1/4 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken.	695	PetscCheck(dlamda[j-1]<dlamda[j],PETSC_COMM_SELF,PETSC_ERR_FP,"DLAMDA(%" PetscBLASInt_FMT ") is greater or equal than DLAMDA(%" PetscBLASInt_FMT ")", j, j+1);
203	2/6 ✓ Branch 0 taken 5 times. ✗ Branch 1 not taken. ✗ Branch 2 not taken. ✓ Branch 3 taken 5 times. ✗ Branch 4 not taken. ✗ Branch 5 not taken.	800	PetscCheck(d[j-1]>=dlamda[j-1] && d[j-1]<=dlamda[j],PETSC_COMM_SELF,PETSC_ERR_FP,"DLAMDA(%" PetscBLASInt_FMT ") = %g D(%" PetscBLASInt_FMT ") = %g DLAMDA(%" PetscBLASInt_FMT ") = %g", j, (double)dlamda[j-1], j, (double)d[j-1], j+1, (double)dlamda[j]);
204			}
205			}
206
207	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	105	if (k == 1) goto L110;
208
209	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	105	if (k == 2) {
210
211			/* permute the components of Q(:,J) (the information returned by DLAED4 */
212			/* necessary to construct the eigenvectors) according to the permutation */
213			/* stored in INDX, resulting from deflation */
214
215	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	30	for (j = 0; j < k; ++j) {
216		20	w[0] = q[0+j*ldq];
217		20	w[1] = q[1+j*ldq];
218		20	ii = indx[0];
219		20	q[0+j*ldq] = w[ii-1];
220		20	ii = indx[1];
221		20	q[1+j*ldq] = w[ii-1];
222			}
223		10	goto L110;
224			}
225
226			/* ....K.GE.3.... */
227			/* Compute updated W (used for computing the eigenvectors corresponding */
228			/* to the previously computed eigenvalues). */
229
230	10/20 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 4 times. ✓ Branch 2 taken 1 times. ✗ Branch 3 not taken. ✓ Branch 4 taken 1 times. ✗ Branch 5 not taken. ✗ Branch 6 not taken. ✓ Branch 7 taken 1 times. ✓ Branch 8 taken 1 times. ✗ Branch 9 not taken. ✗ Branch 10 not taken. ✓ Branch 11 taken 1 times. ✓ Branch 12 taken 1 times. ✗ Branch 13 not taken. ✓ Branch 14 taken 1 times. ✗ Branch 15 not taken. ✗ Branch 16 not taken. ✓ Branch 17 taken 1 times. ✗ Branch 18 not taken. ✗ Branch 19 not taken.	95	PetscCallBLAS("BLAScopy",BLAScopy_(&k, w, &one, s, &one));
231
232			/* Initialize W(I) = Q(I,I) */
233
234		95	i1 = ldq + 1;
235	10/20 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 4 times. ✓ Branch 2 taken 1 times. ✗ Branch 3 not taken. ✓ Branch 4 taken 1 times. ✗ Branch 5 not taken. ✗ Branch 6 not taken. ✓ Branch 7 taken 1 times. ✓ Branch 8 taken 1 times. ✗ Branch 9 not taken. ✗ Branch 10 not taken. ✓ Branch 11 taken 1 times. ✓ Branch 12 taken 1 times. ✗ Branch 13 not taken. ✓ Branch 14 taken 1 times. ✗ Branch 15 not taken. ✗ Branch 16 not taken. ✓ Branch 17 taken 1 times. ✗ Branch 18 not taken. ✗ Branch 19 not taken.	95	PetscCallBLAS("BLAScopy",BLAScopy_(&k, q, &i1, w, &one));
236	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	875	for (j = 0; j < k; ++j) {
237	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	4260	for (i = 0; i < j; ++i) {
238		3480	w[i] = q[i+jldq] / (dlamda[i] - dlamda[j]);
239			}
240	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	4260	for (i = j + 1; i < k; ++i) {
241		3480	w[i] = q[i+jldq] / (dlamda[i] - dlamda[j]);
242			}
243			}
244	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	875	for (i = 0; i < k; ++i) {
245		780	temp = PetscSqrtReal(-w[i]);
246	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	780	if (temp<0) temp = -temp;
247	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	780	w[i] = (s[i] >= 0) ? temp : -temp;
248			}
249
250			/* Compute eigenvectors of the modified rank-1 modification (using the */
251			/* vector W). */
252
253	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	875	for (j = 0; j < k; ++j) {
254	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	8520	for (i = 0; i < k; ++i) {
255		7740	s[i] = w[i] / q[i+j*ldq];
256			}
257		780	temp = BLASnrm2_(&k, s, &one);
258	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	9300	for (i = 0; i < k; ++i) {
259
260			/* apply the permutation resulting from deflation as stored */
261			/* in INDX */
262
263		7740	ii = indx[i];
264		7740	q[i+j*ldq] = s[ii-1] / temp;
265			}
266			}
267
268			/* ************************************************************************** */
269
270			/* ....updating the eigenvectors.... */
271
272		95	L110:
273
274		105	n2 = n - n1;
275		105	n12 = ctot[0] + ctot[1];
276		105	n23 = ctot[1] + ctot[2];
277	1/2 ✓ Branch 0 taken 5 times. ✗ Branch 1 not taken.	105	if ((unsigned char )jobz == 'D') {
278
279			/* Compute the updated eigenvectors. (NOTE that every call of */
280			/* DGEMM requires three DISTINCT arrays) */
281
282			/* copy Q(CTOT(1)+1:K,1:K) to S */
283
284	4/4 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times. ✓ Branch 2 taken 5 times. ✓ Branch 3 taken 5 times.	2615	for (j=0;j<k;j++) for (i=0;i<n23;i++) s[i+jn23] = q[ctot[0]+i+jldq];
285		105	iq2 = n1 * n12 + 1;
286
287	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	105	if (n23 != 0) {
288
289			/* multiply the second part of Q2 (the eigenvectors of the */
290			/* lower block) with S and write the result into the lower part of */
291			/* Q, i.e., Q(N1+1:N,1:K) */
292
293	10/20 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 4 times. ✓ Branch 2 taken 1 times. ✗ Branch 3 not taken. ✓ Branch 4 taken 1 times. ✗ Branch 5 not taken. ✗ Branch 6 not taken. ✓ Branch 7 taken 1 times. ✓ Branch 8 taken 1 times. ✗ Branch 9 not taken. ✗ Branch 10 not taken. ✓ Branch 11 taken 1 times. ✓ Branch 12 taken 1 times. ✗ Branch 13 not taken. ✓ Branch 14 taken 1 times. ✗ Branch 15 not taken. ✗ Branch 16 not taken. ✓ Branch 17 taken 1 times. ✗ Branch 18 not taken. ✗ Branch 19 not taken.	35	PetscCallBLAS("BLASgemm",BLASgemm_("N", "N", &n2, &k, &n23, &done,
294			&q2[iq2-1], &n2, s, &n23, &dzero, &q[n1], &ldq));
295			} else {
296	3/4 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times. ✓ Branch 2 taken 5 times. ✓ Branch 3 taken 5 times.	600	for (j=0;j<k;j++) for (i=0;i<n2;i++) q[n1+i+j*ldq] = 0.0;
297			}
298
299			/* copy Q(1:CTOT(1)+CTOT(2),1:K) to S */
300
301	4/4 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times. ✓ Branch 2 taken 5 times. ✓ Branch 3 taken 5 times.	7785	for (j=0;j<k;j++) for (i=0;i<n12;i++) s[i+jn12] = q[i+jldq];
302
303	1/2 ✓ Branch 0 taken 5 times. ✗ Branch 1 not taken.	105	if (n12 != 0) {
304
305			/* multiply the first part of Q2 (the eigenvectors of the */
306			/* upper block) with S and write the result into the upper part of */
307			/* Q, i.e., Q(1:N1,1:K) */
308
309	10/20 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 4 times. ✓ Branch 2 taken 1 times. ✗ Branch 3 not taken. ✓ Branch 4 taken 1 times. ✗ Branch 5 not taken. ✗ Branch 6 not taken. ✓ Branch 7 taken 1 times. ✓ Branch 8 taken 1 times. ✗ Branch 9 not taken. ✗ Branch 10 not taken. ✓ Branch 11 taken 1 times. ✓ Branch 12 taken 1 times. ✗ Branch 13 not taken. ✓ Branch 14 taken 1 times. ✗ Branch 15 not taken. ✗ Branch 16 not taken. ✓ Branch 17 taken 1 times. ✗ Branch 18 not taken. ✗ Branch 19 not taken.	105	PetscCallBLAS("BLASgemm",BLASgemm_("N", "N", &n1, &k, &n12, &done,
310			q2, &n1, s, &n12, &dzero, q, &ldq));
311			} else {
312		✗	for (j=0;j<k;j++) for (i=0;i<n1;i++) q[i+j*ldq] = 0.0;
313			}
314			}
315	6/12 ✓ Branch 0 taken 1 times. ✗ Branch 1 not taken. ✗ Branch 2 not taken. ✓ Branch 3 taken 1 times. ✓ Branch 4 taken 1 times. ✗ Branch 5 not taken. ✓ Branch 6 taken 1 times. ✗ Branch 7 not taken. ✓ Branch 8 taken 1 times. ✗ Branch 9 not taken. ✗ Branch 10 not taken. ✓ Branch 11 taken 1 times.	21	PetscFunctionReturn(PETSC_SUCCESS);
316			}
317