LCOV - code coverage report
Current view: top level - sys/classes/ds/impls/ghiep - invit.c (source / functions) Hit Total Coverage
Test: SLEPc Lines: 438 481 91.1 %
Date: 2024-04-19 00:31:36 Functions: 8 8 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       3             :    SLEPc - Scalable Library for Eigenvalue Problem Computations
       4             :    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
       5             : 
       6             :    This file is part of SLEPc.
       7             :    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
       8             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       9             : */
      10             : 
      11             : #include <slepc/private/dsimpl.h>
      12             : #include <slepcblaslapack.h>
      13             : 
      14             : struct HRtr
      15             : {
      16             :   PetscScalar *data;
      17             :   PetscInt    m;
      18             :   PetscInt    idx[2];
      19             :   PetscInt    n[2];
      20             :   PetscScalar tau[2];
      21             :   PetscReal   alpha;
      22             :   PetscReal   cs;
      23             :   PetscReal   sn;
      24             :   PetscInt    type;
      25             : };
      26             : 
      27             : /*
      28             :   Generates a hyperbolic rotation
      29             :     if x1*x1 - x2*x2 != 0
      30             :       r = sqrt(|x1*x1 - x2*x2|)
      31             :       c = x1/r  s = x2/r
      32             : 
      33             :       | c -s||x1|   |d*r|
      34             :       |-s  c||x2| = | 0 |
      35             :       where d = 1 for type==1 and -1 for type==2
      36             :   Returns the condition number of the reduction
      37             : */
      38        6258 : static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
      39             : {
      40        6258 :   PetscReal t,n2,xa,xb;
      41        6258 :   PetscInt  type_;
      42             : 
      43        6258 :   PetscFunctionBegin;
      44        6258 :   if (x2==0.0) {
      45           0 :     *r = PetscAbsReal(x1); *c = (x1>=0.0)?1.0:-1.0; *s = 0.0;
      46           0 :     if (type) *type = 1;
      47           0 :     PetscFunctionReturn(PETSC_SUCCESS);
      48             :   }
      49        6258 :   if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
      50             :     /* hyperbolic rotation doesn't exist */
      51           0 :     *c = *s = *r = 0.0;
      52           0 :     if (type) *type = 0;
      53           0 :     *cond = PETSC_MAX_REAL;
      54           0 :     PetscFunctionReturn(PETSC_SUCCESS);
      55             :   }
      56             : 
      57        6258 :   if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
      58             :     xa = x1; xb = x2; type_ = 1;
      59             :   } else {
      60        1528 :     xa = x2; xb = x1; type_ = 2;
      61             :   }
      62        6258 :   t = xb/xa;
      63        6258 :   n2 = PetscAbsReal(1 - t*t);
      64        6258 :   *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
      65        6258 :   *c = x1/(*r);
      66        6258 :   *s = x2/(*r);
      67        6258 :   if (type_ == 2) *r *= -1;
      68        6258 :   if (type) *type = type_;
      69        6258 :   if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
      70        6258 :   PetscFunctionReturn(PETSC_SUCCESS);
      71             : }
      72             : 
      73             : /*
      74             :                                 |c  s|
      75             :   Applies an hyperbolic rotator |s  c|
      76             :            |c  s|
      77             :     [x1 x2]|s  c|
      78             : */
      79       12263 : static PetscErrorCode HRApply(PetscInt n,PetscScalar *x1,PetscInt inc1,PetscScalar *x2,PetscInt inc2,PetscReal c,PetscReal s)
      80             : {
      81       12263 :   PetscInt    i;
      82       12263 :   PetscReal   t;
      83       12263 :   PetscScalar tmp;
      84             : 
      85       12263 :   PetscFunctionBegin;
      86       12263 :   if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
      87        9387 :     t = s/c;
      88      674897 :     for (i=0;i<n;i++) {
      89      665510 :       x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
      90      665510 :       x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
      91             :     }
      92             :   } else { /* Type II */
      93        2876 :     t = c/s;
      94       93305 :     for (i=0;i<n;i++) {
      95       90429 :       tmp = x1[i*inc1];
      96       90429 :       x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
      97       90429 :       x2[i*inc2] = t*x1[i*inc1] + tmp/s;
      98             :     }
      99             :   }
     100       12263 :   PetscFunctionReturn(PETSC_SUCCESS);
     101             : }
     102             : 
     103             : /*
     104             :   Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).
     105             : 
     106             :   Input:
     107             :     A symmetric (only lower triangular part is referred)
     108             :     s vector +1 and -1 (signature matrix)
     109             :   Output:
     110             :     d,e
     111             :     s
     112             :     Q s-orthogonal matrix with Q^T*A*Q = T (symmetric tridiagonal matrix)
     113             : */
     114         324 : static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *work,PetscReal *rwork,PetscBLASInt *iwork)
     115             : {
     116         324 :   PetscInt       i,j,k,*ii,*jj,i0=0,ik=0,tmp,type;
     117         324 :   PetscInt       nwu=0;
     118         324 :   PetscReal      *ss,cond=1.0,cs,sn,r;
     119         324 :   PetscScalar    tau,t,*AA;
     120         324 :   PetscBLASInt   n0,n1,ni,inc=1,m,n_,lda_,ldq_,*perm;
     121         324 :   PetscBool      breakdown = PETSC_TRUE;
     122             : 
     123         324 :   PetscFunctionBegin;
     124         324 :   if (n<3) {
     125           0 :     if (n==1) Q[0]=1;
     126           0 :     if (n==2) {
     127           0 :       Q[0] = Q[1+ldq] = 1;
     128           0 :       Q[1] = Q[ldq] = 0;
     129             :     }
     130           0 :     PetscFunctionReturn(PETSC_SUCCESS);
     131             :   }
     132         324 :   PetscCall(PetscBLASIntCast(lda,&lda_));
     133         324 :   PetscCall(PetscBLASIntCast(n,&n_));
     134         324 :   PetscCall(PetscBLASIntCast(ldq,&ldq_));
     135         324 :   ss = rwork;
     136         324 :   perm = iwork;
     137         324 :   AA = work;
     138        4987 :   for (i=0;i<n;i++) PetscCall(PetscArraycpy(AA+i*n,A+i*lda,n));
     139         324 :   nwu += n*n;
     140         324 :   k=0;
     141         648 :   while (breakdown && k<n) {
     142         324 :     breakdown = PETSC_FALSE;
     143             :     /* Classify (and flip) A and s according to sign */
     144         324 :     if (flip) {
     145        4943 :       for (i=0;i<n;i++) {
     146        4623 :         perm[i] = n-1-perm_[i];
     147        4623 :         if (perm[i]==0) i0 = i;
     148        4623 :         if (perm[i]==k) ik = i;
     149             :       }
     150             :     } else {
     151          44 :       for (i=0;i<n;i++) {
     152          40 :         perm[i] = perm_[i];
     153          40 :         if (perm[i]==0) i0 = i;
     154          40 :         if (perm[i]==k) ik = i;
     155             :       }
     156             :     }
     157         324 :     perm[ik] = 0;
     158         324 :     perm[i0] = k;
     159         324 :     i=1;
     160        3264 :     while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
     161        2940 :       if (s[perm[i]]!=s[perm[0]]) {
     162         721 :         j=i+1;
     163        2342 :         while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
     164         721 :         tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
     165             :       }
     166        2940 :       i++;
     167             :     }
     168        4987 :     for (i=0;i<n;i++) {
     169        4663 :       ss[i] = s[perm[i]];
     170             :     }
     171         324 :     if (flip) {
     172             :       ii = &j;
     173             :       jj = &i;
     174             :     } else {
     175           4 :       ii = &i;
     176           4 :       jj = &j;
     177             :     }
     178        4987 :     for (i=0;i<n;i++)
     179      123542 :       for (j=0;j<n;j++)
     180      118879 :         A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
     181             :     /* Initialize Q */
     182        4987 :     for (i=0;i<n;i++) {
     183        4663 :       PetscCall(PetscArrayzero(Q+i*ldq,n));
     184        4663 :       Q[perm[i]+i*ldq] = 1.0;
     185             :     }
     186        3336 :     for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
     187         324 :     n0 = ni-1;
     188         324 :     n1 = n_-ni;
     189        4339 :     for (j=0;j<n-2;j++) {
     190        4015 :       PetscCall(PetscBLASIntCast(n-j-1,&m));
     191             :       /* Forming and applying reflectors */
     192        4015 :       if (n0 > 1) {
     193        2840 :         PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,A+ni-n0+j*lda,A+ni-n0+j*lda+1,&inc,&tau));
     194             :         /* Apply reflector */
     195        2840 :         if (PetscAbsScalar(tau) != 0.0) {
     196        2840 :           t=*(A+ni-n0+j*lda);  *(A+ni-n0+j*lda)=1.0;
     197        2840 :           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
     198        2840 :           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
     199             :           /* Update Q */
     200        2840 :           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu));
     201        2840 :           *(A+ni-n0+j*lda) = t;
     202       47317 :           for (i=1;i<n0;i++) {
     203       44477 :             *(A+ni-n0+j*lda+i) = 0.0;  *(A+j+(ni-n0+i)*lda) = 0.0;
     204             :           }
     205        2840 :           *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
     206             :         }
     207             :       }
     208        4015 :       if (n1 > 1) {
     209        1301 :         PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1,A+n-n1+j*lda,A+n-n1+j*lda+1,&inc,&tau));
     210             :         /* Apply reflector */
     211        1301 :         if (PetscAbsScalar(tau) != 0.0) {
     212        1301 :           t=*(A+n-n1+j*lda);  *(A+n-n1+j*lda)=1.0;
     213        1301 :           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu));
     214        1301 :           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu));
     215             :           /* Update Q */
     216        1301 :           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu));
     217        1301 :           *(A+n-n1+j*lda) = t;
     218        9366 :           for (i=1;i<n1;i++) {
     219        8065 :             *(A+n-n1+i+j*lda) = 0.0;  *(A+j+(n-n1+i)*lda) = 0.0;
     220             :           }
     221        1301 :           *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
     222             :         }
     223             :       }
     224             :       /* Hyperbolic rotation */
     225        4015 :       if (n0 > 0 && n1 > 0) {
     226         227 :         PetscCall(HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond));
     227             :         /* Check condition number */
     228         227 :         if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
     229           0 :           breakdown = PETSC_TRUE;
     230           0 :           k++;
     231           0 :           PetscCheck(k<n && !flip,PETSC_COMM_SELF,PETSC_ERR_SUP,"Breakdown in construction of hyperbolic transformation");
     232             :           break;
     233             :         }
     234         227 :         A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
     235         227 :         A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
     236             :         /* Apply to A */
     237         227 :         PetscCall(HRApply(m,A+j+1+(ni-n0)*lda,1,A+j+1+(n-n1)*lda,1,cs,-sn));
     238         227 :         PetscCall(HRApply(m,A+ni-n0+(j+1)*lda,lda,A+n-n1+(j+1)*lda,lda,cs,-sn));
     239             : 
     240             :         /* Update Q */
     241         227 :         PetscCall(HRApply(n,Q+(ni-n0)*ldq,1,Q+(n-n1)*ldq,1,cs,-sn));
     242         227 :         if (type==2) {
     243          97 :           ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
     244          97 :           n0++;ni++;n1--;
     245             :         }
     246             :       }
     247        4015 :       if (n0>0) n0--;
     248        1125 :       else n1--;
     249             :     }
     250             :   }
     251             : 
     252             :   /* flip matrices */
     253         324 :   if (flip) {
     254        4623 :     for (i=0;i<n-1;i++) {
     255        4303 :       d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
     256        4303 :       e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
     257        4303 :       s[i] = ss[n-i-1];
     258             :     }
     259         320 :     s[n-1] = ss[0];
     260         320 :     d[n-1] = PetscRealPart(A[0]);
     261        4943 :     for (i=0;i<n;i++) PetscCall(PetscArraycpy(work+i*n,Q+i*ldq,n));
     262        4943 :     for (i=0;i<n;i++)
     263      123102 :       for (j=0;j<n;j++)
     264      118479 :         Q[i+j*ldq] = work[i+(n-j-1)*n];
     265             :   } else {
     266          40 :     for (i=0;i<n-1;i++) {
     267          36 :       d[i] = PetscRealPart(A[i+i*lda]);
     268          36 :       e[i] = PetscRealPart(A[i+1+i*lda]);
     269          36 :       s[i] = ss[i];
     270             :     }
     271           4 :     s[n-1] = ss[n-1];
     272           4 :     d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
     273             :   }
     274         324 :   PetscFunctionReturn(PETSC_SUCCESS);
     275             : }
     276             : 
     277       10635 : static PetscErrorCode MadeHRtr(PetscInt sz,PetscInt n,PetscInt idx0,PetscInt n0,PetscInt idx1,PetscInt n1,struct HRtr *tr1,struct HRtr *tr2,PetscReal *ncond,PetscScalar *work)
     278             : {
     279       10635 :   PetscScalar    *x,*y;
     280       10635 :   PetscReal      ncond2=1.0;
     281       10635 :   PetscBLASInt   n0_,n1_,inc=1;
     282             : 
     283       10635 :   PetscFunctionBegin;
     284             :   /* Hyperbolic transformation to make zeros in x */
     285       10635 :   x = tr1->data;
     286       10635 :   tr1->n[0] = n0;
     287       10635 :   tr1->n[1] = n1;
     288       10635 :   tr1->idx[0] = idx0;
     289       10635 :   tr1->idx[1] = idx1;
     290       10635 :   PetscCall(PetscBLASIntCast(tr1->n[0],&n0_));
     291       10635 :   PetscCall(PetscBLASIntCast(tr1->n[1],&n1_));
     292       10635 :   if (tr1->n[0] > 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,x+tr1->idx[0],x+tr1->idx[0]+1,&inc,tr1->tau));
     293       10635 :   if (tr1->n[1]> 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,x+tr1->idx[1],x+tr1->idx[1]+1,&inc,tr1->tau+1));
     294       10635 :   if (tr1->idx[0]<tr1->idx[1]) PetscCall(HRGen(PetscRealPart(x[tr1->idx[0]]),PetscRealPart(x[tr1->idx[1]]),&tr1->type,&tr1->cs,&tr1->sn,&tr1->alpha,ncond));
     295             :   else {
     296        5099 :     tr1->alpha = PetscRealPart(x[tr1->idx[0]]);
     297        5099 :     *ncond = 1.0;
     298             :   }
     299       10635 :   if (sz==2) {
     300         578 :     y = tr2->data;
     301             :     /* Apply first transformation to second column */
     302         578 :     if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
     303         524 :       x[tr1->idx[0]] = 1.0;
     304         524 :       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&inc,x+tr1->idx[0],&inc,tr1->tau,y+tr1->idx[0],&n0_,work));
     305             :     }
     306         578 :     if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
     307         466 :       x[tr1->idx[1]] = 1.0;
     308         466 :       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&inc,x+tr1->idx[1],&inc,tr1->tau+1,y+tr1->idx[1],&n1_,work));
     309             :     }
     310         578 :     if (tr1->idx[0]<tr1->idx[1]) PetscCall(HRApply(1,y+tr1->idx[0],1,y+tr1->idx[1],1,tr1->cs,-tr1->sn));
     311         578 :     tr2->n[0] = tr1->n[0];
     312         578 :     tr2->n[1] = tr1->n[1];
     313         578 :     tr2->idx[0] = tr1->idx[0];
     314         578 :     tr2->idx[1] = tr1->idx[1];
     315         578 :     if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
     316         289 :       tr2->idx[1]++; tr2->n[1]--; tr2->n[0]++;
     317             :     }
     318         578 :     if (tr2->n[0]>0) {
     319         578 :       tr2->n[0]--; tr2->idx[0]++;
     320         578 :       if (tr2->n[1]==0) tr2->idx[1] = tr2->idx[0];
     321             :     } else {
     322           0 :       tr2->n[1]--; tr2->idx[1]++; tr2->idx[0] = tr2->idx[1];
     323             :     }
     324             :     /* Hyperbolic transformation to make zeros in y */
     325         578 :     PetscCall(PetscBLASIntCast(tr2->n[0],&n0_));
     326         578 :     PetscCall(PetscBLASIntCast(tr2->n[1],&n1_));
     327         578 :     if (tr2->n[0] > 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,y+tr2->idx[0],y+tr2->idx[0]+1,&inc,tr2->tau));
     328         578 :     if (tr2->n[1]> 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,y+tr2->idx[1],y+tr2->idx[1]+1,&inc,tr2->tau+1));
     329         578 :     if (tr2->idx[0]<tr2->idx[1]) PetscCall(HRGen(PetscRealPart(y[tr2->idx[0]]),PetscRealPart(y[tr2->idx[1]]),&tr2->type,&tr2->cs,&tr2->sn,&tr2->alpha,&ncond2));
     330             :     else {
     331          83 :       tr2->alpha = PetscRealPart(y[tr2->idx[0]]);
     332          83 :       ncond2 = 1.0;
     333             :     }
     334         578 :     if (ncond2>*ncond) *ncond = ncond2;
     335             :   }
     336       10635 :   PetscFunctionReturn(PETSC_SUCCESS);
     337             : }
     338             : 
     339             : /*
     340             :   Auxiliary function to try perform one iteration of hr routine,
     341             :   checking condition number. If it is < tolD, apply the
     342             :   transformation to H and R, if not, ok=false and it do nothing
     343             :   tolE, tolerance to exchange complex pairs to improve conditioning
     344             : */
     345       10346 : static PetscErrorCode TryHRIt(PetscInt n,PetscInt j,PetscInt sz,PetscScalar *H,PetscInt ldh,PetscScalar *R,PetscInt ldr,PetscReal *s,PetscBool *exg,PetscBool *ok,PetscInt *n0,PetscInt *n1,PetscInt *idx0,PetscInt *idx1,PetscReal *cond,PetscScalar *work)
     346             : {
     347       10346 :   struct HRtr    *tr1,*tr2,tr1_t,tr2_t,tr1_te,tr2_te;
     348       10346 :   PetscScalar    *x,*y;
     349       10346 :   PetscReal      ncond=0,ncond_e;
     350       10346 :   PetscInt       nwu=0,i,d=1;
     351       10346 :   PetscBLASInt   n0_,n1_,inc=1,mh,mr,n_,ldr_,ldh_;
     352       10346 :   PetscReal      tolD = 1e+5;
     353             : 
     354       10346 :   PetscFunctionBegin;
     355       10346 :   if (cond) *cond = 1.0;
     356       10346 :   PetscCall(PetscBLASIntCast(n,&n_));
     357       10346 :   PetscCall(PetscBLASIntCast(ldr,&ldr_));
     358       10346 :   PetscCall(PetscBLASIntCast(ldh,&ldh_));
     359       10346 :   x = work+nwu;
     360       10346 :   nwu += n;
     361       10346 :   PetscCall(PetscArraycpy(x,R+j*ldr,n));
     362       10346 :   *exg = PETSC_FALSE;
     363       10346 :   *ok = PETSC_TRUE;
     364       10346 :   tr1_t.data = x;
     365       10346 :   if (sz==1) {
     366             :     /* Hyperbolic transformation to make zeros in x */
     367       10057 :     PetscCall(MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,NULL,&ncond,work+nwu));
     368             :     /* Check condition number to single column*/
     369       10057 :     if (ncond>tolD) *ok = PETSC_FALSE;
     370             :     tr1 = &tr1_t;
     371             :     tr2 = &tr2_t;
     372             :   } else {
     373         289 :     y = work+nwu;
     374         289 :     nwu += n;
     375         289 :     PetscCall(PetscArraycpy(y,R+(j+1)*ldr,n));
     376         289 :     tr2_t.data = y;
     377         289 :     PetscCall(MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,&tr2_t,&ncond,work+nwu));
     378             :     /* Computing hyperbolic transformations also for exchanged vectors */
     379         289 :     tr1_te.data = work+nwu;
     380         289 :     nwu += n;
     381         289 :     PetscCall(PetscArraycpy(tr1_te.data,R+(j+1)*ldr,n));
     382         289 :     tr2_te.data = work+nwu;
     383         289 :     nwu += n;
     384         289 :     PetscCall(PetscArraycpy(tr2_te.data,R+j*ldr,n));
     385         289 :     PetscCall(MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_te,&tr2_te,&ncond_e,work+nwu));
     386         289 :     if (ncond > d*ncond_e) {
     387         188 :       *exg = PETSC_TRUE;
     388         188 :       tr1 = &tr1_te;
     389         188 :       tr2 = &tr2_te;
     390         188 :       ncond = ncond_e;
     391             :     } else {
     392             :       tr1 = &tr1_t;
     393             :       tr2 = &tr2_t;
     394             :     }
     395         289 :     if (ncond>tolD) *ok = PETSC_FALSE;
     396             :   }
     397       10346 :   if (*ok) {
     398             :     /* Everything is OK, apply transformations to R and H */
     399             :     /* First column */
     400       10346 :     if (cond && *cond<ncond) *cond = ncond;
     401       10346 :     x = tr1->data;
     402       10346 :     PetscCall(PetscBLASIntCast(tr1->n[0],&n0_));
     403       10346 :     PetscCall(PetscBLASIntCast(tr1->n[1],&n1_));
     404       10346 :     PetscCall(PetscBLASIntCast(n-j-sz,&mr));
     405       10346 :     if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
     406        9162 :       x[tr1->idx[0]] = 1.0;
     407        9162 :       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,x+tr1->idx[0],&inc,tr1->tau,R+(j+sz)*ldr+tr1->idx[0],&ldr_,work+nwu));
     408        9162 :       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,x+tr1->idx[0],&inc,tr1->tau,H+(tr1->idx[0])*ldh,&ldh_,work+nwu));
     409             :     }
     410       10346 :     if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
     411        5033 :       x[tr1->idx[1]] = 1.0;
     412        5033 :       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,x+tr1->idx[1],&inc,tr1->tau+1,R+(j+sz)*ldr+tr1->idx[1],&ldr_,work+nwu));
     413        5033 :       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,x+tr1->idx[1],&inc,tr1->tau+1,H+(tr1->idx[1])*ldh,&ldh_,work+nwu));
     414             :     }
     415       10346 :     if (tr1->idx[0]<tr1->idx[1]) {
     416        5247 :       PetscCall(HRApply(mr,R+(j+sz)*ldr+tr1->idx[0],ldr,R+(j+sz)*ldr+tr1->idx[1],ldr,tr1->cs,-tr1->sn));
     417        5247 :       if (tr1->type==1) PetscCall(HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,tr1->cs,tr1->sn));
     418             :       else {
     419         977 :         PetscCall(HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,-tr1->cs,-tr1->sn));
     420         977 :         s[tr1->idx[0]] = -s[tr1->idx[0]];
     421         977 :         s[tr1->idx[1]] = -s[tr1->idx[1]];
     422             :       }
     423             :     }
     424      307526 :     for (i=0;i<tr1->idx[0];i++) *(R+j*ldr+i) = x[i];
     425      307520 :     for (i=tr1->idx[0]+1;i<n;i++) *(R+j*ldr+i) = 0.0;
     426       10346 :     *(R+j*ldr+tr1->idx[0]) = tr1->alpha;
     427       10346 :     if (sz==2) {
     428         289 :       y = tr2->data;
     429             :       /* Second column */
     430         289 :       PetscCall(PetscBLASIntCast(tr2->n[0],&n0_));
     431         289 :       PetscCall(PetscBLASIntCast(tr2->n[1],&n1_));
     432         289 :       PetscCall(PetscBLASIntCast(n-j-sz,&mr));
     433         289 :       PetscCall(PetscBLASIntCast(n-tr2->idx[0],&mh));
     434         289 :       if (tr2->n[0] > 1 && PetscAbsScalar(tr2->tau[0])!=0.0) {
     435         248 :         y[tr2->idx[0]] = 1.0;
     436         248 :         PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,y+tr2->idx[0],&inc,tr2->tau,R+(j+2)*ldr+tr2->idx[0],&ldr_,work+nwu));
     437         248 :         PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,y+tr2->idx[0],&inc,tr2->tau,H+(tr2->idx[0])*ldh,&ldh_,work+nwu));
     438             :       }
     439         289 :       if (tr2->n[1] > 1 && PetscAbsScalar(tr2->tau[1])!=0.0) {
     440         219 :         y[tr2->idx[1]] = 1.0;
     441         219 :         PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,y+tr2->idx[1],&inc,tr2->tau+1,R+(j+2)*ldr+tr2->idx[1],&ldr_,work+nwu));
     442         219 :         PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,y+tr2->idx[1],&inc,tr2->tau+1,H+(tr2->idx[1])*ldh,&ldh_,work+nwu));
     443             :       }
     444         289 :       if (tr2->idx[0]<tr2->idx[1]) {
     445         255 :         PetscCall(HRApply(mr,R+(j+2)*ldr+tr2->idx[0],ldr,R+(j+2)*ldr+tr2->idx[1],ldr,tr2->cs,-tr2->sn));
     446         255 :         if (tr2->type==1) PetscCall(HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,tr2->cs,tr2->sn));
     447             :         else {
     448         171 :           PetscCall(HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,-tr2->cs,-tr2->sn));
     449         171 :           s[tr2->idx[0]] = -s[tr2->idx[0]];
     450         171 :           s[tr2->idx[1]] = -s[tr2->idx[1]];
     451             :         }
     452             :       }
     453        9534 :       for (i=0;i<tr2->idx[0]-1;i++) *(R+(j+1)*ldr+i) = y[i];
     454         289 :       *(R+(j+1)*ldr+tr2->idx[0]-1) = y[tr2->idx[0]-1];
     455        9829 :       for (i=tr2->idx[0]+1;i<n;i++) *(R+(j+1)*ldr+i) = 0.0;
     456         289 :       *(R+(j+1)*ldr+tr2->idx[0]) = tr2->alpha;
     457         289 :       *n0 = tr2->n[0];
     458         289 :       *n1 = tr2->n[1];
     459         289 :       *idx0 = tr2->idx[0];
     460         289 :       *idx1 = tr2->idx[1];
     461         289 :       if (tr2->idx[0]<tr2->idx[1] && tr2->type==2) {
     462         171 :         (*idx1)++; (*n1)--; (*n0)++;
     463             :       }
     464             :     } else {
     465       10057 :       *n0 = tr1->n[0];
     466       10057 :       *n1 = tr1->n[1];
     467       10057 :       *idx0 = tr1->idx[0];
     468       10057 :       *idx1 = tr1->idx[1];
     469       10057 :       if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
     470         880 :         (*idx1)++; (*n1)--; (*n0)++;
     471             :       }
     472             :     }
     473       10346 :     if (*n0>0) {
     474        9548 :       (*n0)--; (*idx0)++;
     475        9548 :       if (*n1==0) *idx1 = *idx0;
     476             :     } else {
     477         798 :       (*n1)--; (*idx1)++; *idx0 = *idx1;
     478             :     }
     479             :   }
     480       10346 :   PetscFunctionReturn(PETSC_SUCCESS);
     481             : }
     482             : 
     483             : /*
     484             :   compute V = HR whit H s-orthogonal and R upper triangular
     485             :   (work: work space of minimum size 6*nv)
     486             : */
     487         374 : PetscErrorCode DSPseudoOrthog_HR(PetscInt *nv,PetscScalar *V,PetscInt ldv,PetscReal *s,PetscScalar *R,PetscInt ldr,PetscBLASInt *perm,PetscBLASInt *cmplxEig,PetscBool *breakdown,PetscScalar *work)
     488             : {
     489         374 :   PetscInt       i,j,n,n0,n1,np,idx0,idx1,sz=1,k=0,t1,t2,nwu=0;
     490         374 :   PetscScalar    *col1,*col2;
     491         374 :   PetscBool      exg=PETSC_FALSE,ok=PETSC_FALSE;
     492             : 
     493         374 :   PetscFunctionBegin;
     494         374 :   n = *nv;
     495         374 :   col1 = work+nwu;
     496         374 :   nwu += n;
     497         374 :   col2 = work+nwu;
     498         374 :   nwu += n;
     499             :   /* Sort R and s according to sing(s) */
     500         374 :   np = 0;
     501       11009 :   for (i=0;i<n;i++) if (s[i]>0) np++;
     502         374 :   if (s[0]>0) n1 = np;
     503          65 :   else n1 = n-np;
     504         374 :   n0 = 0;
     505       11009 :   for (i=0;i<n;i++) {
     506       10635 :     if (s[i]==s[0]) {
     507        8689 :       s[n0] = s[0];
     508        8689 :       perm[n0++] = i;
     509        1946 :     } else perm[n1++] = i;
     510             :   }
     511        2320 :   for (i=n0;i<n;i++) s[i] = -s[0];
     512         374 :   n1 -= n0;
     513         374 :   idx0 = 0;
     514         374 :   idx1 = n0;
     515         374 :   if (idx1==n) idx1=idx0;
     516       11009 :   for (i=0;i<n;i++) {
     517      634698 :     for (j=0;j<n;j++) R[j*ldr+i] = V[j*ldv+perm[i]];
     518             :   }
     519             :   /* Initialize H */
     520       11009 :   for (i=0;i<n;i++) {
     521       10635 :     PetscCall(PetscArrayzero(V+i*ldv,n));
     522       10635 :     V[perm[i]+i*ldv] = 1.0;
     523             :   }
     524       11009 :   for (i=0;i<n;i++) perm[i] = i;
     525             :   j = 0;
     526       10720 :   while (j<n-k) {
     527       10346 :     if (cmplxEig[j]==0) sz=1;
     528         289 :     else sz=2;
     529       10346 :     PetscCall(TryHRIt(n,j,sz,V,ldv,R,ldr,s,&exg,&ok,&n0,&n1,&idx0,&idx1,NULL,work+nwu));
     530       10346 :     if (ok) {
     531       10346 :       if (exg) cmplxEig[j] = -cmplxEig[j];
     532       10346 :       j = j+sz;
     533             :     } else { /* to be discarded */
     534           0 :       k = k+1;
     535           0 :       if (cmplxEig[j]==0) {
     536           0 :         if (j<n) {
     537           0 :           t1 = perm[j];
     538           0 :           for (i=j;i<n-1;i++) perm[i] = perm[i+1];
     539           0 :           perm[n-1] = t1;
     540           0 :           t1 = cmplxEig[j];
     541           0 :           for (i=j;i<n-1;i++) cmplxEig[i] = cmplxEig[i+1];
     542           0 :           cmplxEig[n-1] = t1;
     543           0 :           PetscCall(PetscArraycpy(col1,R+j*ldr,n));
     544           0 :           for (i=j;i<n-1;i++) PetscCall(PetscArraycpy(R+i*ldr,R+(i+1)*ldr,n));
     545           0 :           PetscCall(PetscArraycpy(R+(n-1)*ldr,col1,n));
     546             :         }
     547             :       } else {
     548           0 :         k = k+1;
     549           0 :         if (j<n-1) {
     550           0 :           t1 = perm[j]; t2 = perm[j+1];
     551           0 :           for (i=j;i<n-2;i++) perm[i] = perm[i+2];
     552           0 :           perm[n-2] = t1; perm[n-1] = t2;
     553           0 :           t1 = cmplxEig[j]; t2 = cmplxEig[j+1];
     554           0 :           for (i=j;i<n-2;i++) cmplxEig[i] = cmplxEig[i+2];
     555           0 :           cmplxEig[n-2] = t1; cmplxEig[n-1] = t2;
     556           0 :           PetscCall(PetscArraycpy(col1,R+j*ldr,n));
     557           0 :           PetscCall(PetscArraycpy(col2,R+(j+1)*ldr,n));
     558           0 :           for (i=j;i<n-2;i++) PetscCall(PetscArraycpy(R+i*ldr,R+(i+2)*ldr,n));
     559           0 :           PetscCall(PetscArraycpy(R+(n-2)*ldr,col1,n));
     560       10720 :           PetscCall(PetscArraycpy(R+(n-1)*ldr,col2,n));
     561             :         }
     562             :       }
     563             :     }
     564             :   }
     565         374 :   if (k!=0) {
     566           0 :     if (breakdown) *breakdown = PETSC_TRUE;
     567           0 :     *nv = n-k;
     568             :   }
     569         374 :   PetscFunctionReturn(PETSC_SUCCESS);
     570             : }
     571             : 
     572         372 : PetscErrorCode DSGHIEPOrthogEigenv(DS ds,DSMatType mat,PetscScalar *wr,PetscScalar *wi,PetscBool accum)
     573             : {
     574         372 :   PetscInt          lws,nwus=0,nwui=0,lwi,off,n,nv,ld,i,ldr,l;
     575         372 :   const PetscScalar *B,*W;
     576         372 :   PetscScalar       *Q,*X,*R,*ts,szero=0.0,sone=1.0;
     577         372 :   PetscReal         *s,vi,vr,tr,*d,*e;
     578         372 :   PetscBLASInt      ld_,n_,nv_,*perm,*cmplxEig;
     579             : 
     580         372 :   PetscFunctionBegin;
     581         372 :   l = ds->l;
     582         372 :   n = ds->n-l;
     583         372 :   PetscCall(PetscBLASIntCast(n,&n_));
     584         372 :   ld = ds->ld;
     585         372 :   PetscCall(PetscBLASIntCast(ld,&ld_));
     586         372 :   off = l*ld+l;
     587         372 :   PetscCall(DSGetArrayReal(ds,DS_MAT_D,&s));
     588         372 :   if (!ds->compact) {
     589           4 :     PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_B],&B));
     590          44 :     for (i=l;i<ds->n;i++) s[i] = PetscRealPart(B[i*ld+i]);
     591           4 :     PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_B],&B));
     592             :   }
     593         372 :   lws = n*n+7*n;
     594         372 :   lwi = 2*n;
     595         372 :   PetscCall(DSAllocateWork_Private(ds,lws,0,lwi));
     596         372 :   R = ds->work+nwus;
     597         372 :   nwus += n*n;
     598         372 :   ldr = n;
     599         372 :   perm = ds->iwork + nwui;
     600         372 :   nwui += n;
     601         372 :   cmplxEig = ds->iwork+nwui;
     602         372 :   PetscCall(MatDenseGetArray(ds->omat[mat],&X));
     603       10702 :   for (i=0;i<n;i++) {
     604             : #if defined(PETSC_USE_COMPLEX)
     605       10330 :     vi = PetscImaginaryPart(wr[l+i]);
     606             : #else
     607             :     vi = wi?PetscRealPart(wi[l+i]):0.0;
     608             : #endif
     609       10330 :     if (vi!=0) {
     610         289 :       cmplxEig[i] = 1;
     611         289 :       cmplxEig[i+1] = 2;
     612         289 :       i++;
     613       10041 :     } else cmplxEig[i] = 0;
     614             :   }
     615         372 :   nv = n;
     616             : 
     617             :   /* Perform HR decomposition */
     618             :   /* Hyperbolic rotators */
     619         372 :   PetscCall(DSPseudoOrthog_HR(&nv,X+off,ld,s+l,R,ldr,perm,cmplxEig,NULL,ds->work+nwus));
     620             :   /* Sort wr,wi perm */
     621         372 :   ts = ds->work+nwus;
     622         372 :   PetscCall(PetscArraycpy(ts,wr+l,n));
     623       10991 :   for (i=0;i<n;i++) wr[i+l] = ts[perm[i]];
     624             : #if !defined(PETSC_USE_COMPLEX)
     625             :   if (wi) {
     626             :     PetscCall(PetscArraycpy(ts,wi+l,n));
     627             :     for (i=0;i<n;i++) wi[i+l] = ts[perm[i]];
     628             :   }
     629             : #endif
     630             :   /* Projected Matrix */
     631         372 :   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
     632         372 :   PetscCall(PetscArrayzero(d+2*ld,ld));
     633         372 :   e = d+ld;
     634         372 :   d[l+nv-1] = PetscRealPart(wr[l+nv-1]*s[l+nv-1]);
     635       10356 :   for (i=0;i<nv-1;i++) {
     636        9984 :     if (cmplxEig[i]==0) { /* Real */
     637        9695 :       d[l+i] = PetscRealPart(wr[l+i]*s[l+i]);
     638        9695 :       e[l+i] = 0.0;
     639             :     } else {
     640         289 :       vr = PetscRealPart(wr[l+i]);
     641             : #if defined(PETSC_USE_COMPLEX)
     642         289 :       vi = PetscImaginaryPart(wr[l+i]);
     643             : #else
     644             :       vi = wi?PetscRealPart(wi[l+i]):0.0;
     645             : #endif
     646         289 :       if (cmplxEig[i]==-1) vi = -vi;
     647         289 :       tr = PetscRealPart((R[i+(i+1)*ldr]/R[i+i*ldr]))*vi;
     648         289 :       d[l+i] = (vr-tr)*s[l+i];
     649         289 :       d[l+i+1] = (vr+tr)*s[l+i+1];
     650         289 :       e[l+i] = PetscRealPart(s[l+i]*(R[(i+1)+(i+1)*ldr]/R[i+i*ldr])*vi);
     651         289 :       if (i<nv-2) e[l+i+1] = 0.0;
     652             :       i++;
     653             :     }
     654             :   }
     655         372 :   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
     656         372 :   PetscCall(DSRestoreArrayReal(ds,DS_MAT_D,&s));
     657             :   /* accumulate previous Q */
     658         372 :   if (accum) {
     659         369 :     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
     660         369 :     PetscCall(PetscBLASIntCast(nv,&nv_));
     661         369 :     PetscCall(DSAllocateMat_Private(ds,DS_MAT_W));
     662         369 :     PetscCall(MatCopy(ds->omat[DS_MAT_Q],ds->omat[DS_MAT_W],SAME_NONZERO_PATTERN));
     663         369 :     PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_W],&W));
     664         369 :     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,W+off,&ld_,X+off,&ld_,&szero,Q+off,&ld_));
     665         369 :     PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_W],&W));
     666         369 :     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
     667             :   }
     668         372 :   ds->t = nv+l;
     669         372 :   PetscCall(MatDenseRestoreArray(ds->omat[mat],&X));
     670         372 :   if (!ds->compact) PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_FALSE));
     671         372 :   PetscFunctionReturn(PETSC_SUCCESS);
     672             : }
     673             : 
     674             : /*
     675             :    Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
     676             : */
     677         372 : PetscErrorCode DSIntermediate_GHIEP(DS ds)
     678             : {
     679         372 :   PetscInt       i,ld,off;
     680         372 :   PetscInt       nwall,nwallr,nwalli;
     681         372 :   PetscScalar    *A,*B,*Q;
     682         372 :   PetscReal      *d,*e,*s;
     683             : 
     684         372 :   PetscFunctionBegin;
     685         372 :   ld = ds->ld;
     686         372 :   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
     687         372 :   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
     688         372 :   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
     689         372 :   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
     690         372 :   PetscCall(DSGetArrayReal(ds,DS_MAT_D,&s));
     691         372 :   e = d+ld;
     692         372 :   off = ds->l+ds->l*ld;
     693         372 :   PetscCall(PetscArrayzero(Q,ld*ld));
     694         372 :   nwall = ld*ld+ld;
     695         372 :   nwallr = ld;
     696         372 :   nwalli = ld;
     697         372 :   PetscCall(DSAllocateWork_Private(ds,nwall,nwallr,nwalli));
     698       12197 :   for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
     699       10991 :   for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
     700         372 :   if (ds->compact) {
     701         368 :     if (ds->state < DS_STATE_INTERMEDIATE) {
     702         320 :       PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_FALSE));
     703         320 :       PetscCall(TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork));
     704         320 :       ds->k = ds->l;
     705         320 :       PetscCall(PetscArrayzero(d+2*ld+ds->l,ds->n-ds->l));
     706             :     }
     707             :   } else {
     708           4 :     if (ds->state < DS_STATE_INTERMEDIATE) {
     709          44 :       for (i=0;i<ds->n;i++) s[i] = PetscRealPart(B[i+i*ld]);
     710           4 :       PetscCall(TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork));
     711           4 :       PetscCall(PetscArrayzero(d+2*ld,ds->n));
     712           4 :       ds->k = ds->l;
     713           4 :       PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_FALSE));
     714           0 :     } else PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_TRUE));
     715             :   }
     716         372 :   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
     717         372 :   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
     718         372 :   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
     719         372 :   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
     720         372 :   PetscCall(DSRestoreArrayReal(ds,DS_MAT_D,&s));
     721         372 :   PetscFunctionReturn(PETSC_SUCCESS);
     722             : }

Generated by: LCOV version 1.14