LCOV - code coverage report
Current view: top level - sys/classes/ds/impls/ghiep - invit.c (source / functions) Hit Total Coverage
Test: SLEPc Lines: 439 482 91.1 %
Date: 2024-11-23 00:39:48 Functions: 8 8 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       3             :    SLEPc - Scalable Library for Eigenvalue Problem Computations
       4             :    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
       5             : 
       6             :    This file is part of SLEPc.
       7             :    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
       8             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       9             : */
      10             : 
      11             : #include <slepc/private/dsimpl.h>
      12             : #include <slepcblaslapack.h>
      13             : 
      14             : struct HRtr
      15             : {
      16             :   PetscScalar *data;
      17             :   PetscInt    m;
      18             :   PetscInt    idx[2];
      19             :   PetscInt    n[2];
      20             :   PetscScalar tau[2];
      21             :   PetscReal   alpha;
      22             :   PetscReal   cs;
      23             :   PetscReal   sn;
      24             :   PetscInt    type;
      25             : };
      26             : 
      27             : /*
      28             :   Generates a hyperbolic rotation
      29             :     if x1*x1 - x2*x2 != 0
      30             :       r = sqrt(|x1*x1 - x2*x2|)
      31             :       c = x1/r  s = x2/r
      32             : 
      33             :       | c -s||x1|   |d*r|
      34             :       |-s  c||x2| = | 0 |
      35             :       where d = 1 for type==1 and -1 for type==2
      36             :   Returns the condition number of the reduction
      37             : */
      38        6341 : static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
      39             : {
      40        6341 :   PetscReal t,n2,xa,xb;
      41        6341 :   PetscInt  type_;
      42             : 
      43        6341 :   PetscFunctionBegin;
      44        6341 :   if (x2==0.0) {
      45           0 :     *r = PetscAbsReal(x1); *c = (x1>=0.0)?1.0:-1.0; *s = 0.0;
      46           0 :     if (type) *type = 1;
      47           0 :     PetscFunctionReturn(PETSC_SUCCESS);
      48             :   }
      49        6341 :   if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
      50             :     /* hyperbolic rotation doesn't exist */
      51           0 :     *c = *s = *r = 0.0;
      52           0 :     if (type) *type = 0;
      53           0 :     *cond = PETSC_MAX_REAL;
      54           0 :     PetscFunctionReturn(PETSC_SUCCESS);
      55             :   }
      56             : 
      57        6341 :   if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
      58             :     xa = x1; xb = x2; type_ = 1;
      59             :   } else {
      60        1533 :     xa = x2; xb = x1; type_ = 2;
      61             :   }
      62        6341 :   t = xb/xa;
      63        6341 :   n2 = PetscAbsReal(1 - t*t);
      64        6341 :   *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
      65        6341 :   *c = x1/(*r);
      66        6341 :   *s = x2/(*r);
      67        6341 :   if (type_ == 2) *r *= -1;
      68        6341 :   if (type) *type = type_;
      69        6341 :   if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
      70        6341 :   PetscFunctionReturn(PETSC_SUCCESS);
      71             : }
      72             : 
      73             : /*
      74             :                                 |c  s|
      75             :   Applies an hyperbolic rotator |s  c|
      76             :            |c  s|
      77             :     [x1 x2]|s  c|
      78             : */
      79       12420 : static PetscErrorCode HRApply(PetscInt n,PetscScalar *x1,PetscInt inc1,PetscScalar *x2,PetscInt inc2,PetscReal c,PetscReal s)
      80             : {
      81       12420 :   PetscInt    i;
      82       12420 :   PetscReal   t;
      83       12420 :   PetscScalar tmp;
      84             : 
      85       12420 :   PetscFunctionBegin;
      86       12420 :   if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
      87        9526 :     t = s/c;
      88      693636 :     for (i=0;i<n;i++) {
      89      684110 :       x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
      90      684110 :       x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
      91             :     }
      92             :   } else { /* Type II */
      93        2894 :     t = c/s;
      94       92695 :     for (i=0;i<n;i++) {
      95       89801 :       tmp = x1[i*inc1];
      96       89801 :       x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
      97       89801 :       x2[i*inc2] = t*x1[i*inc1] + tmp/s;
      98             :     }
      99             :   }
     100       12420 :   PetscFunctionReturn(PETSC_SUCCESS);
     101             : }
     102             : 
     103             : /*
     104             :   Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).
     105             : 
     106             :   Input:
     107             :     A symmetric (only lower triangular part is referred)
     108             :     s vector +1 and -1 (signature matrix)
     109             :   Output:
     110             :     d,e
     111             :     s
     112             :     Q s-orthogonal matrix with Q^T*A*Q = T (symmetric tridiagonal matrix)
     113             : */
     114         323 : static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *work,PetscReal *rwork,PetscBLASInt *iwork)
     115             : {
     116         323 :   PetscInt       i,j,k,*ii,*jj,i0=0,ik=0,type;
     117         323 :   PetscInt       nwu=0;
     118         323 :   PetscReal      *ss,cond=1.0,cs,sn,r;
     119         323 :   PetscScalar    tau,t,*AA;
     120         323 :   PetscBLASInt   n0,n1,ni,inc=1,m,n_,lda_,ldq_,*perm,tmp;
     121         323 :   PetscBool      breakdown = PETSC_TRUE;
     122             : 
     123         323 :   PetscFunctionBegin;
     124         323 :   if (n<3) {
     125           0 :     if (n==1) Q[0]=1;
     126           0 :     if (n==2) {
     127           0 :       Q[0] = Q[1+ldq] = 1;
     128           0 :       Q[1] = Q[ldq] = 0;
     129             :     }
     130           0 :     PetscFunctionReturn(PETSC_SUCCESS);
     131             :   }
     132         323 :   PetscCall(PetscBLASIntCast(lda,&lda_));
     133         323 :   PetscCall(PetscBLASIntCast(n,&n_));
     134         323 :   PetscCall(PetscBLASIntCast(ldq,&ldq_));
     135         323 :   ss = rwork;
     136         323 :   perm = iwork;
     137         323 :   AA = work;
     138        4965 :   for (i=0;i<n;i++) PetscCall(PetscArraycpy(AA+i*n,A+i*lda,n));
     139         323 :   nwu += n*n;
     140         323 :   k=0;
     141         646 :   while (breakdown && k<n) {
     142         323 :     breakdown = PETSC_FALSE;
     143             :     /* Classify (and flip) A and s according to sign */
     144         323 :     if (flip) {
     145        4921 :       for (i=0;i<n;i++) {
     146        4602 :         PetscCall(PetscBLASIntCast(n-1-perm_[i],&perm[i]));
     147        4602 :         if (perm[i]==0) i0 = i;
     148        4602 :         if (perm[i]==k) ik = i;
     149             :       }
     150             :     } else {
     151          44 :       for (i=0;i<n;i++) {
     152          40 :         PetscCall(PetscBLASIntCast(perm_[i],&perm[i]));
     153          40 :         if (perm[i]==0) i0 = i;
     154          40 :         if (perm[i]==k) ik = i;
     155             :       }
     156             :     }
     157         323 :     perm[ik] = 0;
     158         323 :     PetscCall(PetscBLASIntCast(k,&perm[i0]));
     159         323 :     i=1;
     160        3244 :     while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
     161        2921 :       if (s[perm[i]]!=s[perm[0]]) {
     162         716 :         j=i+1;
     163        2333 :         while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
     164         716 :         tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
     165             :       }
     166        2921 :       i++;
     167             :     }
     168        4965 :     for (i=0;i<n;i++) {
     169        4642 :       ss[i] = s[perm[i]];
     170             :     }
     171         323 :     if (flip) {
     172             :       ii = &j;
     173             :       jj = &i;
     174             :     } else {
     175           4 :       ii = &i;
     176           4 :       jj = &j;
     177             :     }
     178        4965 :     for (i=0;i<n;i++)
     179      122778 :       for (j=0;j<n;j++)
     180      118136 :         A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
     181             :     /* Initialize Q */
     182        4965 :     for (i=0;i<n;i++) {
     183        4642 :       PetscCall(PetscArrayzero(Q+i*ldq,n));
     184        4642 :       Q[perm[i]+i*ldq] = 1.0;
     185             :     }
     186        3315 :     for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
     187         323 :     n0 = ni-1;
     188         323 :     n1 = n_-ni;
     189        4319 :     for (j=0;j<n-2;j++) {
     190        3996 :       PetscCall(PetscBLASIntCast(n-j-1,&m));
     191             :       /* Forming and applying reflectors */
     192        3996 :       if (n0 > 1) {
     193        2821 :         PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,A+ni-n0+j*lda,A+ni-n0+j*lda+1,&inc,&tau));
     194             :         /* Apply reflector */
     195        2821 :         if (PetscAbsScalar(tau) != 0.0) {
     196        2821 :           t=*(A+ni-n0+j*lda);  *(A+ni-n0+j*lda)=1.0;
     197        2821 :           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
     198        2821 :           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
     199             :           /* Update Q */
     200        2821 :           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu));
     201        2821 :           *(A+ni-n0+j*lda) = t;
     202       46962 :           for (i=1;i<n0;i++) {
     203       44141 :             *(A+ni-n0+j*lda+i) = 0.0;  *(A+j+(ni-n0+i)*lda) = 0.0;
     204             :           }
     205        2821 :           *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
     206             :         }
     207             :       }
     208        3996 :       if (n1 > 1) {
     209        1296 :         PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1,A+n-n1+j*lda,A+n-n1+j*lda+1,&inc,&tau));
     210             :         /* Apply reflector */
     211        1296 :         if (PetscAbsScalar(tau) != 0.0) {
     212        1296 :           t=*(A+n-n1+j*lda);  *(A+n-n1+j*lda)=1.0;
     213        1296 :           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu));
     214        1296 :           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu));
     215             :           /* Update Q */
     216        1296 :           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu));
     217        1296 :           *(A+n-n1+j*lda) = t;
     218        9355 :           for (i=1;i<n1;i++) {
     219        8059 :             *(A+n-n1+i+j*lda) = 0.0;  *(A+j+(n-n1+i)*lda) = 0.0;
     220             :           }
     221        1296 :           *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
     222             :         }
     223             :       }
     224             :       /* Hyperbolic rotation */
     225        3996 :       if (n0 > 0 && n1 > 0) {
     226         228 :         PetscCall(HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond));
     227             :         /* Check condition number */
     228         228 :         if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
     229           0 :           breakdown = PETSC_TRUE;
     230           0 :           k++;
     231           0 :           PetscCheck(k<n && !flip,PETSC_COMM_SELF,PETSC_ERR_SUP,"Breakdown in construction of hyperbolic transformation");
     232             :           break;
     233             :         }
     234         228 :         A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
     235         228 :         A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
     236             :         /* Apply to A */
     237         228 :         PetscCall(HRApply(m,A+j+1+(ni-n0)*lda,1,A+j+1+(n-n1)*lda,1,cs,-sn));
     238         228 :         PetscCall(HRApply(m,A+ni-n0+(j+1)*lda,lda,A+n-n1+(j+1)*lda,lda,cs,-sn));
     239             : 
     240             :         /* Update Q */
     241         228 :         PetscCall(HRApply(n,Q+(ni-n0)*ldq,1,Q+(n-n1)*ldq,1,cs,-sn));
     242         228 :         if (type==2) {
     243          97 :           ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
     244          97 :           n0++;ni++;n1--;
     245             :         }
     246             :       }
     247        3996 :       if (n0>0) n0--;
     248        1125 :       else n1--;
     249             :     }
     250             :   }
     251             : 
     252             :   /* flip matrices */
     253         323 :   if (flip) {
     254        4602 :     for (i=0;i<n-1;i++) {
     255        4283 :       d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
     256        4283 :       e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
     257        4283 :       s[i] = ss[n-i-1];
     258             :     }
     259         319 :     s[n-1] = ss[0];
     260         319 :     d[n-1] = PetscRealPart(A[0]);
     261        4921 :     for (i=0;i<n;i++) PetscCall(PetscArraycpy(work+i*n,Q+i*ldq,n));
     262        4921 :     for (i=0;i<n;i++)
     263      122338 :       for (j=0;j<n;j++)
     264      117736 :         Q[i+j*ldq] = work[i+(n-j-1)*n];
     265             :   } else {
     266          40 :     for (i=0;i<n-1;i++) {
     267          36 :       d[i] = PetscRealPart(A[i+i*lda]);
     268          36 :       e[i] = PetscRealPart(A[i+1+i*lda]);
     269          36 :       s[i] = ss[i];
     270             :     }
     271           4 :     s[n-1] = ss[n-1];
     272           4 :     d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
     273             :   }
     274         323 :   PetscFunctionReturn(PETSC_SUCCESS);
     275             : }
     276             : 
     277       10615 : static PetscErrorCode MadeHRtr(PetscInt sz,PetscInt n,PetscInt idx0,PetscInt n0,PetscInt idx1,PetscInt n1,struct HRtr *tr1,struct HRtr *tr2,PetscReal *ncond,PetscScalar *work)
     278             : {
     279       10615 :   PetscScalar    *x,*y;
     280       10615 :   PetscReal      ncond2=1.0;
     281       10615 :   PetscBLASInt   n0_,n1_,inc=1;
     282             : 
     283       10615 :   PetscFunctionBegin;
     284             :   /* Hyperbolic transformation to make zeros in x */
     285       10615 :   x = tr1->data;
     286       10615 :   tr1->n[0] = n0;
     287       10615 :   tr1->n[1] = n1;
     288       10615 :   tr1->idx[0] = idx0;
     289       10615 :   tr1->idx[1] = idx1;
     290       10615 :   PetscCall(PetscBLASIntCast(tr1->n[0],&n0_));
     291       10615 :   PetscCall(PetscBLASIntCast(tr1->n[1],&n1_));
     292       10615 :   if (tr1->n[0] > 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,x+tr1->idx[0],x+tr1->idx[0]+1,&inc,tr1->tau));
     293       10615 :   if (tr1->n[1]> 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,x+tr1->idx[1],x+tr1->idx[1]+1,&inc,tr1->tau+1));
     294       10615 :   if (tr1->idx[0]<tr1->idx[1]) PetscCall(HRGen(PetscRealPart(x[tr1->idx[0]]),PetscRealPart(x[tr1->idx[1]]),&tr1->type,&tr1->cs,&tr1->sn,&tr1->alpha,ncond));
     295             :   else {
     296        4997 :     tr1->alpha = PetscRealPart(x[tr1->idx[0]]);
     297        4997 :     *ncond = 1.0;
     298             :   }
     299       10615 :   if (sz==2) {
     300         578 :     y = tr2->data;
     301             :     /* Apply first transformation to second column */
     302         578 :     if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
     303         524 :       x[tr1->idx[0]] = 1.0;
     304         524 :       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&inc,x+tr1->idx[0],&inc,tr1->tau,y+tr1->idx[0],&n0_,work));
     305             :     }
     306         578 :     if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
     307         466 :       x[tr1->idx[1]] = 1.0;
     308         466 :       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&inc,x+tr1->idx[1],&inc,tr1->tau+1,y+tr1->idx[1],&n1_,work));
     309             :     }
     310         578 :     if (tr1->idx[0]<tr1->idx[1]) PetscCall(HRApply(1,y+tr1->idx[0],1,y+tr1->idx[1],1,tr1->cs,-tr1->sn));
     311         578 :     tr2->n[0] = tr1->n[0];
     312         578 :     tr2->n[1] = tr1->n[1];
     313         578 :     tr2->idx[0] = tr1->idx[0];
     314         578 :     tr2->idx[1] = tr1->idx[1];
     315         578 :     if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
     316         289 :       tr2->idx[1]++; tr2->n[1]--; tr2->n[0]++;
     317             :     }
     318         578 :     if (tr2->n[0]>0) {
     319         578 :       tr2->n[0]--; tr2->idx[0]++;
     320         578 :       if (tr2->n[1]==0) tr2->idx[1] = tr2->idx[0];
     321             :     } else {
     322           0 :       tr2->n[1]--; tr2->idx[1]++; tr2->idx[0] = tr2->idx[1];
     323             :     }
     324             :     /* Hyperbolic transformation to make zeros in y */
     325         578 :     PetscCall(PetscBLASIntCast(tr2->n[0],&n0_));
     326         578 :     PetscCall(PetscBLASIntCast(tr2->n[1],&n1_));
     327         578 :     if (tr2->n[0] > 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,y+tr2->idx[0],y+tr2->idx[0]+1,&inc,tr2->tau));
     328         578 :     if (tr2->n[1]> 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,y+tr2->idx[1],y+tr2->idx[1]+1,&inc,tr2->tau+1));
     329         578 :     if (tr2->idx[0]<tr2->idx[1]) PetscCall(HRGen(PetscRealPart(y[tr2->idx[0]]),PetscRealPart(y[tr2->idx[1]]),&tr2->type,&tr2->cs,&tr2->sn,&tr2->alpha,&ncond2));
     330             :     else {
     331          83 :       tr2->alpha = PetscRealPart(y[tr2->idx[0]]);
     332          83 :       ncond2 = 1.0;
     333             :     }
     334         578 :     if (ncond2>*ncond) *ncond = ncond2;
     335             :   }
     336       10615 :   PetscFunctionReturn(PETSC_SUCCESS);
     337             : }
     338             : 
     339             : /*
     340             :   Auxiliary function to try perform one iteration of hr routine,
     341             :   checking condition number. If it is < tolD, apply the
     342             :   transformation to H and R, if not, ok=false and it do nothing
     343             :   tolE, tolerance to exchange complex pairs to improve conditioning
     344             : */
     345       10326 : static PetscErrorCode TryHRIt(PetscInt n,PetscInt j,PetscInt sz,PetscScalar *H,PetscInt ldh,PetscScalar *R,PetscInt ldr,PetscReal *s,PetscBool *exg,PetscBool *ok,PetscInt *n0,PetscInt *n1,PetscInt *idx0,PetscInt *idx1,PetscReal *cond,PetscScalar *work)
     346             : {
     347       10326 :   struct HRtr    *tr1,*tr2,tr1_t,tr2_t,tr1_te,tr2_te;
     348       10326 :   PetscScalar    *x,*y;
     349       10326 :   PetscReal      ncond=0,ncond_e;
     350       10326 :   PetscInt       nwu=0,i,d=1;
     351       10326 :   PetscBLASInt   n0_,n1_,inc=1,mh,mr,n_,ldr_,ldh_;
     352       10326 :   PetscReal      tolD = 1e+5;
     353             : 
     354       10326 :   PetscFunctionBegin;
     355       10326 :   if (cond) *cond = 1.0;
     356       10326 :   PetscCall(PetscBLASIntCast(n,&n_));
     357       10326 :   PetscCall(PetscBLASIntCast(ldr,&ldr_));
     358       10326 :   PetscCall(PetscBLASIntCast(ldh,&ldh_));
     359       10326 :   x = work+nwu;
     360       10326 :   nwu += n;
     361       10326 :   PetscCall(PetscArraycpy(x,R+j*ldr,n));
     362       10326 :   *exg = PETSC_FALSE;
     363       10326 :   *ok = PETSC_TRUE;
     364       10326 :   tr1_t.data = x;
     365       10326 :   if (sz==1) {
     366             :     /* Hyperbolic transformation to make zeros in x */
     367       10037 :     PetscCall(MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,NULL,&ncond,work+nwu));
     368             :     /* Check condition number to single column*/
     369       10037 :     if (ncond>tolD) *ok = PETSC_FALSE;
     370             :     tr1 = &tr1_t;
     371             :     tr2 = &tr2_t;
     372             :   } else {
     373         289 :     y = work+nwu;
     374         289 :     nwu += n;
     375         289 :     PetscCall(PetscArraycpy(y,R+(j+1)*ldr,n));
     376         289 :     tr2_t.data = y;
     377         289 :     PetscCall(MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,&tr2_t,&ncond,work+nwu));
     378             :     /* Computing hyperbolic transformations also for exchanged vectors */
     379         289 :     tr1_te.data = work+nwu;
     380         289 :     nwu += n;
     381         289 :     PetscCall(PetscArraycpy(tr1_te.data,R+(j+1)*ldr,n));
     382         289 :     tr2_te.data = work+nwu;
     383         289 :     nwu += n;
     384         289 :     PetscCall(PetscArraycpy(tr2_te.data,R+j*ldr,n));
     385         289 :     PetscCall(MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_te,&tr2_te,&ncond_e,work+nwu));
     386         289 :     if (ncond > d*ncond_e) {
     387         181 :       *exg = PETSC_TRUE;
     388         181 :       tr1 = &tr1_te;
     389         181 :       tr2 = &tr2_te;
     390         181 :       ncond = ncond_e;
     391             :     } else {
     392             :       tr1 = &tr1_t;
     393             :       tr2 = &tr2_t;
     394             :     }
     395         289 :     if (ncond>tolD) *ok = PETSC_FALSE;
     396             :   }
     397       10326 :   if (*ok) {
     398             :     /* Everything is OK, apply transformations to R and H */
     399             :     /* First column */
     400       10326 :     if (cond && *cond<ncond) *cond = ncond;
     401       10326 :     x = tr1->data;
     402       10326 :     PetscCall(PetscBLASIntCast(tr1->n[0],&n0_));
     403       10326 :     PetscCall(PetscBLASIntCast(tr1->n[1],&n1_));
     404       10326 :     PetscCall(PetscBLASIntCast(n-j-sz,&mr));
     405       10326 :     if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
     406        9148 :       x[tr1->idx[0]] = 1.0;
     407        9148 :       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,x+tr1->idx[0],&inc,tr1->tau,R+(j+sz)*ldr+tr1->idx[0],&ldr_,work+nwu));
     408        9148 :       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,x+tr1->idx[0],&inc,tr1->tau,H+(tr1->idx[0])*ldh,&ldh_,work+nwu));
     409             :     }
     410       10326 :     if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
     411        5172 :       x[tr1->idx[1]] = 1.0;
     412        5172 :       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,x+tr1->idx[1],&inc,tr1->tau+1,R+(j+sz)*ldr+tr1->idx[1],&ldr_,work+nwu));
     413        5172 :       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,x+tr1->idx[1],&inc,tr1->tau+1,H+(tr1->idx[1])*ldh,&ldh_,work+nwu));
     414             :     }
     415       10326 :     if (tr1->idx[0]<tr1->idx[1]) {
     416        5329 :       PetscCall(HRApply(mr,R+(j+sz)*ldr+tr1->idx[0],ldr,R+(j+sz)*ldr+tr1->idx[1],ldr,tr1->cs,-tr1->sn));
     417        5329 :       if (tr1->type==1) PetscCall(HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,tr1->cs,tr1->sn));
     418             :       else {
     419         991 :         PetscCall(HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,-tr1->cs,-tr1->sn));
     420         991 :         s[tr1->idx[0]] = -s[tr1->idx[0]];
     421         991 :         s[tr1->idx[1]] = -s[tr1->idx[1]];
     422             :       }
     423             :     }
     424      306411 :     for (i=0;i<tr1->idx[0];i++) *(R+j*ldr+i) = x[i];
     425      307527 :     for (i=tr1->idx[0]+1;i<n;i++) *(R+j*ldr+i) = 0.0;
     426       10326 :     *(R+j*ldr+tr1->idx[0]) = tr1->alpha;
     427       10326 :     if (sz==2) {
     428         289 :       y = tr2->data;
     429             :       /* Second column */
     430         289 :       PetscCall(PetscBLASIntCast(tr2->n[0],&n0_));
     431         289 :       PetscCall(PetscBLASIntCast(tr2->n[1],&n1_));
     432         289 :       PetscCall(PetscBLASIntCast(n-j-sz,&mr));
     433         289 :       PetscCall(PetscBLASIntCast(n-tr2->idx[0],&mh));
     434         289 :       if (tr2->n[0] > 1 && PetscAbsScalar(tr2->tau[0])!=0.0) {
     435         250 :         y[tr2->idx[0]] = 1.0;
     436         250 :         PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,y+tr2->idx[0],&inc,tr2->tau,R+(j+2)*ldr+tr2->idx[0],&ldr_,work+nwu));
     437         250 :         PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,y+tr2->idx[0],&inc,tr2->tau,H+(tr2->idx[0])*ldh,&ldh_,work+nwu));
     438             :       }
     439         289 :       if (tr2->n[1] > 1 && PetscAbsScalar(tr2->tau[1])!=0.0) {
     440         219 :         y[tr2->idx[1]] = 1.0;
     441         219 :         PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,y+tr2->idx[1],&inc,tr2->tau+1,R+(j+2)*ldr+tr2->idx[1],&ldr_,work+nwu));
     442         219 :         PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,y+tr2->idx[1],&inc,tr2->tau+1,H+(tr2->idx[1])*ldh,&ldh_,work+nwu));
     443             :       }
     444         289 :       if (tr2->idx[0]<tr2->idx[1]) {
     445         250 :         PetscCall(HRApply(mr,R+(j+2)*ldr+tr2->idx[0],ldr,R+(j+2)*ldr+tr2->idx[1],ldr,tr2->cs,-tr2->sn));
     446         250 :         if (tr2->type==1) PetscCall(HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,tr2->cs,tr2->sn));
     447             :         else {
     448         166 :           PetscCall(HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,-tr2->cs,-tr2->sn));
     449         166 :           s[tr2->idx[0]] = -s[tr2->idx[0]];
     450         166 :           s[tr2->idx[1]] = -s[tr2->idx[1]];
     451             :         }
     452             :       }
     453       10085 :       for (i=0;i<tr2->idx[0]-1;i++) *(R+(j+1)*ldr+i) = y[i];
     454         289 :       *(R+(j+1)*ldr+tr2->idx[0]-1) = y[tr2->idx[0]-1];
     455        9258 :       for (i=tr2->idx[0]+1;i<n;i++) *(R+(j+1)*ldr+i) = 0.0;
     456         289 :       *(R+(j+1)*ldr+tr2->idx[0]) = tr2->alpha;
     457         289 :       *n0 = tr2->n[0];
     458         289 :       *n1 = tr2->n[1];
     459         289 :       *idx0 = tr2->idx[0];
     460         289 :       *idx1 = tr2->idx[1];
     461         289 :       if (tr2->idx[0]<tr2->idx[1] && tr2->type==2) {
     462         166 :         (*idx1)++; (*n1)--; (*n0)++;
     463             :       }
     464             :     } else {
     465       10037 :       *n0 = tr1->n[0];
     466       10037 :       *n1 = tr1->n[1];
     467       10037 :       *idx0 = tr1->idx[0];
     468       10037 :       *idx1 = tr1->idx[1];
     469       10037 :       if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
     470         885 :         (*idx1)++; (*n1)--; (*n0)++;
     471             :       }
     472             :     }
     473       10326 :     if (*n0>0) {
     474        9537 :       (*n0)--; (*idx0)++;
     475        9537 :       if (*n1==0) *idx1 = *idx0;
     476             :     } else {
     477         789 :       (*n1)--; (*idx1)++; *idx0 = *idx1;
     478             :     }
     479             :   }
     480       10326 :   PetscFunctionReturn(PETSC_SUCCESS);
     481             : }
     482             : 
     483             : /*
     484             :   compute V = HR whit H s-orthogonal and R upper triangular
     485             :   (work: work space of minimum size 6*nv)
     486             : */
     487         373 : PetscErrorCode DSPseudoOrthog_HR(PetscInt *nv,PetscScalar *V,PetscInt ldv,PetscReal *s,PetscScalar *R,PetscInt ldr,PetscBLASInt *perm,PetscBLASInt *cmplxEig,PetscBool *breakdown,PetscScalar *work)
     488             : {
     489         373 :   PetscInt       i,j,n,n0,n1,np,idx0,idx1,sz=1,k=0,nwu=0;
     490         373 :   PetscBLASInt   t1,t2;
     491         373 :   PetscScalar    *col1,*col2;
     492         373 :   PetscBool      exg=PETSC_FALSE,ok=PETSC_FALSE;
     493             : 
     494         373 :   PetscFunctionBegin;
     495         373 :   n = *nv;
     496         373 :   col1 = work+nwu;
     497         373 :   nwu += n;
     498         373 :   col2 = work+nwu;
     499         373 :   nwu += n;
     500             :   /* Sort R and s according to sing(s) */
     501         373 :   np = 0;
     502       10988 :   for (i=0;i<n;i++) if (s[i]>0) np++;
     503         373 :   if (s[0]>0) n1 = np;
     504          64 :   else n1 = n-np;
     505         373 :   n0 = 0;
     506       10988 :   for (i=0;i<n;i++) {
     507       10615 :     if (s[i]==s[0]) {
     508        8669 :       s[n0] = s[0];
     509        8669 :       PetscCall(PetscBLASIntCast(i,&perm[n0++]));
     510       10615 :     } else PetscCall(PetscBLASIntCast(i,&perm[n1++]));
     511             :   }
     512        2319 :   for (i=n0;i<n;i++) s[i] = -s[0];
     513         373 :   n1 -= n0;
     514         373 :   idx0 = 0;
     515         373 :   idx1 = n0;
     516         373 :   if (idx1==n) idx1=idx0;
     517       10988 :   for (i=0;i<n;i++) {
     518      633570 :     for (j=0;j<n;j++) R[j*ldr+i] = V[j*ldv+perm[i]];
     519             :   }
     520             :   /* Initialize H */
     521       10988 :   for (i=0;i<n;i++) {
     522       10615 :     PetscCall(PetscArrayzero(V+i*ldv,n));
     523       10615 :     V[perm[i]+i*ldv] = 1.0;
     524             :   }
     525       10988 :   for (i=0;i<n;i++) PetscCall(PetscBLASIntCast(i,&perm[i]));
     526             :   j = 0;
     527       10699 :   while (j<n-k) {
     528       10326 :     if (cmplxEig[j]==0) sz=1;
     529         289 :     else sz=2;
     530       10326 :     PetscCall(TryHRIt(n,j,sz,V,ldv,R,ldr,s,&exg,&ok,&n0,&n1,&idx0,&idx1,NULL,work+nwu));
     531       10326 :     if (ok) {
     532       10326 :       if (exg) cmplxEig[j] = -cmplxEig[j];
     533       10326 :       j = j+sz;
     534             :     } else { /* to be discarded */
     535           0 :       k = k+1;
     536           0 :       if (cmplxEig[j]==0) {
     537           0 :         if (j<n) {
     538           0 :           t1 = perm[j];
     539           0 :           for (i=j;i<n-1;i++) perm[i] = perm[i+1];
     540           0 :           perm[n-1] = t1;
     541           0 :           t1 = cmplxEig[j];
     542           0 :           for (i=j;i<n-1;i++) cmplxEig[i] = cmplxEig[i+1];
     543           0 :           cmplxEig[n-1] = t1;
     544           0 :           PetscCall(PetscArraycpy(col1,R+j*ldr,n));
     545           0 :           for (i=j;i<n-1;i++) PetscCall(PetscArraycpy(R+i*ldr,R+(i+1)*ldr,n));
     546           0 :           PetscCall(PetscArraycpy(R+(n-1)*ldr,col1,n));
     547             :         }
     548             :       } else {
     549           0 :         k = k+1;
     550           0 :         if (j<n-1) {
     551           0 :           t1 = perm[j]; t2 = perm[j+1];
     552           0 :           for (i=j;i<n-2;i++) perm[i] = perm[i+2];
     553           0 :           perm[n-2] = t1; perm[n-1] = t2;
     554           0 :           t1 = cmplxEig[j]; t2 = cmplxEig[j+1];
     555           0 :           for (i=j;i<n-2;i++) cmplxEig[i] = cmplxEig[i+2];
     556           0 :           cmplxEig[n-2] = t1; cmplxEig[n-1] = t2;
     557           0 :           PetscCall(PetscArraycpy(col1,R+j*ldr,n));
     558           0 :           PetscCall(PetscArraycpy(col2,R+(j+1)*ldr,n));
     559           0 :           for (i=j;i<n-2;i++) PetscCall(PetscArraycpy(R+i*ldr,R+(i+2)*ldr,n));
     560           0 :           PetscCall(PetscArraycpy(R+(n-2)*ldr,col1,n));
     561       10699 :           PetscCall(PetscArraycpy(R+(n-1)*ldr,col2,n));
     562             :         }
     563             :       }
     564             :     }
     565             :   }
     566         373 :   if (k!=0) {
     567           0 :     if (breakdown) *breakdown = PETSC_TRUE;
     568           0 :     *nv = n-k;
     569             :   }
     570         373 :   PetscFunctionReturn(PETSC_SUCCESS);
     571             : }
     572             : 
     573         371 : PetscErrorCode DSGHIEPOrthogEigenv(DS ds,DSMatType mat,PetscScalar *wr,PetscScalar *wi,PetscBool accum)
     574             : {
     575         371 :   PetscInt          lws,nwus=0,nwui=0,lwi,off,n,nv,ld,i,ldr,l;
     576         371 :   const PetscScalar *B,*W;
     577         371 :   PetscScalar       *Q,*X,*R,*ts,szero=0.0,sone=1.0;
     578         371 :   PetscReal         *s,vi,vr,tr,*d,*e;
     579         371 :   PetscBLASInt      ld_,n_,nv_,*perm,*cmplxEig;
     580             : 
     581         371 :   PetscFunctionBegin;
     582         371 :   l = ds->l;
     583         371 :   n = ds->n-l;
     584         371 :   PetscCall(PetscBLASIntCast(n,&n_));
     585         371 :   ld = ds->ld;
     586         371 :   PetscCall(PetscBLASIntCast(ld,&ld_));
     587         371 :   off = l*ld+l;
     588         371 :   PetscCall(DSGetArrayReal(ds,DS_MAT_D,&s));
     589         371 :   if (!ds->compact) {
     590           4 :     PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_B],&B));
     591          44 :     for (i=l;i<ds->n;i++) s[i] = PetscRealPart(B[i*ld+i]);
     592           4 :     PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_B],&B));
     593             :   }
     594         371 :   lws = n*n+7*n;
     595         371 :   lwi = 2*n;
     596         371 :   PetscCall(DSAllocateWork_Private(ds,lws,0,lwi));
     597         371 :   R = ds->work+nwus;
     598         371 :   nwus += n*n;
     599         371 :   ldr = n;
     600         371 :   perm = ds->iwork + nwui;
     601         371 :   nwui += n;
     602         371 :   cmplxEig = ds->iwork+nwui;
     603         371 :   PetscCall(MatDenseGetArray(ds->omat[mat],&X));
     604       10681 :   for (i=0;i<n;i++) {
     605             : #if defined(PETSC_USE_COMPLEX)
     606       10310 :     vi = PetscImaginaryPart(wr[l+i]);
     607             : #else
     608             :     vi = wi?PetscRealPart(wi[l+i]):0.0;
     609             : #endif
     610       10310 :     if (vi!=0) {
     611         289 :       cmplxEig[i] = 1;
     612         289 :       cmplxEig[i+1] = 2;
     613         289 :       i++;
     614       10021 :     } else cmplxEig[i] = 0;
     615             :   }
     616         371 :   nv = n;
     617             : 
     618             :   /* Perform HR decomposition */
     619             :   /* Hyperbolic rotators */
     620         371 :   PetscCall(DSPseudoOrthog_HR(&nv,X+off,ld,s+l,R,ldr,perm,cmplxEig,NULL,ds->work+nwus));
     621             :   /* Sort wr,wi perm */
     622         371 :   ts = ds->work+nwus;
     623         371 :   PetscCall(PetscArraycpy(ts,wr+l,n));
     624       10970 :   for (i=0;i<n;i++) wr[i+l] = ts[perm[i]];
     625             : #if !defined(PETSC_USE_COMPLEX)
     626             :   if (wi) {
     627             :     PetscCall(PetscArraycpy(ts,wi+l,n));
     628             :     for (i=0;i<n;i++) wi[i+l] = ts[perm[i]];
     629             :   }
     630             : #endif
     631             :   /* Projected Matrix */
     632         371 :   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
     633         371 :   PetscCall(PetscArrayzero(d+2*ld,ld));
     634         371 :   e = d+ld;
     635         371 :   d[l+nv-1] = PetscRealPart(wr[l+nv-1]*s[l+nv-1]);
     636       10336 :   for (i=0;i<nv-1;i++) {
     637        9965 :     if (cmplxEig[i]==0) { /* Real */
     638        9676 :       d[l+i] = PetscRealPart(wr[l+i]*s[l+i]);
     639        9676 :       e[l+i] = 0.0;
     640             :     } else {
     641         289 :       vr = PetscRealPart(wr[l+i]);
     642             : #if defined(PETSC_USE_COMPLEX)
     643         289 :       vi = PetscImaginaryPart(wr[l+i]);
     644             : #else
     645             :       vi = wi?PetscRealPart(wi[l+i]):0.0;
     646             : #endif
     647         289 :       if (cmplxEig[i]==-1) vi = -vi;
     648         289 :       tr = PetscRealPart((R[i+(i+1)*ldr]/R[i+i*ldr]))*vi;
     649         289 :       d[l+i] = (vr-tr)*s[l+i];
     650         289 :       d[l+i+1] = (vr+tr)*s[l+i+1];
     651         289 :       e[l+i] = PetscRealPart(s[l+i]*(R[(i+1)+(i+1)*ldr]/R[i+i*ldr])*vi);
     652         289 :       if (i<nv-2) e[l+i+1] = 0.0;
     653             :       i++;
     654             :     }
     655             :   }
     656         371 :   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
     657         371 :   PetscCall(DSRestoreArrayReal(ds,DS_MAT_D,&s));
     658             :   /* accumulate previous Q */
     659         371 :   if (accum) {
     660         368 :     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
     661         368 :     PetscCall(PetscBLASIntCast(nv,&nv_));
     662         368 :     PetscCall(DSAllocateMat_Private(ds,DS_MAT_W));
     663         368 :     PetscCall(MatCopy(ds->omat[DS_MAT_Q],ds->omat[DS_MAT_W],SAME_NONZERO_PATTERN));
     664         368 :     PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_W],&W));
     665         368 :     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,W+off,&ld_,X+off,&ld_,&szero,Q+off,&ld_));
     666         368 :     PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_W],&W));
     667         368 :     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
     668             :   }
     669         371 :   ds->t = nv+l;
     670         371 :   PetscCall(MatDenseRestoreArray(ds->omat[mat],&X));
     671         371 :   if (!ds->compact) PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_FALSE));
     672         371 :   PetscFunctionReturn(PETSC_SUCCESS);
     673             : }
     674             : 
     675             : /*
     676             :    Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
     677             : */
     678         371 : PetscErrorCode DSIntermediate_GHIEP(DS ds)
     679             : {
     680         371 :   PetscInt       i,ld,off;
     681         371 :   PetscInt       nwall,nwallr,nwalli;
     682         371 :   PetscScalar    *A,*B,*Q;
     683         371 :   PetscReal      *d,*e,*s;
     684             : 
     685         371 :   PetscFunctionBegin;
     686         371 :   ld = ds->ld;
     687         371 :   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
     688         371 :   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
     689         371 :   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
     690         371 :   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
     691         371 :   PetscCall(DSGetArrayReal(ds,DS_MAT_D,&s));
     692         371 :   e = d+ld;
     693         371 :   off = ds->l+ds->l*ld;
     694         371 :   PetscCall(PetscArrayzero(Q,ld*ld));
     695         371 :   nwall = ld*ld+ld;
     696         371 :   nwallr = ld;
     697         371 :   nwalli = ld;
     698         371 :   PetscCall(DSAllocateWork_Private(ds,nwall,nwallr,nwalli));
     699       12172 :   for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
     700       10970 :   for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
     701         371 :   if (ds->compact) {
     702         367 :     if (ds->state < DS_STATE_INTERMEDIATE) {
     703         319 :       PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_FALSE));
     704         319 :       PetscCall(TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork));
     705         319 :       ds->k = ds->l;
     706         319 :       PetscCall(PetscArrayzero(d+2*ld+ds->l,ds->n-ds->l));
     707             :     }
     708             :   } else {
     709           4 :     if (ds->state < DS_STATE_INTERMEDIATE) {
     710          44 :       for (i=0;i<ds->n;i++) s[i] = PetscRealPart(B[i+i*ld]);
     711           4 :       PetscCall(TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork));
     712           4 :       PetscCall(PetscArrayzero(d+2*ld,ds->n));
     713           4 :       ds->k = ds->l;
     714           4 :       PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_FALSE));
     715           0 :     } else PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_TRUE));
     716             :   }
     717         371 :   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
     718         371 :   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
     719         371 :   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
     720         371 :   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
     721         371 :   PetscCall(DSRestoreArrayReal(ds,DS_MAT_D,&s));
     722         371 :   PetscFunctionReturn(PETSC_SUCCESS);
     723             : }

Generated by: LCOV version 1.14