LCOV - code coverage report
Current view: top level - sys/classes/fn/impls/log - fnlog.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 311 341 91.2 %
Date: 2020-07-05 02:54:12 Functions: 16 16 100.0 %

          Line data    Source code
       1             : /*
       2             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       3             :    SLEPc - Scalable Library for Eigenvalue Problem Computations
       4             :    Copyright (c) 2002-2020, Universitat Politecnica de Valencia, Spain
       5             : 
       6             :    This file is part of SLEPc.
       7             :    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
       8             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       9             : */
      10             : /*
      11             :    Logarithm function  log(x)
      12             : */
      13             : 
      14             : #include <slepc/private/fnimpl.h>      /*I "slepcfn.h" I*/
      15             : #include <slepcblaslapack.h>
      16             : 
      17           2 : PetscErrorCode FNEvaluateFunction_Log(FN fn,PetscScalar x,PetscScalar *y)
      18             : {
      19           2 :   PetscFunctionBegin;
      20             : #if !defined(PETSC_USE_COMPLEX)
      21           2 :   if (x<0.0) SETERRQ(PETSC_COMM_SELF,1,"Function not defined in the requested value");
      22             : #endif
      23           2 :   *y = PetscLogScalar(x);
      24           2 :   PetscFunctionReturn(0);
      25             : }
      26             : 
      27           2 : PetscErrorCode FNEvaluateDerivative_Log(FN fn,PetscScalar x,PetscScalar *y)
      28             : {
      29           2 :   PetscFunctionBegin;
      30           2 :   if (x==0.0) SETERRQ(PETSC_COMM_SELF,1,"Derivative not defined in the requested value");
      31             : #if !defined(PETSC_USE_COMPLEX)
      32           2 :   if (x<0.0) SETERRQ(PETSC_COMM_SELF,1,"Derivative not defined in the requested value");
      33             : #endif
      34           2 :   *y = 1.0/x;
      35           2 :   PetscFunctionReturn(0);
      36             : }
      37             : 
      38             : /*
      39             :    Block structure of a quasitriangular matrix T. Returns a list of n-1 numbers, where
      40             :    structure(j) encodes the block type of the j:j+1,j:j+1 diagonal block as one of:
      41             :       0 - not the start of a block
      42             :       1 - start of a 2x2 triangular block
      43             :       2 - start of a 2x2 quasi-triangular (full) block
      44             : */
      45           4 : static PetscErrorCode qtri_struct(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscInt *structure)
      46             : {
      47             :   PetscBLASInt j;
      48             : 
      49           4 :   PetscFunctionBegin;
      50             : #if defined(PETSC_USE_COMPLEX)
      51             :   for (j=0;j<n-1;j++) structure[j] = 1;
      52             : #else
      53           4 :   if (n==1) PetscFunctionReturn(0);
      54           4 :   else if (n==2) {
      55           0 :     structure[0] = (T[1]==0.0)? 1: 2;
      56           0 :     PetscFunctionReturn(0);
      57             :   }
      58             :   j = 0;
      59         224 :   while (j<n-2) {
      60         220 :     if (T[j+1+j*ld] != 0.0) { /* Start of a 2x2 full block */
      61          72 :       structure[j++] = 2;
      62          72 :       structure[j++] = 0;
      63         148 :     } else if (T[j+1+j*ld] == 0.0 && T[j+2+(j+1)*ld] == 0.0) { /* Start of a 2x2 triangular block */
      64         146 :       structure[j++] = 1;
      65             :     } else { /* Next block must start a 2x2 full block */
      66           2 :       structure[j++] = 0;
      67             :     }
      68             :   }
      69           4 :   if (T[n-1+(n-2)*ld] != 0.0) { /* 2x2 full block at the end */
      70           2 :     structure[n-2] = 2;
      71           2 :   } else if (structure[n-3] == 0 || structure[n-3] == 1) {
      72           2 :     structure[n-2] = 1;
      73             :   }
      74             : #endif
      75           4 :   PetscFunctionReturn(0);
      76             : }
      77             : 
      78             : /*
      79             :    Compute scaling parameter (s) and order of Pade approximant (m).
      80             :    wr,wi is overwritten. Required workspace is 3*n*n.
      81             :    On output, Troot contains the sth square root of T.
      82             : */
      83           4 : static PetscErrorCode logm_params(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscScalar *wr,PetscScalar *wi,PetscInt maxroots,PetscInt *s,PetscInt *m,PetscScalar *Troot,PetscScalar *work)
      84             : {
      85             :   PetscErrorCode  ierr;
      86             :   PetscInt        i,j,k,p,s0;
      87             :   PetscReal       inrm,eta,a2,a3,a4,d2,d3,d4,d5;
      88           4 :   PetscScalar     *TrootmI=work+2*n*n;
      89           4 :   PetscBool       foundm=PETSC_FALSE,more;
      90             :   PetscRandom     rand;
      91           4 :   const PetscReal xvals[] = { 1.586970738772063e-005, 2.313807884242979e-003, 1.938179313533253e-002,
      92             :        6.209171588994762e-002, 1.276404810806775e-001, 2.060962623452836e-001, 2.879093714241194e-001 };
      93           4 :   const PetscInt  mmax=sizeof(xvals)/sizeof(xvals[0]);
      94             : 
      95           4 :   PetscFunctionBegin;
      96           4 :   ierr = PetscRandomCreate(PETSC_COMM_SELF,&rand);CHKERRQ(ierr);
      97             :   /* get initial s0 so that T^(1/2^s0) < xvals(mmax) */
      98           4 :   *s = 0;
      99             :   do {
     100          16 :     inrm = SlepcAbsEigenvalue(wr[0]-PetscRealConstant(1.0),wi[0]);
     101          16 :     for (i=1;i<n;i++) inrm = PetscMax(inrm,SlepcAbsEigenvalue(wr[i]-PetscRealConstant(1.0),wi[i]));
     102          16 :     if (inrm < xvals[mmax-1]) break;
     103         900 :     for (i=0;i<n;i++) {
     104             : #if defined(PETSC_USE_COMPLEX)
     105             :       wr[i] = PetscSqrtScalar(wr[i]);
     106             : #else
     107             : #if defined(PETSC_HAVE_COMPLEX)
     108        1800 :       PetscComplex z = PetscSqrtComplex(PetscCMPLX(wr[i],wi[i]));
     109         900 :       wr[i] = PetscRealPartComplex(z);
     110         900 :       wi[i] = PetscImaginaryPartComplex(z);
     111             : #else
     112             :       SETERRQ(PETSC_COMM_SELF,1,"This operation requires a compiler with C99 or C++ complex support");
     113             : #endif
     114             : #endif
     115             :     }
     116          12 :     *s = *s + 1;
     117          12 :   } while (*s<maxroots);
     118           4 :   s0 = *s;
     119           4 :   if (*s == maxroots) { ierr = PetscInfo(NULL,"Too many matrix square roots\n");CHKERRQ(ierr); }
     120             : 
     121             :   /* Troot = T */
     122         300 :   for (j=0;j<n;j++) {
     123         300 :     ierr = PetscArraycpy(Troot+j*ld,T+j*ld,PetscMin(j+2,n));CHKERRQ(ierr);
     124             :   }
     125          12 :   for (k=1;k<=PetscMin(*s,maxroots);k++) {
     126          12 :     ierr = SlepcSqrtmSchur(n,Troot,ld,PETSC_FALSE);CHKERRQ(ierr);
     127             :   }
     128             :   /* Compute value of s and m needed */
     129             :   /* TrootmI = Troot - I */
     130         300 :   for (j=0;j<n;j++) {
     131         300 :     ierr = PetscArraycpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n));CHKERRQ(ierr);
     132         300 :     TrootmI[j+j*ld] -= 1.0;
     133             :   }
     134           4 :   ierr = SlepcNormAm(n,TrootmI,2,work,rand,&d2);CHKERRQ(ierr);
     135           4 :   d2 = PetscPowReal(d2,1.0/2.0);
     136           4 :   ierr = SlepcNormAm(n,TrootmI,3,work,rand,&d3);CHKERRQ(ierr);
     137           4 :   d3 = PetscPowReal(d3,1.0/3.0);
     138           4 :   a2 = PetscMax(d2,d3);
     139           4 :   if (a2 <= xvals[1]) {
     140           0 :     *m = (a2 <= xvals[0])? 1: 2;
     141           0 :     foundm = PETSC_TRUE;
     142             :   }
     143           4 :   p = 0;
     144          14 :   while (!foundm) {
     145          10 :     more = PETSC_FALSE;  /* More norm checks needed */
     146          10 :     if (*s > s0) {
     147           6 :       ierr = SlepcNormAm(n,TrootmI,3,work,rand,&d3);CHKERRQ(ierr);
     148           6 :       d3 = PetscPowReal(d3,1.0/3.0);
     149             :     }
     150          10 :     ierr = SlepcNormAm(n,TrootmI,4,work,rand,&d4);CHKERRQ(ierr);
     151          10 :     d4 = PetscPowReal(d4,1.0/4.0);
     152          10 :     a3 = PetscMax(d3,d4);
     153          10 :     if (a3 <= xvals[mmax-1]) {
     154          28 :       for (j=2;j<mmax;j++) if (a3 <= xvals[j]) break;
     155           6 :       if (j <= 5) {
     156           2 :         *m = j+1;
     157           2 :         break;
     158           4 :       } else if (a3/2.0 <= xvals[4] && p < 2) {
     159           2 :         more = PETSC_TRUE;
     160           2 :         p = p + 1;
     161             :       }
     162             :     }
     163           8 :     if (!more) {
     164           6 :       ierr = SlepcNormAm(n,TrootmI,5,work,rand,&d5);CHKERRQ(ierr);
     165           6 :       d5 = PetscPowReal(d5,1.0/5.0);
     166           6 :       a4 = PetscMax(d4,d5);
     167           6 :       eta = PetscMin(a3,a4);
     168           6 :       if (eta <= xvals[mmax-1]) {
     169           4 :         for (j=5;j<mmax;j++) if (eta <= xvals[j]) break;
     170           2 :         *m = j + 1;
     171           2 :         break;
     172             :       }
     173             :     }
     174           6 :     if (*s == maxroots) {
     175           0 :       ierr = PetscInfo(NULL,"Too many matrix square roots\n");CHKERRQ(ierr);
     176           0 :       *m = mmax;  /* No good value found so take largest */
     177           0 :       break;
     178             :     }
     179           6 :     ierr = SlepcSqrtmSchur(n,Troot,ld,PETSC_FALSE);CHKERRQ(ierr);
     180             :     /* TrootmI = Troot - I */
     181         450 :     for (j=0;j<n;j++) {
     182         450 :       ierr = PetscArraycpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n));CHKERRQ(ierr);
     183         450 :       TrootmI[j+j*ld] -= 1.0;
     184             :     }
     185           6 :     *s = *s + 1;
     186             :   }
     187           4 :   ierr = PetscRandomDestroy(&rand);CHKERRQ(ierr);
     188           4 :   PetscFunctionReturn(0);
     189             : }
     190             : 
     191             : #if !defined(PETSC_USE_COMPLEX)
     192             : /*
     193             :    Computes a^(1/2^s) - 1 accurately, avoiding subtractive cancellation
     194             : */
     195           2 : static PetscScalar sqrt_obo(PetscScalar a,PetscInt s)
     196             : {
     197             :   PetscScalar val,z0,r;
     198             :   PetscReal   angle;
     199             :   PetscInt    i,n0;
     200             : 
     201           2 :   PetscFunctionBegin;
     202           2 :   if (s == 0) val = a-1.0;
     203             :   else {
     204           2 :     n0 = s;
     205           2 :     angle = PetscAtan2Real(PetscImaginaryPart(a),PetscRealPart(a));
     206           2 :     if (angle >= PETSC_PI/2.0) {
     207           0 :       a = PetscSqrtScalar(a);
     208           0 :       n0 = s - 1;
     209             :     }
     210           2 :     z0 = a - 1.0;
     211           2 :     a = PetscSqrtScalar(a);
     212           2 :     r = 1.0 + a;
     213           8 :     for (i=0;i<n0-1;i++) {
     214           6 :       a = PetscSqrtScalar(a);
     215           6 :       r = r*(1.0+a);
     216             :     }
     217           2 :     val = z0/r;
     218             :   }
     219           2 :   PetscFunctionReturn(val);
     220             : }
     221             : #endif
     222             : 
     223             : /*
     224             :    Square root of 2x2 matrix T from block diagonal of Schur form. Overwrites T.
     225             : */
     226        1036 : static PetscErrorCode sqrtm_tbt(PetscScalar *T)
     227             : {
     228             :   PetscScalar t11,t12,t21,t22,r11,r22;
     229             : #if !defined(PETSC_USE_COMPLEX)
     230             :   PetscScalar mu;
     231             : #endif
     232             : 
     233        1036 :   PetscFunctionBegin;
     234        1036 :   t11 = T[0]; t21 = T[1]; t12 = T[2]; t22 = T[3];
     235        1036 :   if (t21 != 0.0) {
     236             :     /* Compute square root of 2x2 quasitriangular block */
     237             :     /* The algorithm assumes the special structure of real Schur form */
     238             : #if defined(PETSC_USE_COMPLEX)
     239             :     SETERRQ(PETSC_COMM_SELF,1,"Should not reach this line in complex scalars");
     240             : #else
     241         296 :     mu = PetscSqrtReal(-t21*t12);
     242         296 :     if (t11 > 0.0) r11 = PetscSqrtReal((t11+SlepcAbsEigenvalue(t11,mu))/2.0);
     243           0 :     else r11 = mu / PetscSqrtReal(2.0*(-t11+SlepcAbsEigenvalue(t11,mu)));
     244         296 :     T[0] = r11;
     245         296 :     T[1] = t21/(2.0*r11);
     246         296 :     T[2] = t12/(2.0*r11);
     247         296 :     T[3] = r11;
     248             : #endif
     249             :   } else {
     250             :     /* Compute square root of 2x2 upper triangular block */
     251         740 :     r11 = PetscSqrtScalar(t11);
     252         740 :     r22 = PetscSqrtScalar(t22);
     253         740 :     T[0] = r11;
     254         740 :     T[2] = t12/(r11+r22);
     255         740 :     T[3] = r22;
     256             :   }
     257        1036 :   PetscFunctionReturn(0);
     258             : }
     259             : 
     260             : #if defined(PETSC_USE_COMPLEX)
     261             : /*
     262             :    Unwinding number of z
     263             : */
     264             : PETSC_STATIC_INLINE PetscReal unwinding(PetscScalar z)
     265             : {
     266             :   PetscReal u;
     267             : 
     268             :   PetscFunctionBegin;
     269             :   u = PetscCeilReal((PetscImaginaryPart(z)-PETSC_PI)/(2.0*PETSC_PI));
     270             :   PetscFunctionReturn(u);
     271             : }
     272             : #endif
     273             : 
     274             : /*
     275             :    Power of 2-by-2 upper triangular matrix A.
     276             :    Returns the (1,2) element of the pth power of A, where p is an arbitrary real number
     277             : */
     278         148 : static PetscScalar powerm2by2(PetscScalar A11,PetscScalar A22,PetscScalar A12,PetscReal p)
     279             : {
     280             :   PetscScalar a1,a2,a1p,a2p,loga1,loga2,w,dd,x12;
     281             : 
     282         148 :   PetscFunctionBegin;
     283         148 :   a1 = A11;
     284         148 :   a2 = A22;
     285         148 :   if (a1 == a2) {
     286         292 :     x12 = p*A12*PetscPowScalarReal(a1,p-1);
     287           2 :   } else if (PetscAbsScalar(a1) < 0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2) < 0.5*PetscAbsScalar(a1)) {
     288           0 :     a1p = PetscPowScalarReal(a1,p);
     289           0 :     a2p = PetscPowScalarReal(a2,p);
     290           0 :     x12 = A12*(a2p-a1p)/(a2-a1);
     291             :   } else {  /* Close eigenvalues */
     292           2 :     loga1 = PetscLogScalar(a1);
     293           2 :     loga2 = PetscLogScalar(a2);
     294           2 :     w = PetscAtanhScalar((a2-a1)/(a2+a1));
     295             : #if defined(PETSC_USE_COMPLEX)
     296             :     w += PETSC_i*PETSC_PI*unwinding(loga2-loga1);
     297             : #endif
     298           2 :     dd = 2.0*PetscExpScalar(p*(loga1+loga2)/2.0)*PetscSinhScalar(p*w)/(a2-a1);
     299           2 :     x12 = A12*dd;
     300             :   }
     301         148 :   PetscFunctionReturn(x12);
     302             : }
     303             : 
     304             : /*
     305             :    Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
     306             : */
     307           4 : static PetscErrorCode recompute_diag_blocks_sqrt(PetscBLASInt n,PetscScalar *Troot,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct,PetscInt s)
     308             : {
     309             :   PetscErrorCode ierr;
     310             :   PetscScalar    A[4],P[4],M[4],Z0[4],det;
     311             :   PetscInt       i,j;
     312             : #if !defined(PETSC_USE_COMPLEX)
     313           4 :   PetscInt       last_block=0;
     314             :   PetscScalar    a;
     315             : #endif
     316             : 
     317           4 :   PetscFunctionBegin;
     318         296 :   for (j=0;j<n-1;j++) {
     319             : #if !defined(PETSC_USE_COMPLEX)
     320         296 :     switch (blockStruct[j]) {
     321             :       case 0: /* Not start of a block */
     322          74 :         if (last_block != 0) {
     323             :           last_block = 0;
     324             :         } else { /* In a 1x1 block */
     325           2 :           a = T[j+j*ld];
     326           2 :           Troot[j+j*ld] = sqrt_obo(a,s);
     327             :         }
     328             :         break;
     329             :       default: /* In a 2x2 block */
     330         222 :         last_block = blockStruct[j];
     331             : #endif
     332         222 :         if (s == 0) {
     333           0 :           Troot[j+j*ld]       = T[j+j*ld]-1.0;
     334           0 :           Troot[j+1+j*ld]     = T[j+1+j*ld];
     335           0 :           Troot[j+(j+1)*ld]   = T[j+(j+1)*ld];
     336           0 :           Troot[j+1+(j+1)*ld] = T[j+1+(j+1)*ld]-1.0;
     337           0 :           continue;
     338             :         }
     339         222 :         A[0] = T[j+j*ld]; A[1] = T[j+1+j*ld]; A[2] = T[j+(j+1)*ld]; A[3] = T[j+1+(j+1)*ld];
     340         222 :         ierr = sqrtm_tbt(A);CHKERRQ(ierr);
     341             :         /* Z0 = A - I */
     342         222 :         Z0[0] = A[0]-1.0; Z0[1] = A[1]; Z0[2] = A[2]; Z0[3] = A[3]-1.0;
     343         222 :         if (s == 1) {
     344           0 :           Troot[j+j*ld]       = Z0[0];
     345           0 :           Troot[j+1+j*ld]     = Z0[1];
     346           0 :           Troot[j+(j+1)*ld]   = Z0[2];
     347           0 :           Troot[j+1+(j+1)*ld] = Z0[3];
     348           0 :           continue;
     349             :         }
     350         222 :         ierr = sqrtm_tbt(A);CHKERRQ(ierr);
     351             :         /* P = A + I */
     352         222 :         P[0] = A[0]+1.0; P[1] = A[1]; P[2] = A[2]; P[3] = A[3]+1.0;
     353         814 :         for (i=0;i<s-2;i++) {
     354         592 :           ierr = sqrtm_tbt(A);CHKERRQ(ierr);
     355             :           /* P = P*(I + A) */
     356         592 :           M[0] = P[0]*(A[0]+1.0)+P[2]*A[1];
     357         592 :           M[1] = P[1]*(A[0]+1.0)+P[3]*A[1];
     358         592 :           M[2] = P[0]*A[2]+P[2]*(A[3]+1.0);
     359         592 :           M[3] = P[1]*A[2]+P[3]*(A[3]+1.0);
     360         592 :           P[0] = M[0]; P[1] = M[1]; P[2] = M[2]; P[3] = M[3];
     361             :         }
     362             :         /* Troot(j:j+1,j:j+1) = Z0 / P (via Cramer) */
     363         222 :         det = P[0]*P[3]-P[1]*P[2];
     364         222 :         Troot[j+j*ld]       = (Z0[0]*P[3]-P[1]*Z0[2])/det;
     365         222 :         Troot[j+(j+1)*ld]   = (P[0]*Z0[2]-Z0[0]*P[2])/det;
     366         222 :         Troot[j+1+j*ld]     = (Z0[1]*P[3]-P[1]*Z0[3])/det;
     367         222 :         Troot[j+1+(j+1)*ld] = (P[0]*Z0[3]-Z0[1]*P[2])/det;
     368             :         /* If block is upper triangular recompute the (1,2) element.
     369             :            Skip when T(j,j) or T(j+1,j+1) < 0 since the implementation of atanh is nonstandard */
     370         222 :         if (T[j+1+j*ld]==0.0 && PetscRealPart(T[j+j*ld])>=0.0 && PetscRealPart(T[j+1+(j+1)*ld])>=0.0) {
     371         148 :           Troot[j+(j+1)*ld] = powerm2by2(T[j+j*ld],T[j+1+(j+1)*ld],T[j+(j+1)*ld],1.0/PetscPowInt(2,s));
     372             :         }
     373             : #if !defined(PETSC_USE_COMPLEX)
     374             :     }
     375             : #endif
     376             :   }
     377             : #if !defined(PETSC_USE_COMPLEX)
     378             :   /* If last diagonal entry is not in a block it will have been missed */
     379           4 :   if (blockStruct[n-2] == 0) {
     380           0 :     a = T[n-1+(n-1)*ld];
     381           0 :     Troot[n-1+(n-1)*ld] = sqrt_obo(a,s);
     382             :   }
     383             : #endif
     384           4 :   PetscFunctionReturn(0);
     385             : }
     386             : 
     387             : /*
     388             :    Nodes x and weights w for n-point Gauss-Legendre quadrature (Q is n*n workspace)
     389             : 
     390             :    G. H. Golub and J. H. Welsch, Calculation of Gauss quadrature
     391             :       rules, Math. Comp., 23(106):221-230, 1969.
     392             : */
     393           4 : static PetscErrorCode gauss_legendre(PetscBLASInt n,PetscScalar *x,PetscScalar *w,PetscScalar *Q)
     394             : {
     395             :   PetscErrorCode ierr;
     396             :   PetscScalar    v,a,*work;
     397             :   PetscReal      *eig,dummy;
     398           4 :   PetscBLASInt   k,ld=n,lwork,info;
     399             : #if defined(PETSC_USE_COMPLEX)
     400             :   PetscReal      *rwork,rdummy;
     401             : #endif
     402             : 
     403           4 :   PetscFunctionBegin;
     404           4 :   ierr = PetscArrayzero(Q,n*n);CHKERRQ(ierr);
     405          22 :   for (k=1;k<n;k++) {
     406          22 :     v = k/PetscSqrtReal(4.0*k*k-1.0);
     407          22 :     Q[k+(k-1)*n] = v;
     408          22 :     Q[(k-1)+k*n] = v;
     409             :   }
     410             : 
     411             :   /* workspace query and memory allocation */
     412           4 :   lwork = -1;
     413             : #if defined(PETSC_USE_COMPLEX)
     414             :   PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&rdummy,&info));
     415             :   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);CHKERRQ(ierr);
     416             :   ierr = PetscMalloc3(n,&eig,lwork,&work,PetscMax(1,3*n-2),&rwork);CHKERRQ(ierr);
     417             : #else
     418           4 :   PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&info));
     419           4 :   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);CHKERRQ(ierr);
     420           4 :   ierr = PetscMalloc2(n,&eig,lwork,&work);CHKERRQ(ierr);
     421             : #endif
     422             : 
     423             :   /* compute eigendecomposition */
     424             : #if defined(PETSC_USE_COMPLEX)
     425             :   PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,rwork,&info));
     426             : #else
     427           4 :   PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,&info));
     428             : #endif
     429           4 :   SlepcCheckLapackInfo("syev",info);
     430             : 
     431          26 :   for (k=0;k<n;k++) {
     432          26 :     x[k] = eig[k];
     433          26 :     w[k] = 2.0*Q[k*n]*Q[k*n];
     434             :   }
     435             : #if defined(PETSC_USE_COMPLEX)
     436             :   ierr = PetscFree3(eig,work,rwork);CHKERRQ(ierr);
     437             : #else
     438           4 :   ierr = PetscFree2(eig,work);CHKERRQ(ierr);
     439             : #endif
     440           4 :   ierr = PetscLogFlops(9.0*n*n*n+2.0*n*n*n);CHKERRQ(ierr);
     441           4 :   PetscFunctionReturn(0);
     442             : }
     443             : 
     444             : /*
     445             :    Pade approximation to log(1 + T) via partial fractions
     446             : */
     447           4 : static PetscErrorCode pade_approx(PetscBLASInt n,PetscScalar *T,PetscScalar *L,PetscBLASInt ld,PetscInt m,PetscScalar *work)
     448             : {
     449             :   PetscErrorCode ierr;
     450             :   PetscScalar    *K,*W,*nodes,*wts;
     451             :   PetscBLASInt   *ipiv,info;
     452             :   PetscInt       i,j,k;
     453             : 
     454           4 :   PetscFunctionBegin;
     455           4 :   K     = work;
     456           4 :   W     = work+n*n;
     457           4 :   nodes = work+2*n*n;
     458           4 :   wts   = work+2*n*n+m;
     459           4 :   ipiv  = (PetscBLASInt*)(work+2*n*n+2*m);
     460           4 :   ierr = gauss_legendre(m,nodes,wts,L);CHKERRQ(ierr);
     461             :   /* Convert from [-1,1] to [0,1] */
     462          26 :   for (i=0;i<m;i++) {
     463          26 :     nodes[i] = (nodes[i]+1.0)/2.0;
     464          26 :     wts[i] = wts[i]/2.0;
     465             :   }
     466           4 :   ierr = PetscArrayzero(L,n*n);CHKERRQ(ierr);
     467          26 :   for (k=0;k<m;k++) {
     468      146250 :     for (i=0;i<n;i++) for (j=0;j<n;j++) K[i+j*ld] = nodes[k]*T[i+j*ld];
     469        1950 :     for (i=0;i<n;i++) K[i+i*ld] += 1.0;
     470      146250 :     for (i=0;i<n;i++) for (j=0;j<n;j++) W[i+j*ld] = T[i+j*ld];
     471          26 :     PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,K,&n,ipiv,W,&n,&info));
     472      146250 :     for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] += wts[k]*W[i+j*ld];
     473             :   }
     474           4 :   PetscFunctionReturn(0);
     475             : }
     476             : 
     477             : /*
     478             :    Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
     479             : */
     480           4 : static PetscErrorCode recompute_diag_blocks_log(PetscBLASInt n,PetscScalar *L,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct)
     481             : {
     482             :   PetscScalar a1,a2,a12,loga1,loga2,z,dd;
     483             :   PetscInt    j;
     484             : #if !defined(PETSC_USE_COMPLEX)
     485           4 :   PetscInt    last_block=0;
     486             :   PetscScalar f,t;
     487             : #endif
     488             : 
     489           4 :   PetscFunctionBegin;
     490         296 :   for (j=0;j<n-1;j++) {
     491             : #if !defined(PETSC_USE_COMPLEX)
     492         296 :     switch (blockStruct[j]) {
     493             :       case 0: /* Not start of a block */
     494          74 :         if (last_block != 0) {
     495             :           last_block = 0;
     496             :         } else { /* In a 1x1 block */
     497           2 :           L[j+j*ld] = PetscLogScalar(T[j+j*ld]);
     498             :         }
     499             :         break;
     500             :       case 1: /* Start of upper-tri block */
     501         148 :         last_block = 1;
     502             : #endif
     503         148 :         a1 = T[j+j*ld];
     504         148 :         a2 = T[j+1+(j+1)*ld];
     505         148 :         loga1 = PetscLogScalar(a1);
     506         148 :         loga2 = PetscLogScalar(a2);
     507         148 :         L[j+j*ld] = loga1;
     508         148 :         L[j+1+(j+1)*ld] = loga2;
     509         148 :         if ((PetscRealPart(a1)<0.0 && PetscImaginaryPart(a1)==0.0) || (PetscRealPart(a2)<0.0 && PetscImaginaryPart(a1)==0.0)) {
     510             :          /* Problems with 2 x 2 formula for (1,2) block
     511             :             since atanh is nonstandard, just redo diagonal part */
     512           0 :           continue;
     513             :         }
     514         148 :         if (a1 == a2) {
     515         146 :           a12 = T[j+(j+1)*ld]/a1;
     516           2 :         } else if (PetscAbsScalar(a1)<0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2)<0.5*PetscAbsScalar(a1)) {
     517           0 :           a12 = T[j+(j+1)*ld]*(loga2-loga1)/(a2-a1);
     518             :         } else {  /* Close eigenvalues */
     519           2 :           z = (a2-a1)/(a2+a1);
     520           2 :           dd = 2.0*PetscAtanhScalar(z);
     521             : #if defined(PETSC_USE_COMPLEX)
     522             :           dd += 2.0*PETSC_i*PETSC_PI*unwinding(loga2-loga1);
     523             : #endif
     524           2 :           dd /= (a2-a1);
     525           2 :           a12 = T[j+(j+1)*ld]*dd;
     526             :         }
     527         148 :         L[j+(j+1)*ld] = a12;
     528             : #if !defined(PETSC_USE_COMPLEX)
     529         148 :         break;
     530             :       case 2: /* Start of quasi-tri block */
     531          74 :         last_block = 2;
     532          74 :         f = 0.5*PetscLogScalar(T[j+j*ld]*T[j+j*ld]-T[j+(j+1)*ld]*T[j+1+j*ld]);
     533          74 :         t = PetscAtan2Real(PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]),T[j+j*ld])/PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]);
     534          74 :         L[j+j*ld]       = f;
     535          74 :         L[j+1+j*ld]     = t*T[j+1+j*ld];
     536          74 :         L[j+(j+1)*ld]   = t*T[j+(j+1)*ld];
     537          74 :         L[j+1+(j+1)*ld] = f;
     538             :     }
     539             : #endif
     540             :   }
     541           4 :   PetscFunctionReturn(0);
     542             : }
     543             : /*
     544             :  * Matrix logarithm implementation based on algorithm and matlab code by N. Higham and co-authors
     545             :  *
     546             :  *     H. Al-Mohy and N. J. Higham, "Improved inverse scaling and squaring
     547             :  *     algorithms for the matrix logarithm", SIAM J. Sci. Comput. 34(4):C153-C169, 2012.
     548             :  */
     549           4 : static PetscErrorCode SlepcLogmPade(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscBool firstonly)
     550             : {
     551             : #if !defined(PETSC_HAVE_COMPLEX)
     552             :   PetscFunctionBegin;
     553             :   SETERRQ(PETSC_COMM_SELF,1,"This function requires C99 or C++ complex support");
     554             : #else
     555             :   PetscErrorCode ierr;
     556             :   PetscBLASInt   k,sdim,lwork,info;
     557           4 :   PetscScalar    *wr,*wi=NULL,*W,*Q,*Troot,*L,*work,one=1.0,zero=0.0,alpha;
     558           4 :   PetscInt       i,j,s=0,m=0,*blockformat;
     559             : #if defined(PETSC_USE_COMPLEX)
     560             :   PetscReal      *rwork;
     561             : #endif
     562             : 
     563           4 :   PetscFunctionBegin;
     564           4 :   lwork = 3*n*n; /* gees needs only 5*n, but work is also passed to logm_params */
     565           4 :   k     = firstonly? 1: n;
     566             : 
     567             :   /* compute Schur decomposition A*Q = Q*T */
     568           4 :   ierr = PetscCalloc7(n,&wr,n*k,&W,n*n,&Q,n*n,&Troot,n*n,&L,lwork,&work,n-1,&blockformat);CHKERRQ(ierr);
     569             : #if !defined(PETSC_USE_COMPLEX)
     570           4 :   ierr = PetscMalloc1(n,&wi);CHKERRQ(ierr);
     571           4 :   PetscStackCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,wi,Q,&ld,work,&lwork,NULL,&info));
     572             : #else
     573             :   ierr = PetscMalloc1(n,&rwork);CHKERRQ(ierr);
     574             :   PetscStackCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,Q,&ld,work,&lwork,rwork,NULL,&info));
     575             : #endif
     576           4 :   SlepcCheckLapackInfo("gees",info);
     577             : 
     578             : #if !defined(PETSC_USE_COMPLEX)
     579             :   /* check for negative real eigenvalues */
     580         300 :   for (i=0;i<n;i++) {
     581         300 :     if (wr[i]<0.0 && wi[i]==0.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix has negative real eigenvalue; rerun with complex scalars");
     582             :   }
     583             : #endif
     584             : 
     585             :   /* get block structure of Schur factor */
     586           4 :   ierr = qtri_struct(n,T,ld,blockformat);CHKERRQ(ierr);
     587             : 
     588             :   /* get parameters */
     589           4 :   ierr = logm_params(n,T,ld,wr,wi,100,&s,&m,Troot,work);CHKERRQ(ierr);
     590             : 
     591             :   /* compute Troot - I = T(1/2^s) - I more accurately */
     592           4 :   ierr = recompute_diag_blocks_sqrt(n,Troot,T,ld,blockformat,s);CHKERRQ(ierr);
     593             : 
     594             :   /* compute Pade approximant */
     595           4 :   ierr = pade_approx(n,Troot,L,ld,m,work);CHKERRQ(ierr);
     596             : 
     597             :   /* scale back up, L = 2^s * L */
     598           8 :   alpha = PetscPowInt(2,s);
     599           4 :   for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] *= alpha;
     600             : 
     601             :   /* recompute diagonal blocks */
     602           4 :   ierr = recompute_diag_blocks_log(n,L,T,ld,blockformat);CHKERRQ(ierr);
     603             : 
     604             :   /* backtransform B = Q*L*Q' */
     605           4 :   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&k,&n,&one,L,&ld,Q,&ld,&zero,W,&ld));
     606           4 :   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&k,&n,&one,Q,&ld,W,&ld,&zero,T,&ld));
     607             : 
     608             :   /* flop count: Schur decomposition, and backtransform */
     609           4 :   ierr = PetscLogFlops(25.0*n*n*n+4.0*n*n*k);CHKERRQ(ierr);
     610             : 
     611           4 :   ierr = PetscFree7(wr,W,Q,Troot,L,work,blockformat);CHKERRQ(ierr);
     612             : #if !defined(PETSC_USE_COMPLEX)
     613           4 :   ierr = PetscFree(wi);CHKERRQ(ierr);
     614             : #else
     615             :   ierr = PetscFree(rwork);CHKERRQ(ierr);
     616             : #endif
     617           4 :   PetscFunctionReturn(0);
     618             : #endif
     619             : }
     620             : 
     621           2 : PetscErrorCode FNEvaluateFunctionMat_Log_Higham(FN fn,Mat A,Mat B)
     622             : {
     623             :   PetscErrorCode ierr;
     624             :   PetscBLASInt   n;
     625             :   PetscScalar    *T;
     626             :   PetscInt       m;
     627             : 
     628           2 :   PetscFunctionBegin;
     629           2 :   if (A!=B) { ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); }
     630           2 :   ierr = MatDenseGetArray(B,&T);CHKERRQ(ierr);
     631           2 :   ierr = MatGetSize(A,&m,NULL);CHKERRQ(ierr);
     632           2 :   ierr = PetscBLASIntCast(m,&n);CHKERRQ(ierr);
     633           2 :   ierr = SlepcLogmPade(n,T,n,PETSC_FALSE);CHKERRQ(ierr);
     634           2 :   ierr = MatDenseRestoreArray(B,&T);CHKERRQ(ierr);
     635           2 :   PetscFunctionReturn(0);
     636             : }
     637             : 
     638           2 : PetscErrorCode FNEvaluateFunctionMatVec_Log_Higham(FN fn,Mat A,Vec v)
     639             : {
     640             :   PetscErrorCode ierr;
     641             :   PetscBLASInt   n;
     642             :   PetscScalar    *T;
     643             :   PetscInt       m;
     644             :   Mat            B;
     645             : 
     646           2 :   PetscFunctionBegin;
     647           2 :   ierr = FN_AllocateWorkMat(fn,A,&B);CHKERRQ(ierr);
     648           2 :   ierr = MatDenseGetArray(B,&T);CHKERRQ(ierr);
     649           2 :   ierr = MatGetSize(A,&m,NULL);CHKERRQ(ierr);
     650           2 :   ierr = PetscBLASIntCast(m,&n);CHKERRQ(ierr);
     651           2 :   ierr = SlepcLogmPade(n,T,n,PETSC_TRUE);CHKERRQ(ierr);
     652           2 :   ierr = MatDenseRestoreArray(B,&T);CHKERRQ(ierr);
     653           2 :   ierr = MatGetColumnVector(B,v,0);CHKERRQ(ierr);
     654           2 :   ierr = FN_FreeWorkMat(fn,&B);CHKERRQ(ierr);
     655           2 :   PetscFunctionReturn(0);
     656             : }
     657             : 
     658           4 : PetscErrorCode FNView_Log(FN fn,PetscViewer viewer)
     659             : {
     660             :   PetscErrorCode ierr;
     661             :   PetscBool      isascii;
     662             :   char           str[50];
     663           4 :   const char     *methodname[] = {
     664             :                   "scaling & squaring, [m/m] Pade approximant (Higham)"
     665             :   };
     666           4 :   const int      nmeth=sizeof(methodname)/sizeof(methodname[0]);
     667             : 
     668           4 :   PetscFunctionBegin;
     669           4 :   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
     670           4 :   if (isascii) {
     671           4 :     if (fn->beta==(PetscScalar)1.0) {
     672           1 :       if (fn->alpha==(PetscScalar)1.0) {
     673           1 :         ierr = PetscViewerASCIIPrintf(viewer,"  Logarithm: log(x)\n");CHKERRQ(ierr);
     674             :       } else {
     675           0 :         ierr = SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);CHKERRQ(ierr);
     676           0 :         ierr = PetscViewerASCIIPrintf(viewer,"  Logarithm: log(%s*x)\n",str);CHKERRQ(ierr);
     677             :       }
     678             :     } else {
     679           3 :       ierr = SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE);CHKERRQ(ierr);
     680           3 :       if (fn->alpha==(PetscScalar)1.0) {
     681           0 :         ierr = PetscViewerASCIIPrintf(viewer,"  Logarithm: %s*log(x)\n",str);CHKERRQ(ierr);
     682             :       } else {
     683           3 :         ierr = PetscViewerASCIIPrintf(viewer,"  Logarithm: %s",str);CHKERRQ(ierr);
     684           3 :         ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
     685           3 :         ierr = SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);CHKERRQ(ierr);
     686           3 :         ierr = PetscViewerASCIIPrintf(viewer,"*log(%s*x)\n",str);CHKERRQ(ierr);
     687           3 :         ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
     688             :       }
     689             :     }
     690           4 :     if (fn->method<nmeth) {
     691           4 :       ierr = PetscViewerASCIIPrintf(viewer,"  computing matrix functions with: %s\n",methodname[fn->method]);CHKERRQ(ierr);
     692             :     }
     693             :   }
     694           4 :   PetscFunctionReturn(0);
     695             : }
     696             : 
     697           3 : SLEPC_EXTERN PetscErrorCode FNCreate_Log(FN fn)
     698             : {
     699           3 :   PetscFunctionBegin;
     700           3 :   fn->ops->evaluatefunction          = FNEvaluateFunction_Log;
     701           3 :   fn->ops->evaluatederivative        = FNEvaluateDerivative_Log;
     702           3 :   fn->ops->evaluatefunctionmat[0]    = FNEvaluateFunctionMat_Log_Higham;
     703           3 :   fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Log_Higham;
     704           3 :   fn->ops->view                      = FNView_Log;
     705           3 :   PetscFunctionReturn(0);
     706             : }
     707             : 

Generated by: LCOV version 1.13