LCOV - code coverage report
Current view: top level - sys/classes/fn/impls/log - fnlog.c (source / functions) Hit Total Coverage
Test: SLEPc Lines: 289 308 93.8 %
Date: 2024-11-24 01:01:48 Functions: 16 16 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       3             :    SLEPc - Scalable Library for Eigenvalue Problem Computations
       4             :    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
       5             : 
       6             :    This file is part of SLEPc.
       7             :    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
       8             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       9             : */
      10             : /*
      11             :    Logarithm function  log(x)
      12             : */
      13             : 
      14             : #include <slepc/private/fnimpl.h>      /*I "slepcfn.h" I*/
      15             : #include <slepcblaslapack.h>
      16             : 
      17           2 : static PetscErrorCode FNEvaluateFunction_Log(FN fn,PetscScalar x,PetscScalar *y)
      18             : {
      19           2 :   PetscFunctionBegin;
      20             : #if !defined(PETSC_USE_COMPLEX)
      21             :   PetscCheck(x>=0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Function not defined in the requested value");
      22             : #endif
      23           2 :   *y = PetscLogScalar(x);
      24           2 :   PetscFunctionReturn(PETSC_SUCCESS);
      25             : }
      26             : 
      27           2 : static PetscErrorCode FNEvaluateDerivative_Log(FN fn,PetscScalar x,PetscScalar *y)
      28             : {
      29           2 :   PetscFunctionBegin;
      30           2 :   PetscCheck(x!=0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
      31             : #if !defined(PETSC_USE_COMPLEX)
      32             :   PetscCheck(x>0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
      33             : #endif
      34           2 :   *y = 1.0/x;
      35           2 :   PetscFunctionReturn(PETSC_SUCCESS);
      36             : }
      37             : 
      38             : /*
      39             :    Block structure of a quasitriangular matrix T. Returns a list of n-1 numbers, where
      40             :    structure(j) encodes the block type of the j:j+1,j:j+1 diagonal block as one of:
      41             :       0 - not the start of a block
      42             :       1 - start of a 2x2 triangular block
      43             :       2 - start of a 2x2 quasi-triangular (full) block
      44             : */
      45           6 : static PetscErrorCode qtri_struct(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscInt *structure)
      46             : {
      47           6 :   PetscBLASInt j;
      48             : 
      49           6 :   PetscFunctionBegin;
      50             : #if defined(PETSC_USE_COMPLEX)
      51         450 :   for (j=0;j<n-1;j++) structure[j] = 1;
      52             : #else
      53             :   if (n==1) PetscFunctionReturn(PETSC_SUCCESS);
      54             :   else if (n==2) {
      55             :     structure[0] = (T[1]==0.0)? 1: 2;
      56             :     PetscFunctionReturn(PETSC_SUCCESS);
      57             :   }
      58             :   j = 0;
      59             :   while (j<n-2) {
      60             :     if (T[j+1+j*ld] != 0.0) { /* Start of a 2x2 full block */
      61             :       structure[j++] = 2;
      62             :       structure[j++] = 0;
      63             :     } else if (T[j+1+j*ld] == 0.0 && T[j+2+(j+1)*ld] == 0.0) { /* Start of a 2x2 triangular block */
      64             :       structure[j++] = 1;
      65             :     } else { /* Next block must start a 2x2 full block */
      66             :       structure[j++] = 0;
      67             :     }
      68             :   }
      69             :   if (T[n-1+(n-2)*ld] != 0.0) { /* 2x2 full block at the end */
      70             :     structure[n-2] = 2;
      71             :   } else if (structure[n-3] == 0 || structure[n-3] == 1) {
      72             :     structure[n-2] = 1;
      73             :   }
      74             : #endif
      75           6 :   PetscFunctionReturn(PETSC_SUCCESS);
      76             : }
      77             : 
      78             : /*
      79             :    Compute scaling parameter (s) and order of Pade approximant (m).
      80             :    wr,wi is overwritten. Required workspace is 3*n*n.
      81             :    On output, Troot contains the sth square root of T.
      82             : */
      83           6 : static PetscErrorCode FNlogm_params(FN fn,PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscScalar *wr,PetscScalar *wi,PetscInt maxroots,PetscInt *s,PetscInt *m,PetscScalar *Troot,PetscScalar *work)
      84             : {
      85           6 :   PetscInt        i,j,k,p,s0;
      86           6 :   PetscReal       inrm,eta,a2,a3,a4,d2,d3,d4,d5;
      87           6 :   PetscScalar     *TrootmI=work+2*n*n;
      88           6 :   PetscBool       foundm=PETSC_FALSE,more;
      89           6 :   PetscRandom     rand;
      90           6 :   const PetscReal xvals[] = { 1.586970738772063e-005, 2.313807884242979e-003, 1.938179313533253e-002,
      91             :        6.209171588994762e-002, 1.276404810806775e-001, 2.060962623452836e-001, 2.879093714241194e-001 };
      92           6 :   const PetscInt  mmax=PETSC_STATIC_ARRAY_LENGTH(xvals);
      93             : 
      94           6 :   PetscFunctionBegin;
      95           6 :   PetscCall(PetscRandomCreate(PETSC_COMM_SELF,&rand));
      96             :   /* get initial s0 so that T^(1/2^s0) < xvals(mmax) */
      97           6 :   *s = 0;
      98          28 :   do {
      99          28 :     inrm = SlepcAbsEigenvalue(wr[0]-PetscRealConstant(1.0),wi[0]);
     100        2100 :     for (i=1;i<n;i++) inrm = PetscMax(inrm,SlepcAbsEigenvalue(wr[i]-PetscRealConstant(1.0),wi[i]));
     101          28 :     if (inrm < xvals[mmax-1]) break;
     102        1672 :     for (i=0;i<n;i++) {
     103             : #if defined(PETSC_USE_COMPLEX)
     104        1650 :       wr[i] = PetscSqrtScalar(wr[i]);
     105             : #else
     106             : #if defined(PETSC_HAVE_COMPLEX)
     107             :       PetscComplex z = PetscSqrtComplex(PetscCMPLX(wr[i],wi[i]));
     108             :       wr[i] = PetscRealPartComplex(z);
     109             :       wi[i] = PetscImaginaryPartComplex(z);
     110             : #else
     111             :       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This operation requires a compiler with C99 or C++ complex support");
     112             : #endif
     113             : #endif
     114             :     }
     115          22 :     *s = *s + 1;
     116          22 :   } while (*s<maxroots);
     117           6 :   s0 = *s;
     118           6 :   if (*s == maxroots) PetscCall(PetscInfo(fn,"Too many matrix square roots\n"));
     119             : 
     120             :   /* Troot = T */
     121         456 :   for (j=0;j<n;j++) PetscCall(PetscArraycpy(Troot+j*ld,T+j*ld,PetscMin(j+2,n)));
     122          28 :   for (k=1;k<=PetscMin(*s,maxroots);k++) PetscCall(FNSqrtmSchur(fn,n,Troot,ld,PETSC_FALSE));
     123             :   /* Compute value of s and m needed */
     124             :   /* TrootmI = Troot - I */
     125         456 :   for (j=0;j<n;j++) {
     126         450 :     PetscCall(PetscArraycpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n)));
     127         450 :     TrootmI[j+j*ld] -= 1.0;
     128             :   }
     129           6 :   PetscCall(SlepcNormAm(n,TrootmI,2,work,rand,&d2));
     130           6 :   d2 = PetscPowReal(d2,1.0/2.0);
     131           6 :   PetscCall(SlepcNormAm(n,TrootmI,3,work,rand,&d3));
     132           6 :   d3 = PetscPowReal(d3,1.0/3.0);
     133           6 :   a2 = PetscMax(d2,d3);
     134           6 :   if (a2 <= xvals[1]) {
     135           0 :     *m = (a2 <= xvals[0])? 1: 2;
     136           0 :     foundm = PETSC_TRUE;
     137             :   }
     138           6 :   p = 0;
     139          14 :   while (!foundm) {
     140          14 :     more = PETSC_FALSE;  /* More norm checks needed */
     141          14 :     if (*s > s0) {
     142           8 :       PetscCall(SlepcNormAm(n,TrootmI,3,work,rand,&d3));
     143           8 :       d3 = PetscPowReal(d3,1.0/3.0);
     144             :     }
     145          14 :     PetscCall(SlepcNormAm(n,TrootmI,4,work,rand,&d4));
     146          14 :     d4 = PetscPowReal(d4,1.0/4.0);
     147          14 :     a3 = PetscMax(d3,d4);
     148          14 :     if (a3 <= xvals[mmax-1]) {
     149          38 :       for (j=2;j<mmax;j++) if (a3 <= xvals[j]) break;
     150           8 :       if (j <= 5) {
     151           2 :         *m = j+1;
     152           2 :         break;
     153           6 :       } else if (a3/2.0 <= xvals[4] && p < 2) {
     154           2 :         more = PETSC_TRUE;
     155           2 :         p = p + 1;
     156             :       }
     157             :     }
     158           2 :     if (!more) {
     159          10 :       PetscCall(SlepcNormAm(n,TrootmI,5,work,rand,&d5));
     160          10 :       d5 = PetscPowReal(d5,1.0/5.0);
     161          10 :       a4 = PetscMax(d4,d5);
     162          10 :       eta = PetscMin(a3,a4);
     163          10 :       if (eta <= xvals[mmax-1]) {
     164           6 :         for (j=5;j<mmax;j++) if (eta <= xvals[j]) break;
     165           4 :         *m = j + 1;
     166           4 :         break;
     167             :       }
     168             :     }
     169           8 :     if (*s == maxroots) {
     170           0 :       PetscCall(PetscInfo(fn,"Too many matrix square roots\n"));
     171           0 :       *m = mmax;  /* No good value found so take largest */
     172           0 :       break;
     173             :     }
     174           8 :     PetscCall(FNSqrtmSchur(fn,n,Troot,ld,PETSC_FALSE));
     175             :     /* TrootmI = Troot - I */
     176         608 :     for (j=0;j<n;j++) {
     177         600 :       PetscCall(PetscArraycpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n)));
     178         600 :       TrootmI[j+j*ld] -= 1.0;
     179             :     }
     180           8 :     *s = *s + 1;
     181             :   }
     182           6 :   PetscCall(PetscRandomDestroy(&rand));
     183           6 :   PetscFunctionReturn(PETSC_SUCCESS);
     184             : }
     185             : 
     186             : #if !defined(PETSC_USE_COMPLEX)
     187             : /*
     188             :    Computes a^(1/2^s) - 1 accurately, avoiding subtractive cancellation
     189             : */
     190             : static PetscScalar sqrt_obo(PetscScalar a,PetscInt s)
     191             : {
     192             :   PetscScalar val,z0,r;
     193             :   PetscReal   angle;
     194             :   PetscInt    i,n0;
     195             : 
     196             :   PetscFunctionBegin;
     197             :   if (s == 0) val = a-1.0;
     198             :   else {
     199             :     n0 = s;
     200             :     angle = PetscAtan2Real(PetscImaginaryPart(a),PetscRealPart(a));
     201             :     if (angle >= PETSC_PI/2.0) {
     202             :       a = PetscSqrtScalar(a);
     203             :       n0 = s - 1;
     204             :     }
     205             :     z0 = a - 1.0;
     206             :     a = PetscSqrtScalar(a);
     207             :     r = 1.0 + a;
     208             :     for (i=0;i<n0-1;i++) {
     209             :       a = PetscSqrtScalar(a);
     210             :       r = r*(1.0+a);
     211             :     }
     212             :     val = z0/r;
     213             :   }
     214             :   PetscFunctionReturn(val);
     215             : }
     216             : #endif
     217             : 
     218             : /*
     219             :    Square root of 2x2 matrix T from block diagonal of Schur form. Overwrites T.
     220             : */
     221        2220 : static PetscErrorCode sqrtm_tbt(PetscScalar *T)
     222             : {
     223        2220 :   PetscScalar t11,t12,t21,t22,r11,r22;
     224             : #if !defined(PETSC_USE_COMPLEX)
     225             :   PetscScalar mu;
     226             : #endif
     227             : 
     228        2220 :   PetscFunctionBegin;
     229        2220 :   t11 = T[0]; t21 = T[1]; t12 = T[2]; t22 = T[3];
     230        2220 :   if (t21 != 0.0) {
     231             :     /* Compute square root of 2x2 quasitriangular block */
     232             :     /* The algorithm assumes the special structure of real Schur form */
     233             : #if defined(PETSC_USE_COMPLEX)
     234           0 :     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Should not reach this line in complex scalars");
     235             : #else
     236             :     mu = PetscSqrtReal(-t21*t12);
     237             :     if (t11 > 0.0) r11 = PetscSqrtReal((t11+SlepcAbsEigenvalue(t11,mu))/2.0);
     238             :     else r11 = mu / PetscSqrtReal(2.0*(-t11+SlepcAbsEigenvalue(t11,mu)));
     239             :     T[0] = r11;
     240             :     T[1] = t21/(2.0*r11);
     241             :     T[2] = t12/(2.0*r11);
     242             :     T[3] = r11;
     243             : #endif
     244             :   } else {
     245             :     /* Compute square root of 2x2 upper triangular block */
     246        2220 :     r11 = PetscSqrtScalar(t11);
     247        2220 :     r22 = PetscSqrtScalar(t22);
     248        2220 :     T[0] = r11;
     249        2220 :     T[2] = t12/(r11+r22);
     250        2220 :     T[3] = r22;
     251             :   }
     252        2220 :   PetscFunctionReturn(PETSC_SUCCESS);
     253             : }
     254             : 
     255             : #if defined(PETSC_USE_COMPLEX)
     256             : /*
     257             :    Unwinding number of z
     258             : */
     259         492 : static inline PetscReal unwinding(PetscScalar z)
     260             : {
     261         492 :   PetscReal u;
     262             : 
     263         492 :   PetscFunctionBegin;
     264         492 :   u = PetscCeilReal((PetscImaginaryPart(z)-PETSC_PI)/(2.0*PETSC_PI));
     265         492 :   PetscFunctionReturn(u);
     266             : }
     267             : #endif
     268             : 
     269             : /*
     270             :    Power of 2-by-2 upper triangular matrix A.
     271             :    Returns the (1,2) element of the pth power of A, where p is an arbitrary real number
     272             : */
     273         344 : static PetscScalar powerm2by2(PetscScalar A11,PetscScalar A22,PetscScalar A12,PetscReal p)
     274             : {
     275         344 :   PetscScalar a1,a2,a1p,a2p,loga1,loga2,w,dd,x12;
     276             : 
     277         344 :   PetscFunctionBegin;
     278         344 :   a1 = A11;
     279         344 :   a2 = A22;
     280         344 :   if (a1 == a2) {
     281         146 :     x12 = p*A12*PetscPowScalarReal(a1,p-1);
     282         198 :   } else if (PetscAbsScalar(a1) < 0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2) < 0.5*PetscAbsScalar(a1)) {
     283           2 :     a1p = PetscPowScalarReal(a1,p);
     284           2 :     a2p = PetscPowScalarReal(a2,p);
     285           2 :     x12 = A12*(a2p-a1p)/(a2-a1);
     286             :   } else {  /* Close eigenvalues */
     287         196 :     loga1 = PetscLogScalar(a1);
     288         196 :     loga2 = PetscLogScalar(a2);
     289         196 :     w = PetscAtanhScalar((a2-a1)/(a2+a1));
     290             : #if defined(PETSC_USE_COMPLEX)
     291         196 :     w += PETSC_i*PETSC_PI*unwinding(loga2-loga1);
     292             : #endif
     293         196 :     dd = 2.0*PetscExpScalar(p*(loga1+loga2)/2.0)*PetscSinhScalar(p*w)/(a2-a1);
     294         196 :     x12 = A12*dd;
     295             :   }
     296         344 :   PetscFunctionReturn(x12);
     297             : }
     298             : 
     299             : /*
     300             :    Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
     301             : */
     302           6 : static PetscErrorCode recompute_diag_blocks_sqrt(PetscBLASInt n,PetscScalar *Troot,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct,PetscInt s)
     303             : {
     304           6 :   PetscScalar    A[4],P[4],M[4],Z0[4],det;
     305           6 :   PetscInt       i,j;
     306             : #if !defined(PETSC_USE_COMPLEX)
     307             :   PetscInt       last_block=0;
     308             :   PetscScalar    a;
     309             : #endif
     310             : 
     311           6 :   PetscFunctionBegin;
     312         450 :   for (j=0;j<n-1;j++) {
     313             : #if !defined(PETSC_USE_COMPLEX)
     314             :     switch (blockStruct[j]) {
     315             :       case 0: /* Not start of a block */
     316             :         if (last_block != 0) {
     317             :           last_block = 0;
     318             :         } else { /* In a 1x1 block */
     319             :           a = T[j+j*ld];
     320             :           Troot[j+j*ld] = sqrt_obo(a,s);
     321             :         }
     322             :         break;
     323             :       default: /* In a 2x2 block */
     324             :         last_block = blockStruct[j];
     325             : #endif
     326         444 :         if (s == 0) {
     327           0 :           Troot[j+j*ld]       = T[j+j*ld]-1.0;
     328           0 :           Troot[j+1+j*ld]     = T[j+1+j*ld];
     329           0 :           Troot[j+(j+1)*ld]   = T[j+(j+1)*ld];
     330           0 :           Troot[j+1+(j+1)*ld] = T[j+1+(j+1)*ld]-1.0;
     331           0 :           continue;
     332             :         }
     333         444 :         A[0] = T[j+j*ld]; A[1] = T[j+1+j*ld]; A[2] = T[j+(j+1)*ld]; A[3] = T[j+1+(j+1)*ld];
     334         444 :         PetscCall(sqrtm_tbt(A));
     335             :         /* Z0 = A - I */
     336         444 :         Z0[0] = A[0]-1.0; Z0[1] = A[1]; Z0[2] = A[2]; Z0[3] = A[3]-1.0;
     337         444 :         if (s == 1) {
     338           0 :           Troot[j+j*ld]       = Z0[0];
     339           0 :           Troot[j+1+j*ld]     = Z0[1];
     340           0 :           Troot[j+(j+1)*ld]   = Z0[2];
     341           0 :           Troot[j+1+(j+1)*ld] = Z0[3];
     342           0 :           continue;
     343             :         }
     344         444 :         PetscCall(sqrtm_tbt(A));
     345             :         /* P = A + I */
     346         444 :         P[0] = A[0]+1.0; P[1] = A[1]; P[2] = A[2]; P[3] = A[3]+1.0;
     347        1776 :         for (i=0;i<s-2;i++) {
     348        1332 :           PetscCall(sqrtm_tbt(A));
     349             :           /* P = P*(I + A) */
     350        1332 :           M[0] = P[0]*(A[0]+1.0)+P[2]*A[1];
     351        1332 :           M[1] = P[1]*(A[0]+1.0)+P[3]*A[1];
     352        1332 :           M[2] = P[0]*A[2]+P[2]*(A[3]+1.0);
     353        1332 :           M[3] = P[1]*A[2]+P[3]*(A[3]+1.0);
     354        1332 :           P[0] = M[0]; P[1] = M[1]; P[2] = M[2]; P[3] = M[3];
     355             :         }
     356             :         /* Troot(j:j+1,j:j+1) = Z0 / P (via Cramer) */
     357         444 :         det = P[0]*P[3]-P[1]*P[2];
     358         444 :         Troot[j+j*ld]       = (Z0[0]*P[3]-P[1]*Z0[2])/det;
     359         444 :         Troot[j+(j+1)*ld]   = (P[0]*Z0[2]-Z0[0]*P[2])/det;
     360         444 :         Troot[j+1+j*ld]     = (Z0[1]*P[3]-P[1]*Z0[3])/det;
     361         444 :         Troot[j+1+(j+1)*ld] = (P[0]*Z0[3]-Z0[1]*P[2])/det;
     362             :         /* If block is upper triangular recompute the (1,2) element.
     363             :            Skip when T(j,j) or T(j+1,j+1) < 0 since the implementation of atanh is nonstandard */
     364         444 :         if (T[j+1+j*ld]==0.0 && PetscRealPart(T[j+j*ld])>=0.0 && PetscRealPart(T[j+1+(j+1)*ld])>=0.0) {
     365         344 :           Troot[j+(j+1)*ld] = powerm2by2(T[j+j*ld],T[j+1+(j+1)*ld],T[j+(j+1)*ld],1.0/PetscPowInt(2,s));
     366             :         }
     367             : #if !defined(PETSC_USE_COMPLEX)
     368             :     }
     369             : #endif
     370             :   }
     371             : #if !defined(PETSC_USE_COMPLEX)
     372             :   /* If last diagonal entry is not in a block it will have been missed */
     373             :   if (blockStruct[n-2] == 0) {
     374             :     a = T[n-1+(n-1)*ld];
     375             :     Troot[n-1+(n-1)*ld] = sqrt_obo(a,s);
     376             :   }
     377             : #endif
     378           6 :   PetscFunctionReturn(PETSC_SUCCESS);
     379             : }
     380             : 
     381             : /*
     382             :    Nodes x and weights w for n-point Gauss-Legendre quadrature (Q is n*n workspace)
     383             : 
     384             :    G. H. Golub and J. H. Welsch, Calculation of Gauss quadrature
     385             :       rules, Math. Comp., 23(106):221-230, 1969.
     386             : */
     387           6 : static PetscErrorCode gauss_legendre(PetscBLASInt n,PetscScalar *x,PetscScalar *w,PetscScalar *Q)
     388             : {
     389           6 :   PetscScalar    v,a,*work;
     390           6 :   PetscReal      *eig,dummy;
     391           6 :   PetscBLASInt   k,ld=n,lwork,info;
     392             : #if defined(PETSC_USE_COMPLEX)
     393           6 :   PetscReal      *rwork,rdummy;
     394             : #endif
     395             : 
     396           6 :   PetscFunctionBegin;
     397           6 :   PetscCall(PetscArrayzero(Q,n*n));
     398          38 :   for (k=1;k<n;k++) {
     399          32 :     v = k/PetscSqrtReal(4.0*k*k-1.0);
     400          32 :     Q[k+(k-1)*n] = v;
     401          32 :     Q[(k-1)+k*n] = v;
     402             :   }
     403             : 
     404             :   /* workspace query and memory allocation */
     405           6 :   lwork = -1;
     406             : #if defined(PETSC_USE_COMPLEX)
     407           6 :   PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&rdummy,&info));
     408           6 :   PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork));
     409           6 :   PetscCall(PetscMalloc3(n,&eig,lwork,&work,PetscMax(1,3*n-2),&rwork));
     410             : #else
     411             :   PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&info));
     412             :   PetscCall(PetscBLASIntCast((PetscInt)a,&lwork));
     413             :   PetscCall(PetscMalloc2(n,&eig,lwork,&work));
     414             : #endif
     415             : 
     416             :   /* compute eigendecomposition */
     417             : #if defined(PETSC_USE_COMPLEX)
     418           6 :   PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,rwork,&info));
     419             : #else
     420             :   PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,&info));
     421             : #endif
     422           6 :   SlepcCheckLapackInfo("syev",info);
     423             : 
     424          44 :   for (k=0;k<n;k++) {
     425          38 :     x[k] = eig[k];
     426          38 :     w[k] = 2.0*Q[k*n]*Q[k*n];
     427             :   }
     428             : #if defined(PETSC_USE_COMPLEX)
     429           6 :   PetscCall(PetscFree3(eig,work,rwork));
     430             : #else
     431             :   PetscCall(PetscFree2(eig,work));
     432             : #endif
     433           6 :   PetscCall(PetscLogFlops(9.0*n*n*n+2.0*n*n*n));
     434           6 :   PetscFunctionReturn(PETSC_SUCCESS);
     435             : }
     436             : 
     437             : /*
     438             :    Pade approximation to log(1 + T) via partial fractions
     439             : */
     440           6 : static PetscErrorCode pade_approx(PetscBLASInt n,PetscScalar *T,PetscScalar *L,PetscBLASInt ld,PetscInt m,PetscScalar *work)
     441             : {
     442           6 :   PetscScalar    *K,*W,*nodes,*wts;
     443           6 :   PetscBLASInt   *ipiv,info,mm;
     444           6 :   PetscInt       i,j,k;
     445             : 
     446           6 :   PetscFunctionBegin;
     447           6 :   K     = work;
     448           6 :   W     = work+n*n;
     449           6 :   nodes = work+2*n*n;
     450           6 :   wts   = work+2*n*n+m;
     451           6 :   ipiv  = (PetscBLASInt*)(work+2*n*n+2*m);
     452           6 :   PetscCall(PetscBLASIntCast(m,&mm));
     453           6 :   PetscCall(gauss_legendre(mm,nodes,wts,L));
     454             :   /* Convert from [-1,1] to [0,1] */
     455          44 :   for (i=0;i<m;i++) {
     456          38 :     nodes[i] = (nodes[i]+1.0)/2.0;
     457          38 :     wts[i] = wts[i]/2.0;
     458             :   }
     459           6 :   PetscCall(PetscArrayzero(L,n*n));
     460          44 :   for (k=0;k<m;k++) {
     461      216638 :     for (i=0;i<n;i++) for (j=0;j<n;j++) K[i+j*ld] = nodes[k]*T[i+j*ld];
     462        2888 :     for (i=0;i<n;i++) K[i+i*ld] += 1.0;
     463      216638 :     for (i=0;i<n;i++) for (j=0;j<n;j++) W[i+j*ld] = T[i+j*ld];
     464          38 :     PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,K,&n,ipiv,W,&n,&info));
     465      216638 :     for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] += wts[k]*W[i+j*ld];
     466             :   }
     467           6 :   PetscFunctionReturn(PETSC_SUCCESS);
     468             : }
     469             : 
     470             : /*
     471             :    Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
     472             : */
     473           6 : static PetscErrorCode recompute_diag_blocks_log(PetscBLASInt n,PetscScalar *L,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct)
     474             : {
     475           6 :   PetscScalar a1,a2,a12,loga1,loga2,z,dd;
     476           6 :   PetscInt    j;
     477             : #if !defined(PETSC_USE_COMPLEX)
     478             :   PetscInt    last_block=0;
     479             :   PetscScalar f,t;
     480             : #endif
     481             : 
     482           6 :   PetscFunctionBegin;
     483         450 :   for (j=0;j<n-1;j++) {
     484             : #if !defined(PETSC_USE_COMPLEX)
     485             :     switch (blockStruct[j]) {
     486             :       case 0: /* Not start of a block */
     487             :         if (last_block != 0) {
     488             :           last_block = 0;
     489             :         } else { /* In a 1x1 block */
     490             :           L[j+j*ld] = PetscLogScalar(T[j+j*ld]);
     491             :         }
     492             :         break;
     493             :       case 1: /* Start of upper-tri block */
     494             :         last_block = 1;
     495             : #endif
     496         444 :         a1 = T[j+j*ld];
     497         444 :         a2 = T[j+1+(j+1)*ld];
     498         444 :         loga1 = PetscLogScalar(a1);
     499         444 :         loga2 = PetscLogScalar(a2);
     500         444 :         L[j+j*ld] = loga1;
     501         444 :         L[j+1+(j+1)*ld] = loga2;
     502         444 :         if ((PetscRealPart(a1)<0.0 && PetscImaginaryPart(a1)==0.0) || (PetscRealPart(a2)<0.0 && PetscImaginaryPart(a1)==0.0)) {
     503             :          /* Problems with 2 x 2 formula for (1,2) block
     504             :             since atanh is nonstandard, just redo diagonal part */
     505           0 :           continue;
     506             :         }
     507         444 :         if (a1 == a2) {
     508         146 :           a12 = T[j+(j+1)*ld]/a1;
     509         298 :         } else if (PetscAbsScalar(a1)<0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2)<0.5*PetscAbsScalar(a1)) {
     510           2 :           a12 = T[j+(j+1)*ld]*(loga2-loga1)/(a2-a1);
     511             :         } else {  /* Close eigenvalues */
     512         296 :           z = (a2-a1)/(a2+a1);
     513         296 :           dd = 2.0*PetscAtanhScalar(z);
     514             : #if defined(PETSC_USE_COMPLEX)
     515         296 :           dd += 2.0*PETSC_i*PETSC_PI*unwinding(loga2-loga1);
     516             : #endif
     517         296 :           dd /= (a2-a1);
     518         296 :           a12 = T[j+(j+1)*ld]*dd;
     519             :         }
     520         444 :         L[j+(j+1)*ld] = a12;
     521             : #if !defined(PETSC_USE_COMPLEX)
     522             :         break;
     523             :       case 2: /* Start of quasi-tri block */
     524             :         last_block = 2;
     525             :         f = 0.5*PetscLogScalar(T[j+j*ld]*T[j+j*ld]-T[j+(j+1)*ld]*T[j+1+j*ld]);
     526             :         t = PetscAtan2Real(PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]),T[j+j*ld])/PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]);
     527             :         L[j+j*ld]       = f;
     528             :         L[j+1+j*ld]     = t*T[j+1+j*ld];
     529             :         L[j+(j+1)*ld]   = t*T[j+(j+1)*ld];
     530             :         L[j+1+(j+1)*ld] = f;
     531             :     }
     532             : #endif
     533             :   }
     534           6 :   PetscFunctionReturn(PETSC_SUCCESS);
     535             : }
     536             : /*
     537             :  * Matrix logarithm implementation based on algorithm and matlab code by N. Higham and co-authors
     538             :  *
     539             :  *     H. Al-Mohy and N. J. Higham, "Improved inverse scaling and squaring
     540             :  *     algorithms for the matrix logarithm", SIAM J. Sci. Comput. 34(4):C153-C169, 2012.
     541             :  */
     542           6 : static PetscErrorCode FNLogmPade(FN fn,PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscBool firstonly)
     543             : {
     544           6 :   PetscBLASInt   k,sdim,lwork,info;
     545           6 :   PetscScalar    *wr,*wi=NULL,*W,*Q,*Troot,*L,*work,one=1.0,zero=0.0,alpha;
     546           6 :   PetscInt       i,j,s=0,m=0,*blockformat;
     547             : #if defined(PETSC_USE_COMPLEX)
     548           6 :   PetscReal      *rwork;
     549             : #endif
     550             : 
     551           6 :   PetscFunctionBegin;
     552           6 :   lwork = 3*n*n; /* gees needs only 5*n, but work is also passed to FNlogm_params */
     553           6 :   k     = firstonly? 1: n;
     554             : 
     555             :   /* compute Schur decomposition A*Q = Q*T */
     556           6 :   PetscCall(PetscCalloc7(n,&wr,n*k,&W,n*n,&Q,n*n,&Troot,n*n,&L,lwork,&work,n-1,&blockformat));
     557             : #if !defined(PETSC_USE_COMPLEX)
     558             :   PetscCall(PetscMalloc1(n,&wi));
     559             :   PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,wi,Q,&ld,work,&lwork,NULL,&info));
     560             : #else
     561           6 :   PetscCall(PetscMalloc1(n,&rwork));
     562           6 :   PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,Q,&ld,work,&lwork,rwork,NULL,&info));
     563             : #endif
     564           6 :   SlepcCheckLapackInfo("gees",info);
     565             : 
     566             : #if !defined(PETSC_USE_COMPLEX)
     567             :   /* check for negative real eigenvalues */
     568             :   for (i=0;i<n;i++) {
     569             :     PetscCheck(wr[i]>=0.0 || wi[i]!=0.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix has negative real eigenvalue; rerun with complex scalars");
     570             :   }
     571             : #endif
     572             : 
     573             :   /* get block structure of Schur factor */
     574           6 :   PetscCall(qtri_struct(n,T,ld,blockformat));
     575             : 
     576             :   /* get parameters */
     577           6 :   PetscCall(FNlogm_params(fn,n,T,ld,wr,wi,100,&s,&m,Troot,work));
     578             : 
     579             :   /* compute Troot - I = T(1/2^s) - I more accurately */
     580           6 :   PetscCall(recompute_diag_blocks_sqrt(n,Troot,T,ld,blockformat,s));
     581             : 
     582             :   /* compute Pade approximant */
     583           6 :   PetscCall(pade_approx(n,Troot,L,ld,m,work));
     584             : 
     585             :   /* scale back up, L = 2^s * L */
     586           6 :   alpha = PetscPowInt(2,s);
     587       34206 :   for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] *= alpha;
     588             : 
     589             :   /* recompute diagonal blocks */
     590           6 :   PetscCall(recompute_diag_blocks_log(n,L,T,ld,blockformat));
     591             : 
     592             :   /* backtransform B = Q*L*Q' */
     593           6 :   PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&k,&n,&one,L,&ld,Q,&ld,&zero,W,&ld));
     594           6 :   PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&k,&n,&one,Q,&ld,W,&ld,&zero,T,&ld));
     595             : 
     596             :   /* flop count: Schur decomposition, and backtransform */
     597           6 :   PetscCall(PetscLogFlops(25.0*n*n*n+4.0*n*n*k));
     598             : 
     599           6 :   PetscCall(PetscFree7(wr,W,Q,Troot,L,work,blockformat));
     600             : #if !defined(PETSC_USE_COMPLEX)
     601             :   PetscCall(PetscFree(wi));
     602             : #else
     603           6 :   PetscCall(PetscFree(rwork));
     604             : #endif
     605           6 :   PetscFunctionReturn(PETSC_SUCCESS);
     606             : }
     607             : 
     608           3 : static PetscErrorCode FNEvaluateFunctionMat_Log_Higham(FN fn,Mat A,Mat B)
     609             : {
     610           3 :   PetscBLASInt   n = 0;
     611           3 :   PetscScalar    *T;
     612           3 :   PetscInt       m;
     613             : 
     614           3 :   PetscFunctionBegin;
     615           3 :   if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN));
     616           3 :   PetscCall(MatDenseGetArray(B,&T));
     617           3 :   PetscCall(MatGetSize(A,&m,NULL));
     618           3 :   PetscCall(PetscBLASIntCast(m,&n));
     619           3 :   PetscCall(FNLogmPade(fn,n,T,n,PETSC_FALSE));
     620           3 :   PetscCall(MatDenseRestoreArray(B,&T));
     621           3 :   PetscFunctionReturn(PETSC_SUCCESS);
     622             : }
     623             : 
     624           3 : static PetscErrorCode FNEvaluateFunctionMatVec_Log_Higham(FN fn,Mat A,Vec v)
     625             : {
     626           3 :   PetscBLASInt   n = 0;
     627           3 :   PetscScalar    *T;
     628           3 :   PetscInt       m;
     629           3 :   Mat            B;
     630             : 
     631           3 :   PetscFunctionBegin;
     632           3 :   PetscCall(FN_AllocateWorkMat(fn,A,&B));
     633           3 :   PetscCall(MatDenseGetArray(B,&T));
     634           3 :   PetscCall(MatGetSize(A,&m,NULL));
     635           3 :   PetscCall(PetscBLASIntCast(m,&n));
     636           3 :   PetscCall(FNLogmPade(fn,n,T,n,PETSC_TRUE));
     637           3 :   PetscCall(MatDenseRestoreArray(B,&T));
     638           3 :   PetscCall(MatGetColumnVector(B,v,0));
     639           3 :   PetscCall(FN_FreeWorkMat(fn,&B));
     640           3 :   PetscFunctionReturn(PETSC_SUCCESS);
     641             : }
     642             : 
     643           5 : static PetscErrorCode FNView_Log(FN fn,PetscViewer viewer)
     644             : {
     645           5 :   PetscBool      isascii;
     646           5 :   char           str[50];
     647           5 :   const char     *methodname[] = {
     648             :                   "scaling & squaring, [m/m] Pade approximant (Higham)"
     649             :   };
     650           5 :   const int      nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);
     651             : 
     652           5 :   PetscFunctionBegin;
     653           5 :   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
     654           5 :   if (isascii) {
     655           5 :     if (fn->beta==(PetscScalar)1.0) {
     656           1 :       if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer,"  logarithm: log(x)\n"));
     657             :       else {
     658           0 :         PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE));
     659           0 :         PetscCall(PetscViewerASCIIPrintf(viewer,"  logarithm: log(%s*x)\n",str));
     660             :       }
     661             :     } else {
     662           4 :       PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE));
     663           4 :       if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer,"  logarithm: %s*log(x)\n",str));
     664             :       else {
     665           4 :         PetscCall(PetscViewerASCIIPrintf(viewer,"  logarithm: %s",str));
     666           4 :         PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
     667           4 :         PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE));
     668           4 :         PetscCall(PetscViewerASCIIPrintf(viewer,"*log(%s*x)\n",str));
     669           4 :         PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
     670             :       }
     671             :     }
     672           5 :     if (fn->method<nmeth) PetscCall(PetscViewerASCIIPrintf(viewer,"  computing matrix functions with: %s\n",methodname[fn->method]));
     673             :   }
     674           5 :   PetscFunctionReturn(PETSC_SUCCESS);
     675             : }
     676             : 
     677           4 : SLEPC_EXTERN PetscErrorCode FNCreate_Log(FN fn)
     678             : {
     679           4 :   PetscFunctionBegin;
     680           4 :   fn->ops->evaluatefunction          = FNEvaluateFunction_Log;
     681           4 :   fn->ops->evaluatederivative        = FNEvaluateDerivative_Log;
     682           4 :   fn->ops->evaluatefunctionmat[0]    = FNEvaluateFunctionMat_Log_Higham;
     683           4 :   fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Log_Higham;
     684           4 :   fn->ops->view                      = FNView_Log;
     685           4 :   PetscFunctionReturn(PETSC_SUCCESS);
     686             : }

Generated by: LCOV version 1.14