Actual source code: fnlog.c

slepc-3.21.1 2024-04-26
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Logarithm function  log(x)
 12: */

 14: #include <slepc/private/fnimpl.h>
 15: #include <slepcblaslapack.h>

 17: static PetscErrorCode FNEvaluateFunction_Log(FN fn,PetscScalar x,PetscScalar *y)
 18: {
 19:   PetscFunctionBegin;
 20: #if !defined(PETSC_USE_COMPLEX)
 21:   PetscCheck(x>=0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Function not defined in the requested value");
 22: #endif
 23:   *y = PetscLogScalar(x);
 24:   PetscFunctionReturn(PETSC_SUCCESS);
 25: }

 27: static PetscErrorCode FNEvaluateDerivative_Log(FN fn,PetscScalar x,PetscScalar *y)
 28: {
 29:   PetscFunctionBegin;
 30:   PetscCheck(x!=0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
 31: #if !defined(PETSC_USE_COMPLEX)
 32:   PetscCheck(x>0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
 33: #endif
 34:   *y = 1.0/x;
 35:   PetscFunctionReturn(PETSC_SUCCESS);
 36: }

 38: /*
 39:    Block structure of a quasitriangular matrix T. Returns a list of n-1 numbers, where
 40:    structure(j) encodes the block type of the j:j+1,j:j+1 diagonal block as one of:
 41:       0 - not the start of a block
 42:       1 - start of a 2x2 triangular block
 43:       2 - start of a 2x2 quasi-triangular (full) block
 44: */
 45: static PetscErrorCode qtri_struct(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscInt *structure)
 46: {
 47:   PetscBLASInt j;

 49:   PetscFunctionBegin;
 50: #if defined(PETSC_USE_COMPLEX)
 51:   for (j=0;j<n-1;j++) structure[j] = 1;
 52: #else
 53:   if (n==1) PetscFunctionReturn(PETSC_SUCCESS);
 54:   else if (n==2) {
 55:     structure[0] = (T[1]==0.0)? 1: 2;
 56:     PetscFunctionReturn(PETSC_SUCCESS);
 57:   }
 58:   j = 0;
 59:   while (j<n-2) {
 60:     if (T[j+1+j*ld] != 0.0) { /* Start of a 2x2 full block */
 61:       structure[j++] = 2;
 62:       structure[j++] = 0;
 63:     } else if (T[j+1+j*ld] == 0.0 && T[j+2+(j+1)*ld] == 0.0) { /* Start of a 2x2 triangular block */
 64:       structure[j++] = 1;
 65:     } else { /* Next block must start a 2x2 full block */
 66:       structure[j++] = 0;
 67:     }
 68:   }
 69:   if (T[n-1+(n-2)*ld] != 0.0) { /* 2x2 full block at the end */
 70:     structure[n-2] = 2;
 71:   } else if (structure[n-3] == 0 || structure[n-3] == 1) {
 72:     structure[n-2] = 1;
 73:   }
 74: #endif
 75:   PetscFunctionReturn(PETSC_SUCCESS);
 76: }

 78: /*
 79:    Compute scaling parameter (s) and order of Pade approximant (m).
 80:    wr,wi is overwritten. Required workspace is 3*n*n.
 81:    On output, Troot contains the sth square root of T.
 82: */
 83: static PetscErrorCode FNlogm_params(FN fn,PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscScalar *wr,PetscScalar *wi,PetscInt maxroots,PetscInt *s,PetscInt *m,PetscScalar *Troot,PetscScalar *work)
 84: {
 85:   PetscInt        i,j,k,p,s0;
 86:   PetscReal       inrm,eta,a2,a3,a4,d2,d3,d4,d5;
 87:   PetscScalar     *TrootmI=work+2*n*n;
 88:   PetscBool       foundm=PETSC_FALSE,more;
 89:   PetscRandom     rand;
 90:   const PetscReal xvals[] = { 1.586970738772063e-005, 2.313807884242979e-003, 1.938179313533253e-002,
 91:        6.209171588994762e-002, 1.276404810806775e-001, 2.060962623452836e-001, 2.879093714241194e-001 };
 92:   const PetscInt  mmax=PETSC_STATIC_ARRAY_LENGTH(xvals);

 94:   PetscFunctionBegin;
 95:   PetscCall(PetscRandomCreate(PETSC_COMM_SELF,&rand));
 96:   /* get initial s0 so that T^(1/2^s0) < xvals(mmax) */
 97:   *s = 0;
 98:   do {
 99:     inrm = SlepcAbsEigenvalue(wr[0]-PetscRealConstant(1.0),wi[0]);
100:     for (i=1;i<n;i++) inrm = PetscMax(inrm,SlepcAbsEigenvalue(wr[i]-PetscRealConstant(1.0),wi[i]));
101:     if (inrm < xvals[mmax-1]) break;
102:     for (i=0;i<n;i++) {
103: #if defined(PETSC_USE_COMPLEX)
104:       wr[i] = PetscSqrtScalar(wr[i]);
105: #else
106: #if defined(PETSC_HAVE_COMPLEX)
107:       PetscComplex z = PetscSqrtComplex(PetscCMPLX(wr[i],wi[i]));
108:       wr[i] = PetscRealPartComplex(z);
109:       wi[i] = PetscImaginaryPartComplex(z);
110: #else
111:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This operation requires a compiler with C99 or C++ complex support");
112: #endif
113: #endif
114:     }
115:     *s = *s + 1;
116:   } while (*s<maxroots);
117:   s0 = *s;
118:   if (*s == maxroots) PetscCall(PetscInfo(fn,"Too many matrix square roots\n"));

120:   /* Troot = T */
121:   for (j=0;j<n;j++) PetscCall(PetscArraycpy(Troot+j*ld,T+j*ld,PetscMin(j+2,n)));
122:   for (k=1;k<=PetscMin(*s,maxroots);k++) PetscCall(FNSqrtmSchur(fn,n,Troot,ld,PETSC_FALSE));
123:   /* Compute value of s and m needed */
124:   /* TrootmI = Troot - I */
125:   for (j=0;j<n;j++) {
126:     PetscCall(PetscArraycpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n)));
127:     TrootmI[j+j*ld] -= 1.0;
128:   }
129:   PetscCall(SlepcNormAm(n,TrootmI,2,work,rand,&d2));
130:   d2 = PetscPowReal(d2,1.0/2.0);
131:   PetscCall(SlepcNormAm(n,TrootmI,3,work,rand,&d3));
132:   d3 = PetscPowReal(d3,1.0/3.0);
133:   a2 = PetscMax(d2,d3);
134:   if (a2 <= xvals[1]) {
135:     *m = (a2 <= xvals[0])? 1: 2;
136:     foundm = PETSC_TRUE;
137:   }
138:   p = 0;
139:   while (!foundm) {
140:     more = PETSC_FALSE;  /* More norm checks needed */
141:     if (*s > s0) {
142:       PetscCall(SlepcNormAm(n,TrootmI,3,work,rand,&d3));
143:       d3 = PetscPowReal(d3,1.0/3.0);
144:     }
145:     PetscCall(SlepcNormAm(n,TrootmI,4,work,rand,&d4));
146:     d4 = PetscPowReal(d4,1.0/4.0);
147:     a3 = PetscMax(d3,d4);
148:     if (a3 <= xvals[mmax-1]) {
149:       for (j=2;j<mmax;j++) if (a3 <= xvals[j]) break;
150:       if (j <= 5) {
151:         *m = j+1;
152:         break;
153:       } else if (a3/2.0 <= xvals[4] && p < 2) {
154:         more = PETSC_TRUE;
155:         p = p + 1;
156:       }
157:     }
158:     if (!more) {
159:       PetscCall(SlepcNormAm(n,TrootmI,5,work,rand,&d5));
160:       d5 = PetscPowReal(d5,1.0/5.0);
161:       a4 = PetscMax(d4,d5);
162:       eta = PetscMin(a3,a4);
163:       if (eta <= xvals[mmax-1]) {
164:         for (j=5;j<mmax;j++) if (eta <= xvals[j]) break;
165:         *m = j + 1;
166:         break;
167:       }
168:     }
169:     if (*s == maxroots) {
170:       PetscCall(PetscInfo(fn,"Too many matrix square roots\n"));
171:       *m = mmax;  /* No good value found so take largest */
172:       break;
173:     }
174:     PetscCall(FNSqrtmSchur(fn,n,Troot,ld,PETSC_FALSE));
175:     /* TrootmI = Troot - I */
176:     for (j=0;j<n;j++) {
177:       PetscCall(PetscArraycpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n)));
178:       TrootmI[j+j*ld] -= 1.0;
179:     }
180:     *s = *s + 1;
181:   }
182:   PetscCall(PetscRandomDestroy(&rand));
183:   PetscFunctionReturn(PETSC_SUCCESS);
184: }

186: #if !defined(PETSC_USE_COMPLEX)
187: /*
188:    Computes a^(1/2^s) - 1 accurately, avoiding subtractive cancellation
189: */
190: static PetscScalar sqrt_obo(PetscScalar a,PetscInt s)
191: {
192:   PetscScalar val,z0,r;
193:   PetscReal   angle;
194:   PetscInt    i,n0;

196:   PetscFunctionBegin;
197:   if (s == 0) val = a-1.0;
198:   else {
199:     n0 = s;
200:     angle = PetscAtan2Real(PetscImaginaryPart(a),PetscRealPart(a));
201:     if (angle >= PETSC_PI/2.0) {
202:       a = PetscSqrtScalar(a);
203:       n0 = s - 1;
204:     }
205:     z0 = a - 1.0;
206:     a = PetscSqrtScalar(a);
207:     r = 1.0 + a;
208:     for (i=0;i<n0-1;i++) {
209:       a = PetscSqrtScalar(a);
210:       r = r*(1.0+a);
211:     }
212:     val = z0/r;
213:   }
214:   PetscFunctionReturn(val);
215: }
216: #endif

218: /*
219:    Square root of 2x2 matrix T from block diagonal of Schur form. Overwrites T.
220: */
221: static PetscErrorCode sqrtm_tbt(PetscScalar *T)
222: {
223:   PetscScalar t11,t12,t21,t22,r11,r22;
224: #if !defined(PETSC_USE_COMPLEX)
225:   PetscScalar mu;
226: #endif

228:   PetscFunctionBegin;
229:   t11 = T[0]; t21 = T[1]; t12 = T[2]; t22 = T[3];
230:   if (t21 != 0.0) {
231:     /* Compute square root of 2x2 quasitriangular block */
232:     /* The algorithm assumes the special structure of real Schur form */
233: #if defined(PETSC_USE_COMPLEX)
234:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Should not reach this line in complex scalars");
235: #else
236:     mu = PetscSqrtReal(-t21*t12);
237:     if (t11 > 0.0) r11 = PetscSqrtReal((t11+SlepcAbsEigenvalue(t11,mu))/2.0);
238:     else r11 = mu / PetscSqrtReal(2.0*(-t11+SlepcAbsEigenvalue(t11,mu)));
239:     T[0] = r11;
240:     T[1] = t21/(2.0*r11);
241:     T[2] = t12/(2.0*r11);
242:     T[3] = r11;
243: #endif
244:   } else {
245:     /* Compute square root of 2x2 upper triangular block */
246:     r11 = PetscSqrtScalar(t11);
247:     r22 = PetscSqrtScalar(t22);
248:     T[0] = r11;
249:     T[2] = t12/(r11+r22);
250:     T[3] = r22;
251:   }
252:   PetscFunctionReturn(PETSC_SUCCESS);
253: }

255: #if defined(PETSC_USE_COMPLEX)
256: /*
257:    Unwinding number of z
258: */
259: static inline PetscReal unwinding(PetscScalar z)
260: {
261:   PetscReal u;

263:   PetscFunctionBegin;
264:   u = PetscCeilReal((PetscImaginaryPart(z)-PETSC_PI)/(2.0*PETSC_PI));
265:   PetscFunctionReturn(u);
266: }
267: #endif

269: /*
270:    Power of 2-by-2 upper triangular matrix A.
271:    Returns the (1,2) element of the pth power of A, where p is an arbitrary real number
272: */
273: static PetscScalar powerm2by2(PetscScalar A11,PetscScalar A22,PetscScalar A12,PetscReal p)
274: {
275:   PetscScalar a1,a2,a1p,a2p,loga1,loga2,w,dd,x12;

277:   PetscFunctionBegin;
278:   a1 = A11;
279:   a2 = A22;
280:   if (a1 == a2) {
281:     x12 = p*A12*PetscPowScalarReal(a1,p-1);
282:   } else if (PetscAbsScalar(a1) < 0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2) < 0.5*PetscAbsScalar(a1)) {
283:     a1p = PetscPowScalarReal(a1,p);
284:     a2p = PetscPowScalarReal(a2,p);
285:     x12 = A12*(a2p-a1p)/(a2-a1);
286:   } else {  /* Close eigenvalues */
287:     loga1 = PetscLogScalar(a1);
288:     loga2 = PetscLogScalar(a2);
289:     w = PetscAtanhScalar((a2-a1)/(a2+a1));
290: #if defined(PETSC_USE_COMPLEX)
291:     w += PETSC_i*PETSC_PI*unwinding(loga2-loga1);
292: #endif
293:     dd = 2.0*PetscExpScalar(p*(loga1+loga2)/2.0)*PetscSinhScalar(p*w)/(a2-a1);
294:     x12 = A12*dd;
295:   }
296:   PetscFunctionReturn(x12);
297: }

299: /*
300:    Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
301: */
302: static PetscErrorCode recompute_diag_blocks_sqrt(PetscBLASInt n,PetscScalar *Troot,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct,PetscInt s)
303: {
304:   PetscScalar    A[4],P[4],M[4],Z0[4],det;
305:   PetscInt       i,j;
306: #if !defined(PETSC_USE_COMPLEX)
307:   PetscInt       last_block=0;
308:   PetscScalar    a;
309: #endif

311:   PetscFunctionBegin;
312:   for (j=0;j<n-1;j++) {
313: #if !defined(PETSC_USE_COMPLEX)
314:     switch (blockStruct[j]) {
315:       case 0: /* Not start of a block */
316:         if (last_block != 0) {
317:           last_block = 0;
318:         } else { /* In a 1x1 block */
319:           a = T[j+j*ld];
320:           Troot[j+j*ld] = sqrt_obo(a,s);
321:         }
322:         break;
323:       default: /* In a 2x2 block */
324:         last_block = blockStruct[j];
325: #endif
326:         if (s == 0) {
327:           Troot[j+j*ld]       = T[j+j*ld]-1.0;
328:           Troot[j+1+j*ld]     = T[j+1+j*ld];
329:           Troot[j+(j+1)*ld]   = T[j+(j+1)*ld];
330:           Troot[j+1+(j+1)*ld] = T[j+1+(j+1)*ld]-1.0;
331:           continue;
332:         }
333:         A[0] = T[j+j*ld]; A[1] = T[j+1+j*ld]; A[2] = T[j+(j+1)*ld]; A[3] = T[j+1+(j+1)*ld];
334:         PetscCall(sqrtm_tbt(A));
335:         /* Z0 = A - I */
336:         Z0[0] = A[0]-1.0; Z0[1] = A[1]; Z0[2] = A[2]; Z0[3] = A[3]-1.0;
337:         if (s == 1) {
338:           Troot[j+j*ld]       = Z0[0];
339:           Troot[j+1+j*ld]     = Z0[1];
340:           Troot[j+(j+1)*ld]   = Z0[2];
341:           Troot[j+1+(j+1)*ld] = Z0[3];
342:           continue;
343:         }
344:         PetscCall(sqrtm_tbt(A));
345:         /* P = A + I */
346:         P[0] = A[0]+1.0; P[1] = A[1]; P[2] = A[2]; P[3] = A[3]+1.0;
347:         for (i=0;i<s-2;i++) {
348:           PetscCall(sqrtm_tbt(A));
349:           /* P = P*(I + A) */
350:           M[0] = P[0]*(A[0]+1.0)+P[2]*A[1];
351:           M[1] = P[1]*(A[0]+1.0)+P[3]*A[1];
352:           M[2] = P[0]*A[2]+P[2]*(A[3]+1.0);
353:           M[3] = P[1]*A[2]+P[3]*(A[3]+1.0);
354:           P[0] = M[0]; P[1] = M[1]; P[2] = M[2]; P[3] = M[3];
355:         }
356:         /* Troot(j:j+1,j:j+1) = Z0 / P (via Cramer) */
357:         det = P[0]*P[3]-P[1]*P[2];
358:         Troot[j+j*ld]       = (Z0[0]*P[3]-P[1]*Z0[2])/det;
359:         Troot[j+(j+1)*ld]   = (P[0]*Z0[2]-Z0[0]*P[2])/det;
360:         Troot[j+1+j*ld]     = (Z0[1]*P[3]-P[1]*Z0[3])/det;
361:         Troot[j+1+(j+1)*ld] = (P[0]*Z0[3]-Z0[1]*P[2])/det;
362:         /* If block is upper triangular recompute the (1,2) element.
363:            Skip when T(j,j) or T(j+1,j+1) < 0 since the implementation of atanh is nonstandard */
364:         if (T[j+1+j*ld]==0.0 && PetscRealPart(T[j+j*ld])>=0.0 && PetscRealPart(T[j+1+(j+1)*ld])>=0.0) {
365:           Troot[j+(j+1)*ld] = powerm2by2(T[j+j*ld],T[j+1+(j+1)*ld],T[j+(j+1)*ld],1.0/PetscPowInt(2,s));
366:         }
367: #if !defined(PETSC_USE_COMPLEX)
368:     }
369: #endif
370:   }
371: #if !defined(PETSC_USE_COMPLEX)
372:   /* If last diagonal entry is not in a block it will have been missed */
373:   if (blockStruct[n-2] == 0) {
374:     a = T[n-1+(n-1)*ld];
375:     Troot[n-1+(n-1)*ld] = sqrt_obo(a,s);
376:   }
377: #endif
378:   PetscFunctionReturn(PETSC_SUCCESS);
379: }

381: /*
382:    Nodes x and weights w for n-point Gauss-Legendre quadrature (Q is n*n workspace)

384:    G. H. Golub and J. H. Welsch, Calculation of Gauss quadrature
385:       rules, Math. Comp., 23(106):221-230, 1969.
386: */
387: static PetscErrorCode gauss_legendre(PetscBLASInt n,PetscScalar *x,PetscScalar *w,PetscScalar *Q)
388: {
389:   PetscScalar    v,a,*work;
390:   PetscReal      *eig,dummy;
391:   PetscBLASInt   k,ld=n,lwork,info;
392: #if defined(PETSC_USE_COMPLEX)
393:   PetscReal      *rwork,rdummy;
394: #endif

396:   PetscFunctionBegin;
397:   PetscCall(PetscArrayzero(Q,n*n));
398:   for (k=1;k<n;k++) {
399:     v = k/PetscSqrtReal(4.0*k*k-1.0);
400:     Q[k+(k-1)*n] = v;
401:     Q[(k-1)+k*n] = v;
402:   }

404:   /* workspace query and memory allocation */
405:   lwork = -1;
406: #if defined(PETSC_USE_COMPLEX)
407:   PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&rdummy,&info));
408:   PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork));
409:   PetscCall(PetscMalloc3(n,&eig,lwork,&work,PetscMax(1,3*n-2),&rwork));
410: #else
411:   PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&info));
412:   PetscCall(PetscBLASIntCast((PetscInt)a,&lwork));
413:   PetscCall(PetscMalloc2(n,&eig,lwork,&work));
414: #endif

416:   /* compute eigendecomposition */
417: #if defined(PETSC_USE_COMPLEX)
418:   PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,rwork,&info));
419: #else
420:   PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,&info));
421: #endif
422:   SlepcCheckLapackInfo("syev",info);

424:   for (k=0;k<n;k++) {
425:     x[k] = eig[k];
426:     w[k] = 2.0*Q[k*n]*Q[k*n];
427:   }
428: #if defined(PETSC_USE_COMPLEX)
429:   PetscCall(PetscFree3(eig,work,rwork));
430: #else
431:   PetscCall(PetscFree2(eig,work));
432: #endif
433:   PetscCall(PetscLogFlops(9.0*n*n*n+2.0*n*n*n));
434:   PetscFunctionReturn(PETSC_SUCCESS);
435: }

437: /*
438:    Pade approximation to log(1 + T) via partial fractions
439: */
440: static PetscErrorCode pade_approx(PetscBLASInt n,PetscScalar *T,PetscScalar *L,PetscBLASInt ld,PetscInt m,PetscScalar *work)
441: {
442:   PetscScalar    *K,*W,*nodes,*wts;
443:   PetscBLASInt   *ipiv,info;
444:   PetscInt       i,j,k;

446:   PetscFunctionBegin;
447:   K     = work;
448:   W     = work+n*n;
449:   nodes = work+2*n*n;
450:   wts   = work+2*n*n+m;
451:   ipiv  = (PetscBLASInt*)(work+2*n*n+2*m);
452:   PetscCall(gauss_legendre(m,nodes,wts,L));
453:   /* Convert from [-1,1] to [0,1] */
454:   for (i=0;i<m;i++) {
455:     nodes[i] = (nodes[i]+1.0)/2.0;
456:     wts[i] = wts[i]/2.0;
457:   }
458:   PetscCall(PetscArrayzero(L,n*n));
459:   for (k=0;k<m;k++) {
460:     for (i=0;i<n;i++) for (j=0;j<n;j++) K[i+j*ld] = nodes[k]*T[i+j*ld];
461:     for (i=0;i<n;i++) K[i+i*ld] += 1.0;
462:     for (i=0;i<n;i++) for (j=0;j<n;j++) W[i+j*ld] = T[i+j*ld];
463:     PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,K,&n,ipiv,W,&n,&info));
464:     for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] += wts[k]*W[i+j*ld];
465:   }
466:   PetscFunctionReturn(PETSC_SUCCESS);
467: }

469: /*
470:    Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
471: */
472: static PetscErrorCode recompute_diag_blocks_log(PetscBLASInt n,PetscScalar *L,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct)
473: {
474:   PetscScalar a1,a2,a12,loga1,loga2,z,dd;
475:   PetscInt    j;
476: #if !defined(PETSC_USE_COMPLEX)
477:   PetscInt    last_block=0;
478:   PetscScalar f,t;
479: #endif

481:   PetscFunctionBegin;
482:   for (j=0;j<n-1;j++) {
483: #if !defined(PETSC_USE_COMPLEX)
484:     switch (blockStruct[j]) {
485:       case 0: /* Not start of a block */
486:         if (last_block != 0) {
487:           last_block = 0;
488:         } else { /* In a 1x1 block */
489:           L[j+j*ld] = PetscLogScalar(T[j+j*ld]);
490:         }
491:         break;
492:       case 1: /* Start of upper-tri block */
493:         last_block = 1;
494: #endif
495:         a1 = T[j+j*ld];
496:         a2 = T[j+1+(j+1)*ld];
497:         loga1 = PetscLogScalar(a1);
498:         loga2 = PetscLogScalar(a2);
499:         L[j+j*ld] = loga1;
500:         L[j+1+(j+1)*ld] = loga2;
501:         if ((PetscRealPart(a1)<0.0 && PetscImaginaryPart(a1)==0.0) || (PetscRealPart(a2)<0.0 && PetscImaginaryPart(a1)==0.0)) {
502:          /* Problems with 2 x 2 formula for (1,2) block
503:             since atanh is nonstandard, just redo diagonal part */
504:           continue;
505:         }
506:         if (a1 == a2) {
507:           a12 = T[j+(j+1)*ld]/a1;
508:         } else if (PetscAbsScalar(a1)<0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2)<0.5*PetscAbsScalar(a1)) {
509:           a12 = T[j+(j+1)*ld]*(loga2-loga1)/(a2-a1);
510:         } else {  /* Close eigenvalues */
511:           z = (a2-a1)/(a2+a1);
512:           dd = 2.0*PetscAtanhScalar(z);
513: #if defined(PETSC_USE_COMPLEX)
514:           dd += 2.0*PETSC_i*PETSC_PI*unwinding(loga2-loga1);
515: #endif
516:           dd /= (a2-a1);
517:           a12 = T[j+(j+1)*ld]*dd;
518:         }
519:         L[j+(j+1)*ld] = a12;
520: #if !defined(PETSC_USE_COMPLEX)
521:         break;
522:       case 2: /* Start of quasi-tri block */
523:         last_block = 2;
524:         f = 0.5*PetscLogScalar(T[j+j*ld]*T[j+j*ld]-T[j+(j+1)*ld]*T[j+1+j*ld]);
525:         t = PetscAtan2Real(PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]),T[j+j*ld])/PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]);
526:         L[j+j*ld]       = f;
527:         L[j+1+j*ld]     = t*T[j+1+j*ld];
528:         L[j+(j+1)*ld]   = t*T[j+(j+1)*ld];
529:         L[j+1+(j+1)*ld] = f;
530:     }
531: #endif
532:   }
533:   PetscFunctionReturn(PETSC_SUCCESS);
534: }
535: /*
536:  * Matrix logarithm implementation based on algorithm and matlab code by N. Higham and co-authors
537:  *
538:  *     H. Al-Mohy and N. J. Higham, "Improved inverse scaling and squaring
539:  *     algorithms for the matrix logarithm", SIAM J. Sci. Comput. 34(4):C153-C169, 2012.
540:  */
541: static PetscErrorCode FNLogmPade(FN fn,PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscBool firstonly)
542: {
543:   PetscBLASInt   k,sdim,lwork,info;
544:   PetscScalar    *wr,*wi=NULL,*W,*Q,*Troot,*L,*work,one=1.0,zero=0.0,alpha;
545:   PetscInt       i,j,s=0,m=0,*blockformat;
546: #if defined(PETSC_USE_COMPLEX)
547:   PetscReal      *rwork;
548: #endif

550:   PetscFunctionBegin;
551:   lwork = 3*n*n; /* gees needs only 5*n, but work is also passed to FNlogm_params */
552:   k     = firstonly? 1: n;

554:   /* compute Schur decomposition A*Q = Q*T */
555:   PetscCall(PetscCalloc7(n,&wr,n*k,&W,n*n,&Q,n*n,&Troot,n*n,&L,lwork,&work,n-1,&blockformat));
556: #if !defined(PETSC_USE_COMPLEX)
557:   PetscCall(PetscMalloc1(n,&wi));
558:   PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,wi,Q,&ld,work,&lwork,NULL,&info));
559: #else
560:   PetscCall(PetscMalloc1(n,&rwork));
561:   PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,Q,&ld,work,&lwork,rwork,NULL,&info));
562: #endif
563:   SlepcCheckLapackInfo("gees",info);

565: #if !defined(PETSC_USE_COMPLEX)
566:   /* check for negative real eigenvalues */
567:   for (i=0;i<n;i++) {
568:     PetscCheck(wr[i]>=0.0 || wi[i]!=0.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix has negative real eigenvalue; rerun with complex scalars");
569:   }
570: #endif

572:   /* get block structure of Schur factor */
573:   PetscCall(qtri_struct(n,T,ld,blockformat));

575:   /* get parameters */
576:   PetscCall(FNlogm_params(fn,n,T,ld,wr,wi,100,&s,&m,Troot,work));

578:   /* compute Troot - I = T(1/2^s) - I more accurately */
579:   PetscCall(recompute_diag_blocks_sqrt(n,Troot,T,ld,blockformat,s));

581:   /* compute Pade approximant */
582:   PetscCall(pade_approx(n,Troot,L,ld,m,work));

584:   /* scale back up, L = 2^s * L */
585:   alpha = PetscPowInt(2,s);
586:   for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] *= alpha;

588:   /* recompute diagonal blocks */
589:   PetscCall(recompute_diag_blocks_log(n,L,T,ld,blockformat));

591:   /* backtransform B = Q*L*Q' */
592:   PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&k,&n,&one,L,&ld,Q,&ld,&zero,W,&ld));
593:   PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&k,&n,&one,Q,&ld,W,&ld,&zero,T,&ld));

595:   /* flop count: Schur decomposition, and backtransform */
596:   PetscCall(PetscLogFlops(25.0*n*n*n+4.0*n*n*k));

598:   PetscCall(PetscFree7(wr,W,Q,Troot,L,work,blockformat));
599: #if !defined(PETSC_USE_COMPLEX)
600:   PetscCall(PetscFree(wi));
601: #else
602:   PetscCall(PetscFree(rwork));
603: #endif
604:   PetscFunctionReturn(PETSC_SUCCESS);
605: }

607: static PetscErrorCode FNEvaluateFunctionMat_Log_Higham(FN fn,Mat A,Mat B)
608: {
609:   PetscBLASInt   n = 0;
610:   PetscScalar    *T;
611:   PetscInt       m;

613:   PetscFunctionBegin;
614:   if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN));
615:   PetscCall(MatDenseGetArray(B,&T));
616:   PetscCall(MatGetSize(A,&m,NULL));
617:   PetscCall(PetscBLASIntCast(m,&n));
618:   PetscCall(FNLogmPade(fn,n,T,n,PETSC_FALSE));
619:   PetscCall(MatDenseRestoreArray(B,&T));
620:   PetscFunctionReturn(PETSC_SUCCESS);
621: }

623: static PetscErrorCode FNEvaluateFunctionMatVec_Log_Higham(FN fn,Mat A,Vec v)
624: {
625:   PetscBLASInt   n = 0;
626:   PetscScalar    *T;
627:   PetscInt       m;
628:   Mat            B;

630:   PetscFunctionBegin;
631:   PetscCall(FN_AllocateWorkMat(fn,A,&B));
632:   PetscCall(MatDenseGetArray(B,&T));
633:   PetscCall(MatGetSize(A,&m,NULL));
634:   PetscCall(PetscBLASIntCast(m,&n));
635:   PetscCall(FNLogmPade(fn,n,T,n,PETSC_TRUE));
636:   PetscCall(MatDenseRestoreArray(B,&T));
637:   PetscCall(MatGetColumnVector(B,v,0));
638:   PetscCall(FN_FreeWorkMat(fn,&B));
639:   PetscFunctionReturn(PETSC_SUCCESS);
640: }

642: static PetscErrorCode FNView_Log(FN fn,PetscViewer viewer)
643: {
644:   PetscBool      isascii;
645:   char           str[50];
646:   const char     *methodname[] = {
647:                   "scaling & squaring, [m/m] Pade approximant (Higham)"
648:   };
649:   const int      nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);

651:   PetscFunctionBegin;
652:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
653:   if (isascii) {
654:     if (fn->beta==(PetscScalar)1.0) {
655:       if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer,"  logarithm: log(x)\n"));
656:       else {
657:         PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE));
658:         PetscCall(PetscViewerASCIIPrintf(viewer,"  logarithm: log(%s*x)\n",str));
659:       }
660:     } else {
661:       PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE));
662:       if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer,"  logarithm: %s*log(x)\n",str));
663:       else {
664:         PetscCall(PetscViewerASCIIPrintf(viewer,"  logarithm: %s",str));
665:         PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
666:         PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE));
667:         PetscCall(PetscViewerASCIIPrintf(viewer,"*log(%s*x)\n",str));
668:         PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
669:       }
670:     }
671:     if (fn->method<nmeth) PetscCall(PetscViewerASCIIPrintf(viewer,"  computing matrix functions with: %s\n",methodname[fn->method]));
672:   }
673:   PetscFunctionReturn(PETSC_SUCCESS);
674: }

676: SLEPC_EXTERN PetscErrorCode FNCreate_Log(FN fn)
677: {
678:   PetscFunctionBegin;
679:   fn->ops->evaluatefunction          = FNEvaluateFunction_Log;
680:   fn->ops->evaluatederivative        = FNEvaluateDerivative_Log;
681:   fn->ops->evaluatefunctionmat[0]    = FNEvaluateFunctionMat_Log_Higham;
682:   fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Log_Higham;
683:   fn->ops->view                      = FNView_Log;
684:   PetscFunctionReturn(PETSC_SUCCESS);
685: }