Actual source code: fnlog.c
slepc-3.22.2 2024-12-02
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Logarithm function log(x)
12: */
14: #include <slepc/private/fnimpl.h>
15: #include <slepcblaslapack.h>
17: static PetscErrorCode FNEvaluateFunction_Log(FN fn,PetscScalar x,PetscScalar *y)
18: {
19: PetscFunctionBegin;
20: #if !defined(PETSC_USE_COMPLEX)
21: PetscCheck(x>=0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Function not defined in the requested value");
22: #endif
23: *y = PetscLogScalar(x);
24: PetscFunctionReturn(PETSC_SUCCESS);
25: }
27: static PetscErrorCode FNEvaluateDerivative_Log(FN fn,PetscScalar x,PetscScalar *y)
28: {
29: PetscFunctionBegin;
30: PetscCheck(x!=0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
31: #if !defined(PETSC_USE_COMPLEX)
32: PetscCheck(x>0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
33: #endif
34: *y = 1.0/x;
35: PetscFunctionReturn(PETSC_SUCCESS);
36: }
38: /*
39: Block structure of a quasitriangular matrix T. Returns a list of n-1 numbers, where
40: structure(j) encodes the block type of the j:j+1,j:j+1 diagonal block as one of:
41: 0 - not the start of a block
42: 1 - start of a 2x2 triangular block
43: 2 - start of a 2x2 quasi-triangular (full) block
44: */
45: static PetscErrorCode qtri_struct(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscInt *structure)
46: {
47: PetscBLASInt j;
49: PetscFunctionBegin;
50: #if defined(PETSC_USE_COMPLEX)
51: for (j=0;j<n-1;j++) structure[j] = 1;
52: #else
53: if (n==1) PetscFunctionReturn(PETSC_SUCCESS);
54: else if (n==2) {
55: structure[0] = (T[1]==0.0)? 1: 2;
56: PetscFunctionReturn(PETSC_SUCCESS);
57: }
58: j = 0;
59: while (j<n-2) {
60: if (T[j+1+j*ld] != 0.0) { /* Start of a 2x2 full block */
61: structure[j++] = 2;
62: structure[j++] = 0;
63: } else if (T[j+1+j*ld] == 0.0 && T[j+2+(j+1)*ld] == 0.0) { /* Start of a 2x2 triangular block */
64: structure[j++] = 1;
65: } else { /* Next block must start a 2x2 full block */
66: structure[j++] = 0;
67: }
68: }
69: if (T[n-1+(n-2)*ld] != 0.0) { /* 2x2 full block at the end */
70: structure[n-2] = 2;
71: } else if (structure[n-3] == 0 || structure[n-3] == 1) {
72: structure[n-2] = 1;
73: }
74: #endif
75: PetscFunctionReturn(PETSC_SUCCESS);
76: }
78: /*
79: Compute scaling parameter (s) and order of Pade approximant (m).
80: wr,wi is overwritten. Required workspace is 3*n*n.
81: On output, Troot contains the sth square root of T.
82: */
83: static PetscErrorCode FNlogm_params(FN fn,PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscScalar *wr,PetscScalar *wi,PetscInt maxroots,PetscInt *s,PetscInt *m,PetscScalar *Troot,PetscScalar *work)
84: {
85: PetscInt i,j,k,p,s0;
86: PetscReal inrm,eta,a2,a3,a4,d2,d3,d4,d5;
87: PetscScalar *TrootmI=work+2*n*n;
88: PetscBool foundm=PETSC_FALSE,more;
89: PetscRandom rand;
90: const PetscReal xvals[] = { 1.586970738772063e-005, 2.313807884242979e-003, 1.938179313533253e-002,
91: 6.209171588994762e-002, 1.276404810806775e-001, 2.060962623452836e-001, 2.879093714241194e-001 };
92: const PetscInt mmax=PETSC_STATIC_ARRAY_LENGTH(xvals);
94: PetscFunctionBegin;
95: PetscCall(PetscRandomCreate(PETSC_COMM_SELF,&rand));
96: /* get initial s0 so that T^(1/2^s0) < xvals(mmax) */
97: *s = 0;
98: do {
99: inrm = SlepcAbsEigenvalue(wr[0]-PetscRealConstant(1.0),wi[0]);
100: for (i=1;i<n;i++) inrm = PetscMax(inrm,SlepcAbsEigenvalue(wr[i]-PetscRealConstant(1.0),wi[i]));
101: if (inrm < xvals[mmax-1]) break;
102: for (i=0;i<n;i++) {
103: #if defined(PETSC_USE_COMPLEX)
104: wr[i] = PetscSqrtScalar(wr[i]);
105: #else
106: #if defined(PETSC_HAVE_COMPLEX)
107: PetscComplex z = PetscSqrtComplex(PetscCMPLX(wr[i],wi[i]));
108: wr[i] = PetscRealPartComplex(z);
109: wi[i] = PetscImaginaryPartComplex(z);
110: #else
111: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This operation requires a compiler with C99 or C++ complex support");
112: #endif
113: #endif
114: }
115: *s = *s + 1;
116: } while (*s<maxroots);
117: s0 = *s;
118: if (*s == maxroots) PetscCall(PetscInfo(fn,"Too many matrix square roots\n"));
120: /* Troot = T */
121: for (j=0;j<n;j++) PetscCall(PetscArraycpy(Troot+j*ld,T+j*ld,PetscMin(j+2,n)));
122: for (k=1;k<=PetscMin(*s,maxroots);k++) PetscCall(FNSqrtmSchur(fn,n,Troot,ld,PETSC_FALSE));
123: /* Compute value of s and m needed */
124: /* TrootmI = Troot - I */
125: for (j=0;j<n;j++) {
126: PetscCall(PetscArraycpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n)));
127: TrootmI[j+j*ld] -= 1.0;
128: }
129: PetscCall(SlepcNormAm(n,TrootmI,2,work,rand,&d2));
130: d2 = PetscPowReal(d2,1.0/2.0);
131: PetscCall(SlepcNormAm(n,TrootmI,3,work,rand,&d3));
132: d3 = PetscPowReal(d3,1.0/3.0);
133: a2 = PetscMax(d2,d3);
134: if (a2 <= xvals[1]) {
135: *m = (a2 <= xvals[0])? 1: 2;
136: foundm = PETSC_TRUE;
137: }
138: p = 0;
139: while (!foundm) {
140: more = PETSC_FALSE; /* More norm checks needed */
141: if (*s > s0) {
142: PetscCall(SlepcNormAm(n,TrootmI,3,work,rand,&d3));
143: d3 = PetscPowReal(d3,1.0/3.0);
144: }
145: PetscCall(SlepcNormAm(n,TrootmI,4,work,rand,&d4));
146: d4 = PetscPowReal(d4,1.0/4.0);
147: a3 = PetscMax(d3,d4);
148: if (a3 <= xvals[mmax-1]) {
149: for (j=2;j<mmax;j++) if (a3 <= xvals[j]) break;
150: if (j <= 5) {
151: *m = j+1;
152: break;
153: } else if (a3/2.0 <= xvals[4] && p < 2) {
154: more = PETSC_TRUE;
155: p = p + 1;
156: }
157: }
158: if (!more) {
159: PetscCall(SlepcNormAm(n,TrootmI,5,work,rand,&d5));
160: d5 = PetscPowReal(d5,1.0/5.0);
161: a4 = PetscMax(d4,d5);
162: eta = PetscMin(a3,a4);
163: if (eta <= xvals[mmax-1]) {
164: for (j=5;j<mmax;j++) if (eta <= xvals[j]) break;
165: *m = j + 1;
166: break;
167: }
168: }
169: if (*s == maxroots) {
170: PetscCall(PetscInfo(fn,"Too many matrix square roots\n"));
171: *m = mmax; /* No good value found so take largest */
172: break;
173: }
174: PetscCall(FNSqrtmSchur(fn,n,Troot,ld,PETSC_FALSE));
175: /* TrootmI = Troot - I */
176: for (j=0;j<n;j++) {
177: PetscCall(PetscArraycpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n)));
178: TrootmI[j+j*ld] -= 1.0;
179: }
180: *s = *s + 1;
181: }
182: PetscCall(PetscRandomDestroy(&rand));
183: PetscFunctionReturn(PETSC_SUCCESS);
184: }
186: #if !defined(PETSC_USE_COMPLEX)
187: /*
188: Computes a^(1/2^s) - 1 accurately, avoiding subtractive cancellation
189: */
190: static PetscScalar sqrt_obo(PetscScalar a,PetscInt s)
191: {
192: PetscScalar val,z0,r;
193: PetscReal angle;
194: PetscInt i,n0;
196: PetscFunctionBegin;
197: if (s == 0) val = a-1.0;
198: else {
199: n0 = s;
200: angle = PetscAtan2Real(PetscImaginaryPart(a),PetscRealPart(a));
201: if (angle >= PETSC_PI/2.0) {
202: a = PetscSqrtScalar(a);
203: n0 = s - 1;
204: }
205: z0 = a - 1.0;
206: a = PetscSqrtScalar(a);
207: r = 1.0 + a;
208: for (i=0;i<n0-1;i++) {
209: a = PetscSqrtScalar(a);
210: r = r*(1.0+a);
211: }
212: val = z0/r;
213: }
214: PetscFunctionReturn(val);
215: }
216: #endif
218: /*
219: Square root of 2x2 matrix T from block diagonal of Schur form. Overwrites T.
220: */
221: static PetscErrorCode sqrtm_tbt(PetscScalar *T)
222: {
223: PetscScalar t11,t12,t21,t22,r11,r22;
224: #if !defined(PETSC_USE_COMPLEX)
225: PetscScalar mu;
226: #endif
228: PetscFunctionBegin;
229: t11 = T[0]; t21 = T[1]; t12 = T[2]; t22 = T[3];
230: if (t21 != 0.0) {
231: /* Compute square root of 2x2 quasitriangular block */
232: /* The algorithm assumes the special structure of real Schur form */
233: #if defined(PETSC_USE_COMPLEX)
234: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Should not reach this line in complex scalars");
235: #else
236: mu = PetscSqrtReal(-t21*t12);
237: if (t11 > 0.0) r11 = PetscSqrtReal((t11+SlepcAbsEigenvalue(t11,mu))/2.0);
238: else r11 = mu / PetscSqrtReal(2.0*(-t11+SlepcAbsEigenvalue(t11,mu)));
239: T[0] = r11;
240: T[1] = t21/(2.0*r11);
241: T[2] = t12/(2.0*r11);
242: T[3] = r11;
243: #endif
244: } else {
245: /* Compute square root of 2x2 upper triangular block */
246: r11 = PetscSqrtScalar(t11);
247: r22 = PetscSqrtScalar(t22);
248: T[0] = r11;
249: T[2] = t12/(r11+r22);
250: T[3] = r22;
251: }
252: PetscFunctionReturn(PETSC_SUCCESS);
253: }
255: #if defined(PETSC_USE_COMPLEX)
256: /*
257: Unwinding number of z
258: */
259: static inline PetscReal unwinding(PetscScalar z)
260: {
261: PetscReal u;
263: PetscFunctionBegin;
264: u = PetscCeilReal((PetscImaginaryPart(z)-PETSC_PI)/(2.0*PETSC_PI));
265: PetscFunctionReturn(u);
266: }
267: #endif
269: /*
270: Power of 2-by-2 upper triangular matrix A.
271: Returns the (1,2) element of the pth power of A, where p is an arbitrary real number
272: */
273: static PetscScalar powerm2by2(PetscScalar A11,PetscScalar A22,PetscScalar A12,PetscReal p)
274: {
275: PetscScalar a1,a2,a1p,a2p,loga1,loga2,w,dd,x12;
277: PetscFunctionBegin;
278: a1 = A11;
279: a2 = A22;
280: if (a1 == a2) {
281: x12 = p*A12*PetscPowScalarReal(a1,p-1);
282: } else if (PetscAbsScalar(a1) < 0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2) < 0.5*PetscAbsScalar(a1)) {
283: a1p = PetscPowScalarReal(a1,p);
284: a2p = PetscPowScalarReal(a2,p);
285: x12 = A12*(a2p-a1p)/(a2-a1);
286: } else { /* Close eigenvalues */
287: loga1 = PetscLogScalar(a1);
288: loga2 = PetscLogScalar(a2);
289: w = PetscAtanhScalar((a2-a1)/(a2+a1));
290: #if defined(PETSC_USE_COMPLEX)
291: w += PETSC_i*PETSC_PI*unwinding(loga2-loga1);
292: #endif
293: dd = 2.0*PetscExpScalar(p*(loga1+loga2)/2.0)*PetscSinhScalar(p*w)/(a2-a1);
294: x12 = A12*dd;
295: }
296: PetscFunctionReturn(x12);
297: }
299: /*
300: Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
301: */
302: static PetscErrorCode recompute_diag_blocks_sqrt(PetscBLASInt n,PetscScalar *Troot,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct,PetscInt s)
303: {
304: PetscScalar A[4],P[4],M[4],Z0[4],det;
305: PetscInt i,j;
306: #if !defined(PETSC_USE_COMPLEX)
307: PetscInt last_block=0;
308: PetscScalar a;
309: #endif
311: PetscFunctionBegin;
312: for (j=0;j<n-1;j++) {
313: #if !defined(PETSC_USE_COMPLEX)
314: switch (blockStruct[j]) {
315: case 0: /* Not start of a block */
316: if (last_block != 0) {
317: last_block = 0;
318: } else { /* In a 1x1 block */
319: a = T[j+j*ld];
320: Troot[j+j*ld] = sqrt_obo(a,s);
321: }
322: break;
323: default: /* In a 2x2 block */
324: last_block = blockStruct[j];
325: #endif
326: if (s == 0) {
327: Troot[j+j*ld] = T[j+j*ld]-1.0;
328: Troot[j+1+j*ld] = T[j+1+j*ld];
329: Troot[j+(j+1)*ld] = T[j+(j+1)*ld];
330: Troot[j+1+(j+1)*ld] = T[j+1+(j+1)*ld]-1.0;
331: continue;
332: }
333: A[0] = T[j+j*ld]; A[1] = T[j+1+j*ld]; A[2] = T[j+(j+1)*ld]; A[3] = T[j+1+(j+1)*ld];
334: PetscCall(sqrtm_tbt(A));
335: /* Z0 = A - I */
336: Z0[0] = A[0]-1.0; Z0[1] = A[1]; Z0[2] = A[2]; Z0[3] = A[3]-1.0;
337: if (s == 1) {
338: Troot[j+j*ld] = Z0[0];
339: Troot[j+1+j*ld] = Z0[1];
340: Troot[j+(j+1)*ld] = Z0[2];
341: Troot[j+1+(j+1)*ld] = Z0[3];
342: continue;
343: }
344: PetscCall(sqrtm_tbt(A));
345: /* P = A + I */
346: P[0] = A[0]+1.0; P[1] = A[1]; P[2] = A[2]; P[3] = A[3]+1.0;
347: for (i=0;i<s-2;i++) {
348: PetscCall(sqrtm_tbt(A));
349: /* P = P*(I + A) */
350: M[0] = P[0]*(A[0]+1.0)+P[2]*A[1];
351: M[1] = P[1]*(A[0]+1.0)+P[3]*A[1];
352: M[2] = P[0]*A[2]+P[2]*(A[3]+1.0);
353: M[3] = P[1]*A[2]+P[3]*(A[3]+1.0);
354: P[0] = M[0]; P[1] = M[1]; P[2] = M[2]; P[3] = M[3];
355: }
356: /* Troot(j:j+1,j:j+1) = Z0 / P (via Cramer) */
357: det = P[0]*P[3]-P[1]*P[2];
358: Troot[j+j*ld] = (Z0[0]*P[3]-P[1]*Z0[2])/det;
359: Troot[j+(j+1)*ld] = (P[0]*Z0[2]-Z0[0]*P[2])/det;
360: Troot[j+1+j*ld] = (Z0[1]*P[3]-P[1]*Z0[3])/det;
361: Troot[j+1+(j+1)*ld] = (P[0]*Z0[3]-Z0[1]*P[2])/det;
362: /* If block is upper triangular recompute the (1,2) element.
363: Skip when T(j,j) or T(j+1,j+1) < 0 since the implementation of atanh is nonstandard */
364: if (T[j+1+j*ld]==0.0 && PetscRealPart(T[j+j*ld])>=0.0 && PetscRealPart(T[j+1+(j+1)*ld])>=0.0) {
365: Troot[j+(j+1)*ld] = powerm2by2(T[j+j*ld],T[j+1+(j+1)*ld],T[j+(j+1)*ld],1.0/PetscPowInt(2,s));
366: }
367: #if !defined(PETSC_USE_COMPLEX)
368: }
369: #endif
370: }
371: #if !defined(PETSC_USE_COMPLEX)
372: /* If last diagonal entry is not in a block it will have been missed */
373: if (blockStruct[n-2] == 0) {
374: a = T[n-1+(n-1)*ld];
375: Troot[n-1+(n-1)*ld] = sqrt_obo(a,s);
376: }
377: #endif
378: PetscFunctionReturn(PETSC_SUCCESS);
379: }
381: /*
382: Nodes x and weights w for n-point Gauss-Legendre quadrature (Q is n*n workspace)
384: G. H. Golub and J. H. Welsch, Calculation of Gauss quadrature
385: rules, Math. Comp., 23(106):221-230, 1969.
386: */
387: static PetscErrorCode gauss_legendre(PetscBLASInt n,PetscScalar *x,PetscScalar *w,PetscScalar *Q)
388: {
389: PetscScalar v,a,*work;
390: PetscReal *eig,dummy;
391: PetscBLASInt k,ld=n,lwork,info;
392: #if defined(PETSC_USE_COMPLEX)
393: PetscReal *rwork,rdummy;
394: #endif
396: PetscFunctionBegin;
397: PetscCall(PetscArrayzero(Q,n*n));
398: for (k=1;k<n;k++) {
399: v = k/PetscSqrtReal(4.0*k*k-1.0);
400: Q[k+(k-1)*n] = v;
401: Q[(k-1)+k*n] = v;
402: }
404: /* workspace query and memory allocation */
405: lwork = -1;
406: #if defined(PETSC_USE_COMPLEX)
407: PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&rdummy,&info));
408: PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork));
409: PetscCall(PetscMalloc3(n,&eig,lwork,&work,PetscMax(1,3*n-2),&rwork));
410: #else
411: PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&info));
412: PetscCall(PetscBLASIntCast((PetscInt)a,&lwork));
413: PetscCall(PetscMalloc2(n,&eig,lwork,&work));
414: #endif
416: /* compute eigendecomposition */
417: #if defined(PETSC_USE_COMPLEX)
418: PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,rwork,&info));
419: #else
420: PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,&info));
421: #endif
422: SlepcCheckLapackInfo("syev",info);
424: for (k=0;k<n;k++) {
425: x[k] = eig[k];
426: w[k] = 2.0*Q[k*n]*Q[k*n];
427: }
428: #if defined(PETSC_USE_COMPLEX)
429: PetscCall(PetscFree3(eig,work,rwork));
430: #else
431: PetscCall(PetscFree2(eig,work));
432: #endif
433: PetscCall(PetscLogFlops(9.0*n*n*n+2.0*n*n*n));
434: PetscFunctionReturn(PETSC_SUCCESS);
435: }
437: /*
438: Pade approximation to log(1 + T) via partial fractions
439: */
440: static PetscErrorCode pade_approx(PetscBLASInt n,PetscScalar *T,PetscScalar *L,PetscBLASInt ld,PetscInt m,PetscScalar *work)
441: {
442: PetscScalar *K,*W,*nodes,*wts;
443: PetscBLASInt *ipiv,info,mm;
444: PetscInt i,j,k;
446: PetscFunctionBegin;
447: K = work;
448: W = work+n*n;
449: nodes = work+2*n*n;
450: wts = work+2*n*n+m;
451: ipiv = (PetscBLASInt*)(work+2*n*n+2*m);
452: PetscCall(PetscBLASIntCast(m,&mm));
453: PetscCall(gauss_legendre(mm,nodes,wts,L));
454: /* Convert from [-1,1] to [0,1] */
455: for (i=0;i<m;i++) {
456: nodes[i] = (nodes[i]+1.0)/2.0;
457: wts[i] = wts[i]/2.0;
458: }
459: PetscCall(PetscArrayzero(L,n*n));
460: for (k=0;k<m;k++) {
461: for (i=0;i<n;i++) for (j=0;j<n;j++) K[i+j*ld] = nodes[k]*T[i+j*ld];
462: for (i=0;i<n;i++) K[i+i*ld] += 1.0;
463: for (i=0;i<n;i++) for (j=0;j<n;j++) W[i+j*ld] = T[i+j*ld];
464: PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,K,&n,ipiv,W,&n,&info));
465: for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] += wts[k]*W[i+j*ld];
466: }
467: PetscFunctionReturn(PETSC_SUCCESS);
468: }
470: /*
471: Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
472: */
473: static PetscErrorCode recompute_diag_blocks_log(PetscBLASInt n,PetscScalar *L,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct)
474: {
475: PetscScalar a1,a2,a12,loga1,loga2,z,dd;
476: PetscInt j;
477: #if !defined(PETSC_USE_COMPLEX)
478: PetscInt last_block=0;
479: PetscScalar f,t;
480: #endif
482: PetscFunctionBegin;
483: for (j=0;j<n-1;j++) {
484: #if !defined(PETSC_USE_COMPLEX)
485: switch (blockStruct[j]) {
486: case 0: /* Not start of a block */
487: if (last_block != 0) {
488: last_block = 0;
489: } else { /* In a 1x1 block */
490: L[j+j*ld] = PetscLogScalar(T[j+j*ld]);
491: }
492: break;
493: case 1: /* Start of upper-tri block */
494: last_block = 1;
495: #endif
496: a1 = T[j+j*ld];
497: a2 = T[j+1+(j+1)*ld];
498: loga1 = PetscLogScalar(a1);
499: loga2 = PetscLogScalar(a2);
500: L[j+j*ld] = loga1;
501: L[j+1+(j+1)*ld] = loga2;
502: if ((PetscRealPart(a1)<0.0 && PetscImaginaryPart(a1)==0.0) || (PetscRealPart(a2)<0.0 && PetscImaginaryPart(a1)==0.0)) {
503: /* Problems with 2 x 2 formula for (1,2) block
504: since atanh is nonstandard, just redo diagonal part */
505: continue;
506: }
507: if (a1 == a2) {
508: a12 = T[j+(j+1)*ld]/a1;
509: } else if (PetscAbsScalar(a1)<0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2)<0.5*PetscAbsScalar(a1)) {
510: a12 = T[j+(j+1)*ld]*(loga2-loga1)/(a2-a1);
511: } else { /* Close eigenvalues */
512: z = (a2-a1)/(a2+a1);
513: dd = 2.0*PetscAtanhScalar(z);
514: #if defined(PETSC_USE_COMPLEX)
515: dd += 2.0*PETSC_i*PETSC_PI*unwinding(loga2-loga1);
516: #endif
517: dd /= (a2-a1);
518: a12 = T[j+(j+1)*ld]*dd;
519: }
520: L[j+(j+1)*ld] = a12;
521: #if !defined(PETSC_USE_COMPLEX)
522: break;
523: case 2: /* Start of quasi-tri block */
524: last_block = 2;
525: f = 0.5*PetscLogScalar(T[j+j*ld]*T[j+j*ld]-T[j+(j+1)*ld]*T[j+1+j*ld]);
526: t = PetscAtan2Real(PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]),T[j+j*ld])/PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]);
527: L[j+j*ld] = f;
528: L[j+1+j*ld] = t*T[j+1+j*ld];
529: L[j+(j+1)*ld] = t*T[j+(j+1)*ld];
530: L[j+1+(j+1)*ld] = f;
531: }
532: #endif
533: }
534: PetscFunctionReturn(PETSC_SUCCESS);
535: }
536: /*
537: * Matrix logarithm implementation based on algorithm and matlab code by N. Higham and co-authors
538: *
539: * H. Al-Mohy and N. J. Higham, "Improved inverse scaling and squaring
540: * algorithms for the matrix logarithm", SIAM J. Sci. Comput. 34(4):C153-C169, 2012.
541: */
542: static PetscErrorCode FNLogmPade(FN fn,PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscBool firstonly)
543: {
544: PetscBLASInt k,sdim,lwork,info;
545: PetscScalar *wr,*wi=NULL,*W,*Q,*Troot,*L,*work,one=1.0,zero=0.0,alpha;
546: PetscInt i,j,s=0,m=0,*blockformat;
547: #if defined(PETSC_USE_COMPLEX)
548: PetscReal *rwork;
549: #endif
551: PetscFunctionBegin;
552: lwork = 3*n*n; /* gees needs only 5*n, but work is also passed to FNlogm_params */
553: k = firstonly? 1: n;
555: /* compute Schur decomposition A*Q = Q*T */
556: PetscCall(PetscCalloc7(n,&wr,n*k,&W,n*n,&Q,n*n,&Troot,n*n,&L,lwork,&work,n-1,&blockformat));
557: #if !defined(PETSC_USE_COMPLEX)
558: PetscCall(PetscMalloc1(n,&wi));
559: PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,wi,Q,&ld,work,&lwork,NULL,&info));
560: #else
561: PetscCall(PetscMalloc1(n,&rwork));
562: PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,Q,&ld,work,&lwork,rwork,NULL,&info));
563: #endif
564: SlepcCheckLapackInfo("gees",info);
566: #if !defined(PETSC_USE_COMPLEX)
567: /* check for negative real eigenvalues */
568: for (i=0;i<n;i++) {
569: PetscCheck(wr[i]>=0.0 || wi[i]!=0.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix has negative real eigenvalue; rerun with complex scalars");
570: }
571: #endif
573: /* get block structure of Schur factor */
574: PetscCall(qtri_struct(n,T,ld,blockformat));
576: /* get parameters */
577: PetscCall(FNlogm_params(fn,n,T,ld,wr,wi,100,&s,&m,Troot,work));
579: /* compute Troot - I = T(1/2^s) - I more accurately */
580: PetscCall(recompute_diag_blocks_sqrt(n,Troot,T,ld,blockformat,s));
582: /* compute Pade approximant */
583: PetscCall(pade_approx(n,Troot,L,ld,m,work));
585: /* scale back up, L = 2^s * L */
586: alpha = PetscPowInt(2,s);
587: for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] *= alpha;
589: /* recompute diagonal blocks */
590: PetscCall(recompute_diag_blocks_log(n,L,T,ld,blockformat));
592: /* backtransform B = Q*L*Q' */
593: PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&k,&n,&one,L,&ld,Q,&ld,&zero,W,&ld));
594: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&k,&n,&one,Q,&ld,W,&ld,&zero,T,&ld));
596: /* flop count: Schur decomposition, and backtransform */
597: PetscCall(PetscLogFlops(25.0*n*n*n+4.0*n*n*k));
599: PetscCall(PetscFree7(wr,W,Q,Troot,L,work,blockformat));
600: #if !defined(PETSC_USE_COMPLEX)
601: PetscCall(PetscFree(wi));
602: #else
603: PetscCall(PetscFree(rwork));
604: #endif
605: PetscFunctionReturn(PETSC_SUCCESS);
606: }
608: static PetscErrorCode FNEvaluateFunctionMat_Log_Higham(FN fn,Mat A,Mat B)
609: {
610: PetscBLASInt n = 0;
611: PetscScalar *T;
612: PetscInt m;
614: PetscFunctionBegin;
615: if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN));
616: PetscCall(MatDenseGetArray(B,&T));
617: PetscCall(MatGetSize(A,&m,NULL));
618: PetscCall(PetscBLASIntCast(m,&n));
619: PetscCall(FNLogmPade(fn,n,T,n,PETSC_FALSE));
620: PetscCall(MatDenseRestoreArray(B,&T));
621: PetscFunctionReturn(PETSC_SUCCESS);
622: }
624: static PetscErrorCode FNEvaluateFunctionMatVec_Log_Higham(FN fn,Mat A,Vec v)
625: {
626: PetscBLASInt n = 0;
627: PetscScalar *T;
628: PetscInt m;
629: Mat B;
631: PetscFunctionBegin;
632: PetscCall(FN_AllocateWorkMat(fn,A,&B));
633: PetscCall(MatDenseGetArray(B,&T));
634: PetscCall(MatGetSize(A,&m,NULL));
635: PetscCall(PetscBLASIntCast(m,&n));
636: PetscCall(FNLogmPade(fn,n,T,n,PETSC_TRUE));
637: PetscCall(MatDenseRestoreArray(B,&T));
638: PetscCall(MatGetColumnVector(B,v,0));
639: PetscCall(FN_FreeWorkMat(fn,&B));
640: PetscFunctionReturn(PETSC_SUCCESS);
641: }
643: static PetscErrorCode FNView_Log(FN fn,PetscViewer viewer)
644: {
645: PetscBool isascii;
646: char str[50];
647: const char *methodname[] = {
648: "scaling & squaring, [m/m] Pade approximant (Higham)"
649: };
650: const int nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);
652: PetscFunctionBegin;
653: PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
654: if (isascii) {
655: if (fn->beta==(PetscScalar)1.0) {
656: if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer," logarithm: log(x)\n"));
657: else {
658: PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE));
659: PetscCall(PetscViewerASCIIPrintf(viewer," logarithm: log(%s*x)\n",str));
660: }
661: } else {
662: PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE));
663: if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer," logarithm: %s*log(x)\n",str));
664: else {
665: PetscCall(PetscViewerASCIIPrintf(viewer," logarithm: %s",str));
666: PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
667: PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE));
668: PetscCall(PetscViewerASCIIPrintf(viewer,"*log(%s*x)\n",str));
669: PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
670: }
671: }
672: if (fn->method<nmeth) PetscCall(PetscViewerASCIIPrintf(viewer," computing matrix functions with: %s\n",methodname[fn->method]));
673: }
674: PetscFunctionReturn(PETSC_SUCCESS);
675: }
677: SLEPC_EXTERN PetscErrorCode FNCreate_Log(FN fn)
678: {
679: PetscFunctionBegin;
680: fn->ops->evaluatefunction = FNEvaluateFunction_Log;
681: fn->ops->evaluatederivative = FNEvaluateDerivative_Log;
682: fn->ops->evaluatefunctionmat[0] = FNEvaluateFunctionMat_Log_Higham;
683: fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Log_Higham;
684: fn->ops->view = FNView_Log;
685: PetscFunctionReturn(PETSC_SUCCESS);
686: }