Line data Source code
1 : /*
2 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 : SLEPc - Scalable Library for Eigenvalue Problem Computations
4 : Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5 :
6 : This file is part of SLEPc.
7 : SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 : */
10 : /*
11 : Logarithm function log(x)
12 : */
13 :
14 : #include <slepc/private/fnimpl.h> /*I "slepcfn.h" I*/
15 : #include <slepcblaslapack.h>
16 :
17 2 : static PetscErrorCode FNEvaluateFunction_Log(FN fn,PetscScalar x,PetscScalar *y)
18 : {
19 2 : PetscFunctionBegin;
20 : #if !defined(PETSC_USE_COMPLEX)
21 2 : PetscCheck(x>=0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Function not defined in the requested value");
22 : #endif
23 2 : *y = PetscLogScalar(x);
24 2 : PetscFunctionReturn(PETSC_SUCCESS);
25 : }
26 :
27 2 : static PetscErrorCode FNEvaluateDerivative_Log(FN fn,PetscScalar x,PetscScalar *y)
28 : {
29 2 : PetscFunctionBegin;
30 2 : PetscCheck(x!=0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
31 : #if !defined(PETSC_USE_COMPLEX)
32 2 : PetscCheck(x>0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
33 : #endif
34 2 : *y = 1.0/x;
35 2 : PetscFunctionReturn(PETSC_SUCCESS);
36 : }
37 :
38 : /*
39 : Block structure of a quasitriangular matrix T. Returns a list of n-1 numbers, where
40 : structure(j) encodes the block type of the j:j+1,j:j+1 diagonal block as one of:
41 : 0 - not the start of a block
42 : 1 - start of a 2x2 triangular block
43 : 2 - start of a 2x2 quasi-triangular (full) block
44 : */
45 4 : static PetscErrorCode qtri_struct(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscInt *structure)
46 : {
47 4 : PetscBLASInt j;
48 :
49 4 : PetscFunctionBegin;
50 : #if defined(PETSC_USE_COMPLEX)
51 : for (j=0;j<n-1;j++) structure[j] = 1;
52 : #else
53 4 : if (n==1) PetscFunctionReturn(PETSC_SUCCESS);
54 4 : else if (n==2) {
55 0 : structure[0] = (T[1]==0.0)? 1: 2;
56 0 : PetscFunctionReturn(PETSC_SUCCESS);
57 : }
58 : j = 0;
59 224 : while (j<n-2) {
60 220 : if (T[j+1+j*ld] != 0.0) { /* Start of a 2x2 full block */
61 72 : structure[j++] = 2;
62 72 : structure[j++] = 0;
63 148 : } else if (T[j+1+j*ld] == 0.0 && T[j+2+(j+1)*ld] == 0.0) { /* Start of a 2x2 triangular block */
64 146 : structure[j++] = 1;
65 : } else { /* Next block must start a 2x2 full block */
66 2 : structure[j++] = 0;
67 : }
68 : }
69 4 : if (T[n-1+(n-2)*ld] != 0.0) { /* 2x2 full block at the end */
70 2 : structure[n-2] = 2;
71 2 : } else if (structure[n-3] == 0 || structure[n-3] == 1) {
72 2 : structure[n-2] = 1;
73 : }
74 : #endif
75 4 : PetscFunctionReturn(PETSC_SUCCESS);
76 : }
77 :
78 : /*
79 : Compute scaling parameter (s) and order of Pade approximant (m).
80 : wr,wi is overwritten. Required workspace is 3*n*n.
81 : On output, Troot contains the sth square root of T.
82 : */
83 4 : static PetscErrorCode FNlogm_params(FN fn,PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscScalar *wr,PetscScalar *wi,PetscInt maxroots,PetscInt *s,PetscInt *m,PetscScalar *Troot,PetscScalar *work)
84 : {
85 4 : PetscInt i,j,k,p,s0;
86 4 : PetscReal inrm,eta,a2,a3,a4,d2,d3,d4,d5;
87 4 : PetscScalar *TrootmI=work+2*n*n;
88 4 : PetscBool foundm=PETSC_FALSE,more;
89 4 : PetscRandom rand;
90 4 : const PetscReal xvals[] = { 1.586970738772063e-005, 2.313807884242979e-003, 1.938179313533253e-002,
91 : 6.209171588994762e-002, 1.276404810806775e-001, 2.060962623452836e-001, 2.879093714241194e-001 };
92 4 : const PetscInt mmax=PETSC_STATIC_ARRAY_LENGTH(xvals);
93 :
94 4 : PetscFunctionBegin;
95 4 : PetscCall(PetscRandomCreate(PETSC_COMM_SELF,&rand));
96 : /* get initial s0 so that T^(1/2^s0) < xvals(mmax) */
97 4 : *s = 0;
98 16 : do {
99 16 : inrm = SlepcAbsEigenvalue(wr[0]-PetscRealConstant(1.0),wi[0]);
100 1216 : for (i=1;i<n;i++) inrm = PetscMax(inrm,SlepcAbsEigenvalue(wr[i]-PetscRealConstant(1.0),wi[i]));
101 16 : if (inrm < xvals[mmax-1]) break;
102 912 : for (i=0;i<n;i++) {
103 : #if defined(PETSC_USE_COMPLEX)
104 : wr[i] = PetscSqrtScalar(wr[i]);
105 : #else
106 : #if defined(PETSC_HAVE_COMPLEX)
107 900 : PetscComplex z = PetscSqrtComplex(PetscCMPLX(wr[i],wi[i]));
108 900 : wr[i] = PetscRealPartComplex(z);
109 900 : wi[i] = PetscImaginaryPartComplex(z);
110 : #else
111 : SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This operation requires a compiler with C99 or C++ complex support");
112 : #endif
113 : #endif
114 : }
115 12 : *s = *s + 1;
116 12 : } while (*s<maxroots);
117 4 : s0 = *s;
118 4 : if (*s == maxroots) PetscCall(PetscInfo(fn,"Too many matrix square roots\n"));
119 :
120 : /* Troot = T */
121 304 : for (j=0;j<n;j++) PetscCall(PetscArraycpy(Troot+j*ld,T+j*ld,PetscMin(j+2,n)));
122 16 : for (k=1;k<=PetscMin(*s,maxroots);k++) PetscCall(FNSqrtmSchur(fn,n,Troot,ld,PETSC_FALSE));
123 : /* Compute value of s and m needed */
124 : /* TrootmI = Troot - I */
125 304 : for (j=0;j<n;j++) {
126 300 : PetscCall(PetscArraycpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n)));
127 300 : TrootmI[j+j*ld] -= 1.0;
128 : }
129 4 : PetscCall(SlepcNormAm(n,TrootmI,2,work,rand,&d2));
130 4 : d2 = PetscPowReal(d2,1.0/2.0);
131 4 : PetscCall(SlepcNormAm(n,TrootmI,3,work,rand,&d3));
132 4 : d3 = PetscPowReal(d3,1.0/3.0);
133 4 : a2 = PetscMax(d2,d3);
134 4 : if (a2 <= xvals[1]) {
135 0 : *m = (a2 <= xvals[0])? 1: 2;
136 0 : foundm = PETSC_TRUE;
137 : }
138 4 : p = 0;
139 10 : while (!foundm) {
140 10 : more = PETSC_FALSE; /* More norm checks needed */
141 10 : if (*s > s0) {
142 6 : PetscCall(SlepcNormAm(n,TrootmI,3,work,rand,&d3));
143 6 : d3 = PetscPowReal(d3,1.0/3.0);
144 : }
145 10 : PetscCall(SlepcNormAm(n,TrootmI,4,work,rand,&d4));
146 10 : d4 = PetscPowReal(d4,1.0/4.0);
147 10 : a3 = PetscMax(d3,d4);
148 10 : if (a3 <= xvals[mmax-1]) {
149 28 : for (j=2;j<mmax;j++) if (a3 <= xvals[j]) break;
150 6 : if (j <= 5) {
151 2 : *m = j+1;
152 2 : break;
153 4 : } else if (a3/2.0 <= xvals[4] && p < 2) {
154 2 : more = PETSC_TRUE;
155 2 : p = p + 1;
156 : }
157 : }
158 2 : if (!more) {
159 6 : PetscCall(SlepcNormAm(n,TrootmI,5,work,rand,&d5));
160 6 : d5 = PetscPowReal(d5,1.0/5.0);
161 6 : a4 = PetscMax(d4,d5);
162 6 : eta = PetscMin(a3,a4);
163 6 : if (eta <= xvals[mmax-1]) {
164 4 : for (j=5;j<mmax;j++) if (eta <= xvals[j]) break;
165 2 : *m = j + 1;
166 2 : break;
167 : }
168 : }
169 6 : if (*s == maxroots) {
170 0 : PetscCall(PetscInfo(fn,"Too many matrix square roots\n"));
171 0 : *m = mmax; /* No good value found so take largest */
172 0 : break;
173 : }
174 6 : PetscCall(FNSqrtmSchur(fn,n,Troot,ld,PETSC_FALSE));
175 : /* TrootmI = Troot - I */
176 456 : for (j=0;j<n;j++) {
177 450 : PetscCall(PetscArraycpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n)));
178 450 : TrootmI[j+j*ld] -= 1.0;
179 : }
180 6 : *s = *s + 1;
181 : }
182 4 : PetscCall(PetscRandomDestroy(&rand));
183 4 : PetscFunctionReturn(PETSC_SUCCESS);
184 : }
185 :
186 : #if !defined(PETSC_USE_COMPLEX)
187 : /*
188 : Computes a^(1/2^s) - 1 accurately, avoiding subtractive cancellation
189 : */
190 2 : static PetscScalar sqrt_obo(PetscScalar a,PetscInt s)
191 : {
192 2 : PetscScalar val,z0,r;
193 2 : PetscReal angle;
194 2 : PetscInt i,n0;
195 :
196 2 : PetscFunctionBegin;
197 2 : if (s == 0) val = a-1.0;
198 : else {
199 2 : n0 = s;
200 2 : angle = PetscAtan2Real(PetscImaginaryPart(a),PetscRealPart(a));
201 2 : if (angle >= PETSC_PI/2.0) {
202 0 : a = PetscSqrtScalar(a);
203 0 : n0 = s - 1;
204 : }
205 2 : z0 = a - 1.0;
206 2 : a = PetscSqrtScalar(a);
207 2 : r = 1.0 + a;
208 8 : for (i=0;i<n0-1;i++) {
209 6 : a = PetscSqrtScalar(a);
210 6 : r = r*(1.0+a);
211 : }
212 2 : val = z0/r;
213 : }
214 2 : PetscFunctionReturn(val);
215 : }
216 : #endif
217 :
218 : /*
219 : Square root of 2x2 matrix T from block diagonal of Schur form. Overwrites T.
220 : */
221 1036 : static PetscErrorCode sqrtm_tbt(PetscScalar *T)
222 : {
223 1036 : PetscScalar t11,t12,t21,t22,r11,r22;
224 : #if !defined(PETSC_USE_COMPLEX)
225 1036 : PetscScalar mu;
226 : #endif
227 :
228 1036 : PetscFunctionBegin;
229 1036 : t11 = T[0]; t21 = T[1]; t12 = T[2]; t22 = T[3];
230 1036 : if (t21 != 0.0) {
231 : /* Compute square root of 2x2 quasitriangular block */
232 : /* The algorithm assumes the special structure of real Schur form */
233 : #if defined(PETSC_USE_COMPLEX)
234 : SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Should not reach this line in complex scalars");
235 : #else
236 296 : mu = PetscSqrtReal(-t21*t12);
237 296 : if (t11 > 0.0) r11 = PetscSqrtReal((t11+SlepcAbsEigenvalue(t11,mu))/2.0);
238 0 : else r11 = mu / PetscSqrtReal(2.0*(-t11+SlepcAbsEigenvalue(t11,mu)));
239 296 : T[0] = r11;
240 296 : T[1] = t21/(2.0*r11);
241 296 : T[2] = t12/(2.0*r11);
242 296 : T[3] = r11;
243 : #endif
244 : } else {
245 : /* Compute square root of 2x2 upper triangular block */
246 740 : r11 = PetscSqrtScalar(t11);
247 740 : r22 = PetscSqrtScalar(t22);
248 740 : T[0] = r11;
249 740 : T[2] = t12/(r11+r22);
250 740 : T[3] = r22;
251 : }
252 1036 : PetscFunctionReturn(PETSC_SUCCESS);
253 : }
254 :
255 : #if defined(PETSC_USE_COMPLEX)
256 : /*
257 : Unwinding number of z
258 : */
259 : static inline PetscReal unwinding(PetscScalar z)
260 : {
261 : PetscReal u;
262 :
263 : PetscFunctionBegin;
264 : u = PetscCeilReal((PetscImaginaryPart(z)-PETSC_PI)/(2.0*PETSC_PI));
265 : PetscFunctionReturn(u);
266 : }
267 : #endif
268 :
269 : /*
270 : Power of 2-by-2 upper triangular matrix A.
271 : Returns the (1,2) element of the pth power of A, where p is an arbitrary real number
272 : */
273 148 : static PetscScalar powerm2by2(PetscScalar A11,PetscScalar A22,PetscScalar A12,PetscReal p)
274 : {
275 148 : PetscScalar a1,a2,a1p,a2p,loga1,loga2,w,dd,x12;
276 :
277 148 : PetscFunctionBegin;
278 148 : a1 = A11;
279 148 : a2 = A22;
280 148 : if (a1 == a2) {
281 146 : x12 = p*A12*PetscPowScalarReal(a1,p-1);
282 2 : } else if (PetscAbsScalar(a1) < 0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2) < 0.5*PetscAbsScalar(a1)) {
283 0 : a1p = PetscPowScalarReal(a1,p);
284 0 : a2p = PetscPowScalarReal(a2,p);
285 0 : x12 = A12*(a2p-a1p)/(a2-a1);
286 : } else { /* Close eigenvalues */
287 2 : loga1 = PetscLogScalar(a1);
288 2 : loga2 = PetscLogScalar(a2);
289 2 : w = PetscAtanhScalar((a2-a1)/(a2+a1));
290 : #if defined(PETSC_USE_COMPLEX)
291 : w += PETSC_i*PETSC_PI*unwinding(loga2-loga1);
292 : #endif
293 2 : dd = 2.0*PetscExpScalar(p*(loga1+loga2)/2.0)*PetscSinhScalar(p*w)/(a2-a1);
294 2 : x12 = A12*dd;
295 : }
296 148 : PetscFunctionReturn(x12);
297 : }
298 :
299 : /*
300 : Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
301 : */
302 4 : static PetscErrorCode recompute_diag_blocks_sqrt(PetscBLASInt n,PetscScalar *Troot,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct,PetscInt s)
303 : {
304 4 : PetscScalar A[4],P[4],M[4],Z0[4],det;
305 4 : PetscInt i,j;
306 : #if !defined(PETSC_USE_COMPLEX)
307 4 : PetscInt last_block=0;
308 4 : PetscScalar a;
309 : #endif
310 :
311 4 : PetscFunctionBegin;
312 300 : for (j=0;j<n-1;j++) {
313 : #if !defined(PETSC_USE_COMPLEX)
314 296 : switch (blockStruct[j]) {
315 74 : case 0: /* Not start of a block */
316 74 : if (last_block != 0) {
317 : last_block = 0;
318 : } else { /* In a 1x1 block */
319 2 : a = T[j+j*ld];
320 2 : Troot[j+j*ld] = sqrt_obo(a,s);
321 : }
322 : break;
323 222 : default: /* In a 2x2 block */
324 222 : last_block = blockStruct[j];
325 : #endif
326 222 : if (s == 0) {
327 0 : Troot[j+j*ld] = T[j+j*ld]-1.0;
328 0 : Troot[j+1+j*ld] = T[j+1+j*ld];
329 0 : Troot[j+(j+1)*ld] = T[j+(j+1)*ld];
330 0 : Troot[j+1+(j+1)*ld] = T[j+1+(j+1)*ld]-1.0;
331 0 : continue;
332 : }
333 222 : A[0] = T[j+j*ld]; A[1] = T[j+1+j*ld]; A[2] = T[j+(j+1)*ld]; A[3] = T[j+1+(j+1)*ld];
334 222 : PetscCall(sqrtm_tbt(A));
335 : /* Z0 = A - I */
336 222 : Z0[0] = A[0]-1.0; Z0[1] = A[1]; Z0[2] = A[2]; Z0[3] = A[3]-1.0;
337 222 : if (s == 1) {
338 0 : Troot[j+j*ld] = Z0[0];
339 0 : Troot[j+1+j*ld] = Z0[1];
340 0 : Troot[j+(j+1)*ld] = Z0[2];
341 0 : Troot[j+1+(j+1)*ld] = Z0[3];
342 0 : continue;
343 : }
344 222 : PetscCall(sqrtm_tbt(A));
345 : /* P = A + I */
346 222 : P[0] = A[0]+1.0; P[1] = A[1]; P[2] = A[2]; P[3] = A[3]+1.0;
347 814 : for (i=0;i<s-2;i++) {
348 592 : PetscCall(sqrtm_tbt(A));
349 : /* P = P*(I + A) */
350 592 : M[0] = P[0]*(A[0]+1.0)+P[2]*A[1];
351 592 : M[1] = P[1]*(A[0]+1.0)+P[3]*A[1];
352 592 : M[2] = P[0]*A[2]+P[2]*(A[3]+1.0);
353 592 : M[3] = P[1]*A[2]+P[3]*(A[3]+1.0);
354 592 : P[0] = M[0]; P[1] = M[1]; P[2] = M[2]; P[3] = M[3];
355 : }
356 : /* Troot(j:j+1,j:j+1) = Z0 / P (via Cramer) */
357 222 : det = P[0]*P[3]-P[1]*P[2];
358 222 : Troot[j+j*ld] = (Z0[0]*P[3]-P[1]*Z0[2])/det;
359 222 : Troot[j+(j+1)*ld] = (P[0]*Z0[2]-Z0[0]*P[2])/det;
360 222 : Troot[j+1+j*ld] = (Z0[1]*P[3]-P[1]*Z0[3])/det;
361 222 : Troot[j+1+(j+1)*ld] = (P[0]*Z0[3]-Z0[1]*P[2])/det;
362 : /* If block is upper triangular recompute the (1,2) element.
363 : Skip when T(j,j) or T(j+1,j+1) < 0 since the implementation of atanh is nonstandard */
364 222 : if (T[j+1+j*ld]==0.0 && PetscRealPart(T[j+j*ld])>=0.0 && PetscRealPart(T[j+1+(j+1)*ld])>=0.0) {
365 148 : Troot[j+(j+1)*ld] = powerm2by2(T[j+j*ld],T[j+1+(j+1)*ld],T[j+(j+1)*ld],1.0/PetscPowInt(2,s));
366 : }
367 : #if !defined(PETSC_USE_COMPLEX)
368 : }
369 : #endif
370 : }
371 : #if !defined(PETSC_USE_COMPLEX)
372 : /* If last diagonal entry is not in a block it will have been missed */
373 4 : if (blockStruct[n-2] == 0) {
374 0 : a = T[n-1+(n-1)*ld];
375 0 : Troot[n-1+(n-1)*ld] = sqrt_obo(a,s);
376 : }
377 : #endif
378 4 : PetscFunctionReturn(PETSC_SUCCESS);
379 : }
380 :
381 : /*
382 : Nodes x and weights w for n-point Gauss-Legendre quadrature (Q is n*n workspace)
383 :
384 : G. H. Golub and J. H. Welsch, Calculation of Gauss quadrature
385 : rules, Math. Comp., 23(106):221-230, 1969.
386 : */
387 4 : static PetscErrorCode gauss_legendre(PetscBLASInt n,PetscScalar *x,PetscScalar *w,PetscScalar *Q)
388 : {
389 4 : PetscScalar v,a,*work;
390 4 : PetscReal *eig,dummy;
391 4 : PetscBLASInt k,ld=n,lwork,info;
392 : #if defined(PETSC_USE_COMPLEX)
393 : PetscReal *rwork,rdummy;
394 : #endif
395 :
396 4 : PetscFunctionBegin;
397 4 : PetscCall(PetscArrayzero(Q,n*n));
398 26 : for (k=1;k<n;k++) {
399 22 : v = k/PetscSqrtReal(4.0*k*k-1.0);
400 22 : Q[k+(k-1)*n] = v;
401 22 : Q[(k-1)+k*n] = v;
402 : }
403 :
404 : /* workspace query and memory allocation */
405 4 : lwork = -1;
406 : #if defined(PETSC_USE_COMPLEX)
407 : PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&rdummy,&info));
408 : PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork));
409 : PetscCall(PetscMalloc3(n,&eig,lwork,&work,PetscMax(1,3*n-2),&rwork));
410 : #else
411 4 : PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&info));
412 4 : PetscCall(PetscBLASIntCast((PetscInt)a,&lwork));
413 4 : PetscCall(PetscMalloc2(n,&eig,lwork,&work));
414 : #endif
415 :
416 : /* compute eigendecomposition */
417 : #if defined(PETSC_USE_COMPLEX)
418 : PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,rwork,&info));
419 : #else
420 4 : PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,&info));
421 : #endif
422 4 : SlepcCheckLapackInfo("syev",info);
423 :
424 30 : for (k=0;k<n;k++) {
425 26 : x[k] = eig[k];
426 26 : w[k] = 2.0*Q[k*n]*Q[k*n];
427 : }
428 : #if defined(PETSC_USE_COMPLEX)
429 : PetscCall(PetscFree3(eig,work,rwork));
430 : #else
431 4 : PetscCall(PetscFree2(eig,work));
432 : #endif
433 4 : PetscCall(PetscLogFlops(9.0*n*n*n+2.0*n*n*n));
434 4 : PetscFunctionReturn(PETSC_SUCCESS);
435 : }
436 :
437 : /*
438 : Pade approximation to log(1 + T) via partial fractions
439 : */
440 4 : static PetscErrorCode pade_approx(PetscBLASInt n,PetscScalar *T,PetscScalar *L,PetscBLASInt ld,PetscInt m,PetscScalar *work)
441 : {
442 4 : PetscScalar *K,*W,*nodes,*wts;
443 4 : PetscBLASInt *ipiv,info,mm;
444 4 : PetscInt i,j,k;
445 :
446 4 : PetscFunctionBegin;
447 4 : K = work;
448 4 : W = work+n*n;
449 4 : nodes = work+2*n*n;
450 4 : wts = work+2*n*n+m;
451 4 : ipiv = (PetscBLASInt*)(work+2*n*n+2*m);
452 4 : PetscCall(PetscBLASIntCast(m,&mm));
453 4 : PetscCall(gauss_legendre(mm,nodes,wts,L));
454 : /* Convert from [-1,1] to [0,1] */
455 30 : for (i=0;i<m;i++) {
456 26 : nodes[i] = (nodes[i]+1.0)/2.0;
457 26 : wts[i] = wts[i]/2.0;
458 : }
459 4 : PetscCall(PetscArrayzero(L,n*n));
460 30 : for (k=0;k<m;k++) {
461 148226 : for (i=0;i<n;i++) for (j=0;j<n;j++) K[i+j*ld] = nodes[k]*T[i+j*ld];
462 1976 : for (i=0;i<n;i++) K[i+i*ld] += 1.0;
463 148226 : for (i=0;i<n;i++) for (j=0;j<n;j++) W[i+j*ld] = T[i+j*ld];
464 26 : PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,K,&n,ipiv,W,&n,&info));
465 148226 : for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] += wts[k]*W[i+j*ld];
466 : }
467 4 : PetscFunctionReturn(PETSC_SUCCESS);
468 : }
469 :
470 : /*
471 : Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
472 : */
473 4 : static PetscErrorCode recompute_diag_blocks_log(PetscBLASInt n,PetscScalar *L,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct)
474 : {
475 4 : PetscScalar a1,a2,a12,loga1,loga2,z,dd;
476 4 : PetscInt j;
477 : #if !defined(PETSC_USE_COMPLEX)
478 4 : PetscInt last_block=0;
479 4 : PetscScalar f,t;
480 : #endif
481 :
482 4 : PetscFunctionBegin;
483 300 : for (j=0;j<n-1;j++) {
484 : #if !defined(PETSC_USE_COMPLEX)
485 296 : switch (blockStruct[j]) {
486 74 : case 0: /* Not start of a block */
487 74 : if (last_block != 0) {
488 : last_block = 0;
489 : } else { /* In a 1x1 block */
490 2 : L[j+j*ld] = PetscLogScalar(T[j+j*ld]);
491 : }
492 : break;
493 148 : case 1: /* Start of upper-tri block */
494 148 : last_block = 1;
495 : #endif
496 148 : a1 = T[j+j*ld];
497 148 : a2 = T[j+1+(j+1)*ld];
498 148 : loga1 = PetscLogScalar(a1);
499 148 : loga2 = PetscLogScalar(a2);
500 148 : L[j+j*ld] = loga1;
501 148 : L[j+1+(j+1)*ld] = loga2;
502 148 : if ((PetscRealPart(a1)<0.0 && PetscImaginaryPart(a1)==0.0) || (PetscRealPart(a2)<0.0 && PetscImaginaryPart(a1)==0.0)) {
503 : /* Problems with 2 x 2 formula for (1,2) block
504 : since atanh is nonstandard, just redo diagonal part */
505 0 : continue;
506 : }
507 148 : if (a1 == a2) {
508 146 : a12 = T[j+(j+1)*ld]/a1;
509 2 : } else if (PetscAbsScalar(a1)<0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2)<0.5*PetscAbsScalar(a1)) {
510 0 : a12 = T[j+(j+1)*ld]*(loga2-loga1)/(a2-a1);
511 : } else { /* Close eigenvalues */
512 2 : z = (a2-a1)/(a2+a1);
513 2 : dd = 2.0*PetscAtanhScalar(z);
514 : #if defined(PETSC_USE_COMPLEX)
515 : dd += 2.0*PETSC_i*PETSC_PI*unwinding(loga2-loga1);
516 : #endif
517 2 : dd /= (a2-a1);
518 2 : a12 = T[j+(j+1)*ld]*dd;
519 : }
520 148 : L[j+(j+1)*ld] = a12;
521 : #if !defined(PETSC_USE_COMPLEX)
522 148 : break;
523 74 : case 2: /* Start of quasi-tri block */
524 74 : last_block = 2;
525 74 : f = 0.5*PetscLogScalar(T[j+j*ld]*T[j+j*ld]-T[j+(j+1)*ld]*T[j+1+j*ld]);
526 74 : t = PetscAtan2Real(PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]),T[j+j*ld])/PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]);
527 74 : L[j+j*ld] = f;
528 74 : L[j+1+j*ld] = t*T[j+1+j*ld];
529 74 : L[j+(j+1)*ld] = t*T[j+(j+1)*ld];
530 74 : L[j+1+(j+1)*ld] = f;
531 : }
532 : #endif
533 296 : }
534 4 : PetscFunctionReturn(PETSC_SUCCESS);
535 : }
536 : /*
537 : * Matrix logarithm implementation based on algorithm and matlab code by N. Higham and co-authors
538 : *
539 : * H. Al-Mohy and N. J. Higham, "Improved inverse scaling and squaring
540 : * algorithms for the matrix logarithm", SIAM J. Sci. Comput. 34(4):C153-C169, 2012.
541 : */
542 4 : static PetscErrorCode FNLogmPade(FN fn,PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscBool firstonly)
543 : {
544 4 : PetscBLASInt k,sdim,lwork,info;
545 4 : PetscScalar *wr,*wi=NULL,*W,*Q,*Troot,*L,*work,one=1.0,zero=0.0,alpha;
546 4 : PetscInt i,j,s=0,m=0,*blockformat;
547 : #if defined(PETSC_USE_COMPLEX)
548 : PetscReal *rwork;
549 : #endif
550 :
551 4 : PetscFunctionBegin;
552 4 : lwork = 3*n*n; /* gees needs only 5*n, but work is also passed to FNlogm_params */
553 4 : k = firstonly? 1: n;
554 :
555 : /* compute Schur decomposition A*Q = Q*T */
556 4 : PetscCall(PetscCalloc7(n,&wr,n*k,&W,n*n,&Q,n*n,&Troot,n*n,&L,lwork,&work,n-1,&blockformat));
557 : #if !defined(PETSC_USE_COMPLEX)
558 4 : PetscCall(PetscMalloc1(n,&wi));
559 4 : PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,wi,Q,&ld,work,&lwork,NULL,&info));
560 : #else
561 : PetscCall(PetscMalloc1(n,&rwork));
562 : PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,Q,&ld,work,&lwork,rwork,NULL,&info));
563 : #endif
564 4 : SlepcCheckLapackInfo("gees",info);
565 :
566 : #if !defined(PETSC_USE_COMPLEX)
567 : /* check for negative real eigenvalues */
568 304 : for (i=0;i<n;i++) {
569 300 : PetscCheck(wr[i]>=0.0 || wi[i]!=0.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix has negative real eigenvalue; rerun with complex scalars");
570 : }
571 : #endif
572 :
573 : /* get block structure of Schur factor */
574 4 : PetscCall(qtri_struct(n,T,ld,blockformat));
575 :
576 : /* get parameters */
577 4 : PetscCall(FNlogm_params(fn,n,T,ld,wr,wi,100,&s,&m,Troot,work));
578 :
579 : /* compute Troot - I = T(1/2^s) - I more accurately */
580 4 : PetscCall(recompute_diag_blocks_sqrt(n,Troot,T,ld,blockformat,s));
581 :
582 : /* compute Pade approximant */
583 4 : PetscCall(pade_approx(n,Troot,L,ld,m,work));
584 :
585 : /* scale back up, L = 2^s * L */
586 4 : alpha = PetscPowInt(2,s);
587 22804 : for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] *= alpha;
588 :
589 : /* recompute diagonal blocks */
590 4 : PetscCall(recompute_diag_blocks_log(n,L,T,ld,blockformat));
591 :
592 : /* backtransform B = Q*L*Q' */
593 4 : PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&k,&n,&one,L,&ld,Q,&ld,&zero,W,&ld));
594 4 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&k,&n,&one,Q,&ld,W,&ld,&zero,T,&ld));
595 :
596 : /* flop count: Schur decomposition, and backtransform */
597 4 : PetscCall(PetscLogFlops(25.0*n*n*n+4.0*n*n*k));
598 :
599 4 : PetscCall(PetscFree7(wr,W,Q,Troot,L,work,blockformat));
600 : #if !defined(PETSC_USE_COMPLEX)
601 4 : PetscCall(PetscFree(wi));
602 : #else
603 : PetscCall(PetscFree(rwork));
604 : #endif
605 4 : PetscFunctionReturn(PETSC_SUCCESS);
606 : }
607 :
608 2 : static PetscErrorCode FNEvaluateFunctionMat_Log_Higham(FN fn,Mat A,Mat B)
609 : {
610 2 : PetscBLASInt n = 0;
611 2 : PetscScalar *T;
612 2 : PetscInt m;
613 :
614 2 : PetscFunctionBegin;
615 2 : if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN));
616 2 : PetscCall(MatDenseGetArray(B,&T));
617 2 : PetscCall(MatGetSize(A,&m,NULL));
618 2 : PetscCall(PetscBLASIntCast(m,&n));
619 2 : PetscCall(FNLogmPade(fn,n,T,n,PETSC_FALSE));
620 2 : PetscCall(MatDenseRestoreArray(B,&T));
621 2 : PetscFunctionReturn(PETSC_SUCCESS);
622 : }
623 :
624 2 : static PetscErrorCode FNEvaluateFunctionMatVec_Log_Higham(FN fn,Mat A,Vec v)
625 : {
626 2 : PetscBLASInt n = 0;
627 2 : PetscScalar *T;
628 2 : PetscInt m;
629 2 : Mat B;
630 :
631 2 : PetscFunctionBegin;
632 2 : PetscCall(FN_AllocateWorkMat(fn,A,&B));
633 2 : PetscCall(MatDenseGetArray(B,&T));
634 2 : PetscCall(MatGetSize(A,&m,NULL));
635 2 : PetscCall(PetscBLASIntCast(m,&n));
636 2 : PetscCall(FNLogmPade(fn,n,T,n,PETSC_TRUE));
637 2 : PetscCall(MatDenseRestoreArray(B,&T));
638 2 : PetscCall(MatGetColumnVector(B,v,0));
639 2 : PetscCall(FN_FreeWorkMat(fn,&B));
640 2 : PetscFunctionReturn(PETSC_SUCCESS);
641 : }
642 :
643 4 : static PetscErrorCode FNView_Log(FN fn,PetscViewer viewer)
644 : {
645 4 : PetscBool isascii;
646 4 : char str[50];
647 4 : const char *methodname[] = {
648 : "scaling & squaring, [m/m] Pade approximant (Higham)"
649 : };
650 4 : const int nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);
651 :
652 4 : PetscFunctionBegin;
653 4 : PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
654 4 : if (isascii) {
655 4 : if (fn->beta==(PetscScalar)1.0) {
656 1 : if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer," logarithm: log(x)\n"));
657 : else {
658 0 : PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE));
659 0 : PetscCall(PetscViewerASCIIPrintf(viewer," logarithm: log(%s*x)\n",str));
660 : }
661 : } else {
662 3 : PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE));
663 3 : if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer," logarithm: %s*log(x)\n",str));
664 : else {
665 3 : PetscCall(PetscViewerASCIIPrintf(viewer," logarithm: %s",str));
666 3 : PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
667 3 : PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE));
668 3 : PetscCall(PetscViewerASCIIPrintf(viewer,"*log(%s*x)\n",str));
669 3 : PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
670 : }
671 : }
672 4 : if (fn->method<nmeth) PetscCall(PetscViewerASCIIPrintf(viewer," computing matrix functions with: %s\n",methodname[fn->method]));
673 : }
674 4 : PetscFunctionReturn(PETSC_SUCCESS);
675 : }
676 :
677 3 : SLEPC_EXTERN PetscErrorCode FNCreate_Log(FN fn)
678 : {
679 3 : PetscFunctionBegin;
680 3 : fn->ops->evaluatefunction = FNEvaluateFunction_Log;
681 3 : fn->ops->evaluatederivative = FNEvaluateDerivative_Log;
682 3 : fn->ops->evaluatefunctionmat[0] = FNEvaluateFunctionMat_Log_Higham;
683 3 : fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Log_Higham;
684 3 : fn->ops->view = FNView_Log;
685 3 : PetscFunctionReturn(PETSC_SUCCESS);
686 : }
|