Actual source code: dsghiep_dqds.c
1: /*
2: DQDS-type dense solver for generalized symmetric-indefinite eigenproblem.
3: Based on Matlab code from Carla Ferreira.
5: References:
7: [1] C. Ferreira, B. Parlett, "Real DQDS for the nonsymmetric tridiagonal
8: eigenvalue problem", preprint, 2012.
10: [2] C. Ferreira. The unsymmetric tridiagonal eigenvalue problem. Ph.D
11: Thesis, University of Minho, 2007.
13: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
14: SLEPc - Scalable Library for Eigenvalue Problem Computations
15: Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain
17: This file is part of SLEPc.
18:
19: SLEPc is free software: you can redistribute it and/or modify it under the
20: terms of version 3 of the GNU Lesser General Public License as published by
21: the Free Software Foundation.
23: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
24: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
25: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
26: more details.
28: You should have received a copy of the GNU Lesser General Public License
29: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
30: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
31: */
32: #include <slepc-private/dsimpl.h> /*I "slepcds.h" I*/
33: #include <slepcblaslapack.h>
35: extern PetscErrorCode DSGHIEPComplexEigs(DS,PetscInt,PetscInt,PetscScalar*,PetscScalar*);
36: extern PetscErrorCode DSGHIEPPseudoOrthogInverseIteration(DS,PetscScalar*,PetscScalar*);
37: extern PetscErrorCode DSIntermediate_GHIEP(DS);
41: /*
42: INPUT:
43: a ---- diagonal of J
44: b ---- subdiagonal of J;
45: the superdiagonal of J is all 1's
47: OUTPUT:
48: For an eigenvalue lambda of J we have:
49: gl=<real(lambda)<=gr
50: -sigma=<imag(lambda)<=sigma
51: */
52: static PetscErrorCode ScanJ(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *gl,PetscReal *gr,PetscReal *sigma)
53: {
54: PetscInt i;
55: PetscReal b0,b1,rad;
58: /* For original matrix C, C_bal=T+S; T-symmetric and S=skew-symmetric
59: C_bal is the balanced form of C */
60: /* Bounds on the imaginary part of C (Gersgorin bound for S)*/
61: *sigma = 0.0;
62: b0 = 0.0;
63: for (i=0;i<n-1;i++) {
64: if (b[i]<0.0) b1 = PetscSqrtReal(-b[i]);
65: else b1 = 0.0;
66: *sigma = PetscMax(*sigma,b1+b0);
67: b0 = b1;
68: }
69: *sigma = PetscMax(*sigma,b0);
70: /* Gersgorin bounds for T (=Gersgorin bounds on the real part for C) */
71: rad = (b[0]>0.0)?PetscSqrtReal(b[0]):0.0; /* rad = b1+b0, b0 = 0 */
72: *gr = a[0]+rad;
73: *gl = a[0]-rad;
74: b0 = rad;
75: for (i=1;i<n-1;i++) {
76: b1 = (b[i]>0.0)?PetscSqrtReal(b[i]):0.0;
77: rad = b0+b1;
78: *gr = PetscMax(*gr,a[i]+rad);
79: *gl = PetscMin(*gl,a[i]-rad);
80: b0 = b1;
81: }
82: rad = b0;
83: *gr = PetscMax(*gr,a[n-1]+rad);
84: *gl = PetscMin(*gl,a[n-1]-rad);
86: return(0);
87: }
91: /*
92: INPUT:
93: a - vector with the diagonal elements
94: b - vector with the subdiagonal elements
95: gl - Gersgorin left bound (real axis)
96: gr - Gersgorin right bound (real axis)
97: OUTPUT:
98: eigvalue - multiple eigenvalue (if there is an eigenvalue)
99: m - its multiplicity (m=0 if there isn't a multiple eigenvalue)
100: X - matrix of generalized eigenvectors
101: shift
102: */
103: static PetscErrorCode Prologue(PetscInt n,PetscReal *a,PetscReal *b,PetscReal gl,PetscReal gr,PetscInt *m,PetscReal *shift,PetscReal *w,PetscReal nw)
104: {
107: PetscReal mu,tol,*a1,*work,*y,*yp,*x,*xp;
108: PetscInt i,k,nwall=0;
111: *m = 0;
112: mu = 0.0;
113: for (i=0;i<n;i++) mu += a[i];
114: mu /= n;
115: tol=n*PETSC_MACHINE_EPSILON*(gr-gl);
116: nwall = 5*n+4;
117: if (w && nw>=nwall) {
118: work = w;
119: nwall = nw;
120: }else {
121: PetscMalloc(nwall*sizeof(PetscReal),&work);
122: PetscMemzero(work,nwall*sizeof(PetscReal));
123: }
124: a1 = work; /* size n */
125: y = work+n; /* size n+1 */
126: yp = y+n+1; /* size n+1. yp is the derivative of y (p for "prime") */
127: x = yp+n+1; /* size n+1 */
128: xp = x+n+1; /* size n+1 */
129: for (i=0;i<n;i++) a1[i] = mu-a[i];
130: x[0] = 1;
131: xp[0] = 0;
132: x[1] = a1[0];
133: xp[1] = 1;
134: for (i=1;i<n;i++) {
135: x[i+1]=a1[i]*x[i]-b[i-1]*x[i-1];
136: xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1];
137: }
138: *shift = mu;
139: if ( PetscAbsReal(x[n])<tol) {
140: /* mu is an eigenvalue */
141: *m = *m+1;
142: if ( PetscAbsReal(xp[n])<tol ) {
143: /* mu is a multiple eigenvalue; Is it the one-point spectrum case? */
144: k = 0;
145: while (PetscAbsReal(xp[n])<tol && k<n-1) {
146: PetscMemcpy(x,y,(n+1)*sizeof(PetscReal));
147: PetscMemcpy(xp,yp,(n+1)*sizeof(PetscReal));
148: x[k] = 0.0;
149: k++;
150: x[k] = 1.0;
151: xp[k] = 0.0;
152: x[k+1] = a1[k] + y[k];
153: xp[k+1] = 1+yp[k];
154: for (i=k+1;i<n;i++) {
155: x[i+1] = a1[i]*x[i]-b[i-1]*x[i-1]+y[i];
156: xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1]+yp[i];
157: }
158: *m = *m+1;
159: }
160: }
161: }
162: if (work != w) {
163: PetscFree(work);
164: }
165: /*
166: When mu is not an eigenvalue or it it an eigenvalue but it is not the one-point spectrum case, we will always have shift=mu
168: Need to check for overflow!
170: After calling Prologue, eigenComplexdqds and eigen3dqds will test if m==n in which case we have the one-point spectrum case;
171: If m!=0, the only output to be used is the shift returned.
172: */
173: return(0);
174: }
179: static PetscErrorCode LUfac(PetscInt n,PetscReal *a,PetscReal *b,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L,PetscReal *U,PetscInt *fail,PetscReal *w,PetscInt nw) {
181: PetscInt nwall,i;
182: PetscReal *work,*a1;
185: nwall = n;
186: if (w && nw>=nwall) {
187: work = w;
188: nwall = nw;
189: } else {
190: PetscMalloc(nwall*sizeof(PetscReal),&work);
191: }
192: a1 = work;
193: for (i=0;i<n;i++) a1[i] = a[i]-shift;
194: *fail = 0;
195: for (i=0;i<n-1;i++) {
196: U[i] = a1[i];
197: L[i] = b[i]/U[i];
198: a1[i+1] = a1[i+1]-L[i];
199: }
200: U[n-1] = a1[n-1];
202: /* Check if there are NaN values */
203: for (i=0;i<n-1 && *fail==0;i++) {
204: if (PetscIsInfOrNanReal(L[i])) *fail=1;
205: if (PetscIsInfOrNanReal(U[i])) *fail=1;
206: }
207: if (*fail==0 && PetscIsInfOrNanReal(U[n-1])) *fail=1;
209: for (i=0;i<n-1 && *fail==0;i++) {
210: if ( PetscAbsReal(L[i])>tol*norm) *fail = 1; /* This demands IEEE arithmetic */
211: if ( PetscAbsReal(U[i])>tol*norm) *fail = 1;
212: }
213: if ( *fail==0 && PetscAbsReal(U[n-1])>tol*norm) *fail = 1;
214:
215: if (work != w) {
216: PetscFree(work);
217: }
218: return(0);
219: }
223: static PetscErrorCode realDQDS(PetscInt n,PetscReal *L,PetscReal *U,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L1,PetscReal *U1,PetscInt *fail)
224: {
225: PetscReal d;
226: PetscInt i;
229: *fail = 0;
230: d = U[0]-shift;
231: for (i=0;i<n-1;i++) {
232: U1[i] = d+L[i];
233: L1[i] = L[i]*(U[i+1]/U1[i]);
234: d = d*(U[i+1]/U1[i])-shift;
235: }
236: U1[n-1]=d;
238: /* The following demands IEEE arithmetic */
239: for (i=0;i<n-1 && *fail==0;i++) {
240: if (PetscIsInfOrNanReal(L1[i])) *fail=1;
241: if (PetscIsInfOrNanReal(U1[i])) *fail=1;
242: }
243: if (*fail==0 && PetscIsInfOrNanReal(U1[n-1])) *fail=1;
244: for (i=0;i<n-1 && *fail==0;i++) {
245: if (PetscAbsReal(L1[i])>tol*norm) *fail=1;
246: if (PetscAbsReal(U1[i])>tol*norm) *fail=1;
247: }
248: if (*fail==0 && PetscAbsReal(U1[n-1])>tol*norm) *fail=1;
249: return(0);
250: }
254: static PetscErrorCode tridqdsZhuang3(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscInt *fail)
255: {
256: PetscReal xl,yl,xr,yr,zr,t;
257: PetscInt i;
260: *fail = 0;
261: xr = 1.0;
262: yr = e[0];
263: zr = 0.0;
264: /* Step 1 */
265: /* the efect of Z1 */
266: xr = xr*q[0]+yr;
267: /* the inverse of L1 */
268: xl = (q[0]+e[0])*(q[0]+e[0])+q[1]*e[0]-sum*(q[0]+e[0])+prod;
269: yl = -(q[2]*e[1]*q[1]*e[0])/xl;
270: xl = -(q[1]*e[0]*(q[0]+e[0]+q[1]+e[1]-sum))/xl;
271: /* the efect of L1 */
272: q[0] = xr-xl;
273: xr = yr-xl;
274: yr = zr-yl-xl*e[1];
275: /*the inverse of Y1 */
276: xr = xr/q[0];
277: yr = yr/q[0];
278: /*the effect of Y1 inverse */
279: e[0] = xl+yr+xr*q[1];
280: xl = yl+zr+yr*q[2]; /* zr=0 when n=3 */
281: /*the effect of Y1 */
282: xr = 1.0-xr;
283: yr = e[1]-yr;
285: /* STEP n-1 */
287: if (PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(q[n-3])) {
288: /* the efect of Zn-1 */
289: xr = xr*q[n-2]+yr;
290: /* the inverse of Ln-1 */
291: xl = -xl/e[n-3];
292: /* the efect of Ln-1 */
293: q[n-2] = xr-xl;
294: xr = yr-xl;
295: /*the inverse of Yn-1 */
296: xr = xr/q[n-2];
297: /*the effect of the inverse of Yn-1 */
298: e[n-2] = xl+xr*q[n-1];
299: /*the effects of Yn-1 */
300: xr = 1.0-xr;
301: /* STEP n */
302: /*the effect of Zn */
303: xr = xr*q[n-1];
304: /*the inverse of Ln=I */
305: /*the effect of Ln */
306: q[n-1] = xr;
307: /* the inverse of Yn-1=I */
309: } else { /* Free deflation */
310: e[n-2] = (e[n-3]+(xr*q[n-2]+yr)+q[n-1])*0.5; /* Sum=trace/2 */
311: q[n-2] = (e[n-3]+q[n-2]*xr)*q[n-1]-xl; /* det */
312: t = ((e[n-3]+(xr*q[n-2]+yr)-q[n-1])*0.5);
313: q[n-1] = t*t+(xl+q[n-1]*yr);
314: *fail = 2;
315: }
317: /* The following demands IEEE arithmetic */
318: for (i=0;i<n-1 && *fail==0;i++) {
319: if (PetscIsInfOrNanReal(e[i])) *fail=1;
320: if (PetscIsInfOrNanReal(q[i])) *fail=1;
321: }
322: if (*fail==0 && PetscIsInfOrNanReal(q[n-1])) *fail=1;
323: for (i=0;i<n-1 && *fail==0;i++) {
324: if (PetscAbsReal(e[i])>tol*norm) *fail=1;
325: if (PetscAbsReal(q[i])>tol*norm) *fail=1;
326: }
327: if (*fail==0 && PetscAbsReal(q[n-1])>tol*norm) *fail=1;
328: return(0);
329: }
333: static PetscErrorCode tridqdsZhuang(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscReal *e1,PetscReal *q1,PetscInt *fail) {
335: PetscInt i;
336: PetscReal xl,yl,xr,yr,zr,t;
339: for (i=0;i<n-1;i++) {
340: e1[i] = e[i];
341: q1[i] = q[i];
342: }
343: q1[n-1] = q[n-1];
344: *fail = 0;
345: if (n>3) { /* For n>3 */
346: *fail = 0;
347: xr = 1;
348: yr = e1[0];
349: zr = 0;
350: /* step 1 */
351: /* the efect of Z1 */
352: xr = xr*q1[0]+yr;
353: /* the inverse of L1 */
354: xl = (q1[0]+e1[0])*(q1[0]+e1[0])+q1[1]*e1[0]-sum*(q1[0]+e1[0])+prod;
355: yl = -(q1[2]*e1[1]*q1[1]*e1[0])/xl;
356: xl = -(q1[1]*e1[0]*(q1[0]+e1[0]+q1[1]+e1[1]-sum))/xl;
357: /* the efect of L1 */
358: q1[0] = xr-xl;
359: xr = yr-xl;
360: yr = zr-yl-xl*e1[1];
361: zr = -yl*e1[2];
362: /* the inverse of Y1 */
363: xr = xr/q1[0];
364: yr = yr/q1[0];
365: zr = zr/q1[0];
366: /* the effect of Y1 inverse */
367: e1[0] = xl+yr+xr*q1[1];
368: xl = yl+zr+yr*q1[2];
369: yl = zr*q1[3];
370: /* the effect of Y1 */
371: xr = 1-xr;
372: yr = e1[1]-yr;
373: zr = -zr;
374: /* step i=2,...,n-3 */
375: for (i=1;i<n-3;i++) {
376: /* the efect of Zi */
377: xr = xr*q1[i]+yr;
378: /* the inverse of Li */
379: xl = -xl/e1[i-1];
380: yl = -yl/e1[i-1];
381: /* the efect of Li */
382: q1[i] = xr-xl;
383: xr = yr-xl;
384: yr = zr-yl-xl*e1[i+1];
385: zr = -yl*e1[i+2];
386: /* the inverse of Yi */
387: xr = xr/q1[i];
388: yr = yr/q1[i];
389: zr = zr/q1[i];
390: /* the effect of the inverse of Yi */
391: e1[i] = xl+yr+xr*q1[i+1];
392: xl = yl+zr+yr*q1[i+2];
393: yl = zr*q1[i+3];
394: /* the effects of Yi */
395: xr = 1.0-xr;
396: yr = e1[i+1]-yr;
397: zr = -zr;
398: }
400: /* STEP n-2 zr is no longer needed */
402: /* the efect of Zn-2 */
403: xr = xr*q1[n-3]+yr;
404: /* the inverse of Ln-2 */
405: xl = -xl/e1[n-4];
406: yl = -yl/e1[n-4];
407: /* the efect of Ln-2 */
408: q1[n-3] = xr-xl;
409: xr = yr-xl;
410: yr = zr-yl-xl*e1[n-2];
411: /* the inverse of Yn-2 */
412: xr = xr/q1[n-3];
413: yr = yr/q1[n-3];
414: /* the effect of the inverse of Yn-2 */
415: e1[n-3] = xl+yr+xr*q1[n-2];
416: xl = yl+yr*q1[n-1];
417: /* the effect of Yn-2 */
418: xr = 1.0-xr;
419: yr = e1[n-2]-yr;
420:
421: /* STEP n-1 yl and yr are no longer needed */
422: /* Testing for EARLY DEFLATION */
424: if (PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(q1[n-3])) {
425: /* the efect of Zn-1 */
426: xr = xr*q1[n-2]+yr;
427: /* the inverse of Ln-1 */
428: xl = -xl/e1[n-3];
429: /* the efect of Ln-1 */
430: q1[n-2] = xr-xl;
431: xr = yr-xl;
432: /*the inverse of Yn-1 */
433: xr = xr/q1[n-2];
434: /*the effect of the inverse of Yn-1 */
435: e1[n-2] = xl+xr*q1[n-1];
436: /*the effects of Yn-1 */
437: xr = 1.0-xr;
438:
439: /* STEP n; xl no longer needed */
440: /* the effect of Zn */
441: xr = xr*q1[n-1];
442: /* the inverse of Ln = I */
443: /* the effect of Ln */
444: q1[n-1] = xr;
445: /* the inverse of Yn-1=I */
447: } else { /* FREE DEFLATION */
448: e1[n-2] = (e1[n-3]+xr*q1[n-2]+yr+q1[n-1])*0.5; /* sum=trace/2 */
449: q1[n-2] = (e1[n-3]+q1[n-2]*xr)*q1[n-1]-xl; /* det */
450: t = (e1[n-3]+xr*q1[n-2]+yr-q1[n-1])*0.5;
451: q1[n-1] = t*t+xl+q1[n-1]*yr;
452: *fail = 2;
453: }
455: for (i=0;i<n-1 && *fail==0;i++) {
456: if (PetscIsInfOrNanReal(e1[i])) *fail=1;
457: if (PetscIsInfOrNanReal(q1[i])) *fail=1;
458: }
459: if (*fail==0 && PetscIsInfOrNanReal(q1[n-1])) *fail=1;
460: for (i=0;i<n-1 && *fail==0;i++) {
461: if (PetscAbsReal(e1[i])>tol*norm) *fail = 1; /* This demands IEEE arithmetic */
462: if (PetscAbsReal(q1[i])>tol*norm) *fail = 1;
463: }
464: if ( *fail==0 && PetscAbsReal(q1[n-1])>tol*norm) *fail = 1;
465:
466: } else { /* The case n=3 */
467: tridqdsZhuang3(n,e1,q1,sum,prod,tol,norm,tolDef,fail);
468: }
469: return(0);
470: }
474: static PetscErrorCode DSGHIEP_Eigen3DQDS(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *c, PetscScalar *wr, PetscScalar *wi,PetscReal *w,PetscInt nw)
475: {
476: PetscInt totalIt=0; /* Total Number of Iterations */
477: PetscInt totalFail=0; /* Total number of failures */
478: PetscInt nFail=0; /* Number of failures per transformation */
479: PetscReal tolZero=1.0/16; /* Tolerance for zero shifts */
480: PetscInt maxIt=10*n; /* Maximum number of iterations */
481: PetscInt maxFail=10*n; /* Maximum number of failures allowed per each transformation */
482: PetscReal tolDef=PETSC_MACHINE_EPSILON; /* Tolerance for deflation eps, 10*eps, 100*eps */
483: PetscReal tolGrowth=100000;
485: PetscInt i,k,nwu=0,nwall,begin,ind,flag,dim,m;
486: PetscReal norm,gr,gl,sigma,delta,meanEig,*work,*U,*L,*U1,*L1,*split;
487: PetscReal acShift,initialShift,shift=0.0,sum,det,disc,prod,x1,x2;
488: PetscInt realSteps,complexSteps,earlyDef,lastSplit,splitCount;
489: PetscBool test1,test2;
492: dim = n;
493: /* Test if the matrix is unreduced */
494: for (i=0;i<n-1;i++) {
495: if (PetscAbsReal(b[i])==0 || PetscAbsReal(c[i])==0) {
496: SETERRQ(PETSC_COMM_SELF,1,"Initial tridiagonal matrix is not unreduced");
497: }
498: }
499: nwall = 9*n+4;
500: if (w && nw>=nwall) {
501: work = w;
502: nwall = nw;
503: } else {
504: PetscMalloc(nwall*sizeof(PetscReal),&work);
505: }
506: U = work;
507: L = work+n;
508: U1 = work+2*n;
509: L1 = work+3*n;
510: nwu = 4*n;
511: PetscMemzero(wi,n*sizeof(PetscScalar));
512: /* Normalization - the J form of C */
513: for (i=0;i<n-1;i++) b[i] *= c[i]; /* subdiagonal of the J form */
515: /* Scan matrix J ---- Finding a box of inclusion for the eigenvalues */
516: norm = 0.0;
517: for (i=0;i<n-1;i++) {
518: norm = PetscMax(norm,PetscMax(PetscAbsReal(a[i]),PetscAbsReal(b[i])));
519: }
520: norm = PetscMax(norm,PetscMax(1,PetscAbsReal(a[n-1])));
521: ScanJ(n,a,b,&gl,&gr,&sigma);
522: delta = (gr-gl)/n; /* How much to add to the shift, in case of failure (element growth) */
523: meanEig = 0.0;
524: for (i=0;i<n;i++) meanEig += a[i];
525: meanEig /= n; /* shift = initial shift = mean of eigenvalues */
526: Prologue(n,a,b,gl,gr,&m,&shift,work+nwu,nwall-nwu);
527: if (m==n) { /* Multiple eigenvalue, we have the one-point spectrum case */
528: for (i=0;i<dim;i++) {
529: wr[i] = shift;
530: wi[i] = 0.0;
531: }
532: return(0);
533: }
534: /* Initial LU Factorization */
535: if ( delta==0 ) shift=0;/* The case when all eigenvalues are pure imaginary */
536: LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu,nwall-nwu); /* flag=1 failure; flag=0 successful transformation*/
537: while (flag==1 && nFail<maxFail) {
538: shift=shift+delta;
539: if (shift>gr || shift<gl) { /* Successive failures */
540: shift=meanEig;
541: delta=-delta;
542: }
543: nFail=nFail+1;
544: LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu,nwall-nwu); /* flag=1 failure; flag=0 successful transformation*/
545: }
546: if (nFail==maxFail) {
547: SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached in Initial LU factorization");
548: }
549: /* Successful Initial transformation */
550: totalFail = totalFail+nFail;
551: nFail = 0;
552: acShift = 0;
553: initialShift = shift;
554: shift = 0;
555: begin = 0;
556: lastSplit = 0;
557: split = work+nwu;
558: nwu += n;
559: split[lastSplit] = begin;
560: splitCount = 0;
561: while (begin!=-1) {
562: while (n-begin>2 && totalIt<maxIt) {
563: /* Check for deflation before performing a transformation */
564: test1 = ((PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])) && (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)) && (PetscAbsReal(L[n-2]*U[n])<tolDef*PetscAbsReal(acShift+U[n-1])) && (PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)<tolDef*PetscAbsReal(acShift+U[n-1])))? PETSC_TRUE: PETSC_FALSE;
565: if (flag==2) { /* Early 2x2 deflation */
566: earlyDef=earlyDef+1;
567: test2 = PETSC_TRUE;
568: } else {
569: if (n-begin>4) {
570: test2 = ((PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])) && (PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3])))? PETSC_TRUE: PETSC_FALSE;
571: } else { /* n-begin+1=3 */
572: test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
573: }
574: }
575: while (test2 || test1) {
576: /* 2x2 deflation */
577: if (test2) {
578: if (flag==2) { /* Early deflation */
579: sum = L[n-2];
580: det = U[n-2];
581: disc = U[n-1];
582: flag = 0;
583: } else {
584: sum = (L[n-2]+(U[n-2]+U[n-1]))/2;
585: disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
586: det = U[n-2]*U[n-1];
587: }
588: if (disc<=0) {
589: #if !defined(PETSC_USE_COMPLEX)
590: wr[--n] = sum+acShift; wi[n] = PetscSqrtReal(-disc);
591: wr[--n] = sum+acShift; wi[n] = -PetscSqrtReal(-disc);
592: #else
593: wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; wi[n] = 0.0;
594: wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; wi[n] = 0.0;
595: #endif
596: } else {
597: if (sum==0) {
598: x1 = PetscSqrtReal(disc);
599: x2 = -x1;
600: } else {
601: x1 = ((sum>=0.0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
602: x2 = det/x1;
603: }
604: wr[--n] = x1+acShift;
605: wr[--n] = x2+acShift;
606: }
607: } else { /* test1 -- 1x1 deflation */
608: x1 = U[n-1]+acShift;
609: wr[--n] = x1;
610: }
611:
612: if (n<=begin+2) {
613: break;
614: } else {
615: test1 = ((PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])) && (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)) && (PetscAbsReal(L[n-2]*U[n-1])<tolDef*PetscAbsReal(acShift+U[n-1])) && (PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)< tolDef*PetscAbsReal(acShift+U[n-1])))? PETSC_TRUE: PETSC_FALSE;
616: if (n-begin>4) {
617: test2 = ((PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])) && (PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3])))? PETSC_TRUE: PETSC_FALSE;
618: } else { /* n-begin+1=3 */
619: test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
620: }
621: }
622: } /* end "WHILE deflations" */
623: /* After deflation */
624: if (n>begin+3) {
625: ind = begin;
626: for (k=n-4;k>=begin+1;k--) {
627: if ( (PetscAbsReal(L[k])<tolDef*PetscAbsReal(U[k]))&&(PetscAbsReal(L[k]*U[k+1]*(U[k+2]+L[k+2])*(U[k-1]+L[k-1]))<tolDef*PetscAbsReal((U[k-1]*(U[k]+L[k])+L[k-1]*L[k])*(U[k+1]*(U[k+2]+L[k+2])+L[k+1]*L[k+2]))) ) {
628: ind=k;
629: break;
630: }
631: }
632: if ( ind>begin || PetscAbsReal(L[begin]) <tolDef*PetscAbsReal(U[begin]) ) {
633: lastSplit = lastSplit+1;
634: split[lastSplit] = begin;
635: L[ind] = acShift; /* Use of L[ind] to save acShift */
636: begin = ind+1;
637: splitCount = splitCount+1;
638: }
639: }
640:
641: if (n>begin+2) {
642: disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
643: if ( (PetscAbsReal(L[n-2])>tolZero) && (PetscAbsReal(L[n-3])>tolZero)) { /* L's are big */
644: shift = 0;
645: sum = 0; /* Needed in case of failure */
646: prod = 0;
647: realDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
648: realSteps++;
649: if (flag) { /* Failure */
650: tridqdsZhuang(n-begin,L+begin,U+begin,0.0,0.0,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
651: shift = 0.0;
652: while (flag==1 && nFail<maxFail) { /* Successive failures */
653: shift = shift+delta;
654: if (shift>gr-acShift || shift<gl-acShift) {
655: shift = meanEig-acShift;
656: delta = -delta;
657: }
658: nFail++;
659: realDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
660: }
661: }
662: } else { /* L's are small */
663: if (disc<0) { /* disc <0 Complex case; Francis shift; 3dqds */
664: sum = U[n-2]+L[n-2]+U[n-1];
665: prod = U[n-2]*U[n-1];
666: tridqdsZhuang(n-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
667: complexSteps++;
668: shift = 0.0; /* Restoring transformation */
669: while (flag==1 && nFail<maxFail) { /* In case of failure */
670: shift = shift+U[n-1]; /* first time shift=0 */
671: realDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
672: nFail++;
673: }
674: } else { /* disc >0 Real case; real Wilkinson shift; dqds */
675: sum = (L[n-2]+U[n-2]+U[n-1])/2;
676: if (sum==0) {
677: x1 = PetscSqrtReal(disc);
678: x2 = -x1;
679: } else {
680: x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
681: x2 = U[n-2]*U[n-1]/x1;
682: }
683: /* Take the eigenvalue closest to UL(n,n) */
684: if (PetscAbsReal(x1-U[n-1])<PetscAbsReal(x2-U[n-1])) {
685: shift = x1;
686: } else {
687: shift = x2;
688: }
689: realDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
690: realSteps++;
691: /* In case of failure */
692: while (flag==1 && nFail<maxFail) {
693: sum = 2*shift;
694: prod = shift*shift;
695: tridqdsZhuang(n-1-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
696: /* In case of successive failures */
697: if (shift==0) {
698: shift = PetscMin(PetscAbsReal(L[n-2]),PetscAbsReal(L[n-3]))*delta;
699: } else {
700: shift=shift+delta;
701: }
702: if (shift>gr-acShift || shift<gl-acShift) {
703: shift = meanEig-acShift;
704: delta = -delta;
705: }
706: if (flag==0) { /* We changed from real dqds to 3dqds */
707: shift=0;
708: }
709: nFail++;
710: }
711: }
712: } /* end "if tolZero" */
713: if (nFail==maxFail) {
714: SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached. No convergence in DQDS");
715: }
716: /* Successful Transformation; flag==0 */
717: totalIt++;
718: acShift = shift+acShift;
719: for (i=begin;i<n-1;i++) {
720: L[i] = L1[i];
721: U[i] = U1[i];
722: }
723: U[n-1] = U1[n-1];
724: totalFail = totalFail+nFail;
725: nFail = 0;
726: } /* end "if n>begin+1" */
727: } /* end WHILE 1 */
728: if (totalIt>=maxIt) {
729: SETERRQ(PETSC_COMM_SELF,1,"Maximun number of iterations reached. No convergence in DQDS");
730: }
731: /* END: n=2 or n=1 % n=begin+1 or n=begin */
732: if (n==begin+2) {
733: sum = (L[n-2]+U[n-2]+U[n-1])/2;
734: disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
735: if (disc<=0) { /* Complex case */
736: /* Deflation 2 */
737: #if !defined(PETSC_USE_COMPLEX)
738: wr[--n] = sum+acShift; wi[n] = PetscSqrtReal(-disc);
739: wr[--n] = sum+acShift; wi[n] = -PetscSqrtReal(-disc);
740: #else
741: wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; wi[n] = 0.0;
742: wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; wi[n] = 0.0;
743: #endif
744: }else { /* Real case */
745: if (sum==0) {
746: x1 = PetscSqrtReal(disc);
747: x2 = -x1;
748: } else {
749: x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
750: x2 = U[n-2]*U[n-1]/x1;
751: }
752: /* Deflation 2 */
753: wr[--n] = x2+acShift;
754: wr[--n] = x1+acShift;
755: }
756: }else { /* n=1 n=begin */
757: /* deflation 1 */
758: x1 = U[n-1]+acShift;
759: wr[--n] = x1;
760: }
761: switch (n) {
762: case 0:
763: begin = -1;
764: break;
765: case 1:
766: acShift = L[begin-1];
767: begin = split[lastSplit];
768: lastSplit--;
769: break;
770: default : /* n>=2 */
771: acShift = L[begin-1];
772: begin = split[lastSplit];
773: lastSplit--;
774: }
775: }/* While begin~=-1 */
776: for (i=0;i<dim;i++) {
777: wr[i] = wr[i]+initialShift;
778: }
779: if (work!=w) {
780: PetscFree(work);
781: }
782: return(0);
783: }
786: PetscErrorCode DSSolve_GHIEP_DQDS_II(DS ds,PetscScalar *wr,PetscScalar *wi)
787: {
789: PetscInt i,off,ld,nwall,nwu;
790: PetscScalar *A,*B,*Q;
791: PetscReal *d,*e,*s,*a,*b,*c;
794: ld = ds->ld;
795: off = ds->l + ds->l*ld;
796: A = ds->mat[DS_MAT_A];
797: B = ds->mat[DS_MAT_B];
798: Q = ds->mat[DS_MAT_Q];
799: d = ds->rmat[DS_MAT_T];
800: e = ds->rmat[DS_MAT_T] + ld;
801: c = ds->rmat[DS_MAT_T] + 2*ld;
802: s = ds->rmat[DS_MAT_D];
803: /* Quick return if possible */
804: if (ds->n-ds->l == 1) {
805: *(Q+off) = 1;
806: if (ds->compact) {
807: wr[ds->l] = d[ds->l]/s[ds->l];
808: wi[ds->l] = 0.0;
809: } else {
810: d[ds->l] = PetscRealPart(A[off]);
811: s[ds->l] = PetscRealPart(B[off]);
812: wr[ds->l] = d[ds->l]/s[ds->l];
813: wi[ds->l] = 0.0;
814: }
815: return(0);
816: }
817: nwall = 12*ld+4;
818: DSAllocateWork_Private(ds,0,nwall,0);
819: /* Reduce to pseudotriadiagonal form */
820: DSIntermediate_GHIEP( ds);
821:
822: /* Compute Eigenvalues (DQDS)*/
823: /* Form pseudosymmetric tridiagonal */
824: a = ds->rwork;
825: b = a+ld;
826: c = b+ld;
827: nwu = 3*ld;
828: if (ds->compact) {
829: for (i=ds->l;i<ds->n-1;i++) {
830: a[i] = d[i]*s[i];
831: b[i] = e[i]*s[i+1];
832: c[i] = e[i]*s[i];
833: }
834: a[ds->n-1] = d[ds->n-1]*s[ds->n-1];
835: } else {
836: for (i=ds->l;i<ds->n-1;i++) {
837: a[i] = PetscRealPart(A[i+i*ld]*B[i+i*ld]);
838: b[i] = PetscRealPart(A[i+1+i*ld]*s[i+1]);
839: c[i] = PetscRealPart(A[i+(i+1)*ld]*s[i]);
840: }
841: a[ds->n-1] = PetscRealPart(A[ds->n-1+(ds->n-1)*ld]*B[ds->n-1+(ds->n-1)*ld]);
842: }
843: DSGHIEP_Eigen3DQDS(ds->n-ds->l,a+ds->l,b+ds->l,c+ds->l,wr+ds->l,wi+ds->l,ds->rwork+nwu,nwall-nwu);
845: /* Compute Eigenvectors with Inverse Iteration */
846: DSGHIEPPseudoOrthogInverseIteration(ds,wr,wi);
847:
848: /* Recover eigenvalues from diagonal */
849: DSGHIEPComplexEigs(ds, 0, ds->l, wr, wi);
850: return(0);
851: }