Actual source code: dsghiep_dqds.c
1: /*
2: DQDS-type dense solver for generalized symmetric-indefinite eigenproblem.
3: Based on Matlab code from Carla Ferreira.
5: References:
7: [1] C. Ferreira, B. Parlett, "Real DQDS for the nonsymmetric tridiagonal
8: eigenvalue problem", preprint, 2012.
10: [2] C. Ferreira. The unsymmetric tridiagonal eigenvalue problem. Ph.D
11: Thesis, University of Minho, 2007.
13: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
14: SLEPc - Scalable Library for Eigenvalue Problem Computations
15: Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain
17: This file is part of SLEPc.
18:
19: SLEPc is free software: you can redistribute it and/or modify it under the
20: terms of version 3 of the GNU Lesser General Public License as published by
21: the Free Software Foundation.
23: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
24: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
25: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
26: more details.
28: You should have received a copy of the GNU Lesser General Public License
29: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
30: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
31: */
32: #include <slepc-private/dsimpl.h> /*I "slepcds.h" I*/
33: #include <slepcblaslapack.h>
35: extern PetscErrorCode DSGHIEPComplexEigs(DS,PetscInt,PetscInt,PetscScalar*,PetscScalar*);
36: extern PetscErrorCode DSGHIEPPseudoOrthogInverseIteration(DS,PetscScalar*,PetscScalar*);
37: extern PetscErrorCode DSIntermediate_GHIEP(DS);
41: /*
42: INPUT:
43: a ---- diagonal of J
44: b ---- subdiagonal of J;
45: the superdiagonal of J is all 1's
47: OUTPUT:
48: For an eigenvalue lambda of J we have:
49: gl=<real(lambda)<=gr
50: -sigma=<imag(lambda)<=sigma
51: */
52: static PetscErrorCode ScanJ(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *gl,PetscReal *gr,PetscReal *sigma)
53: {
54: PetscInt i;
55: PetscReal b0,b1,rad;
58: /* For original matrix C, C_bal=T+S; T-symmetric and S=skew-symmetric
59: C_bal is the balanced form of C */
60: /* Bounds on the imaginary part of C (Gersgorin bound for S)*/
61: *sigma = 0.0;
62: b0 = 0.0;
63: for (i=0;i<n-1;i++) {
64: if (b[i]<0.0) b1 = PetscSqrtReal(-b[i]);
65: else b1 = 0.0;
66: *sigma = PetscMax(*sigma,b1+b0);
67: b0 = b1;
68: }
69: *sigma = PetscMax(*sigma,b0);
70: /* Gersgorin bounds for T (=Gersgorin bounds on the real part for C) */
71: rad = (b[0]>0.0)?PetscSqrtReal(b[0]):0.0; /* rad = b1+b0, b0 = 0 */
72: *gr = a[0]+rad;
73: *gl = a[0]-rad;
74: b0 = rad;
75: for (i=1;i<n-1;i++) {
76: b1 = (b[i]>0.0)?PetscSqrtReal(b[i]):0.0;
77: rad = b0+b1;
78: *gr = PetscMax(*gr,a[i]+rad);
79: *gl = PetscMin(*gl,a[i]-rad);
80: b0 = b1;
81: }
82: rad = b0;
83: *gr = PetscMax(*gr,a[n-1]+rad);
84: *gl = PetscMin(*gl,a[n-1]-rad);
86: return(0);
87: }
91: /*
92: INPUT:
93: a - vector with the diagonal elements
94: b - vector with the subdiagonal elements
95: gl - Gersgorin left bound (real axis)
96: gr - Gersgorin right bound (real axis)
97: OUTPUT:
98: eigvalue - multiple eigenvalue (if there is an eigenvalue)
99: m - its multiplicity (m=0 if there isn't a multiple eigenvalue)
100: X - matrix of generalized eigenvectors
101: shift
102: */
103: static PetscErrorCode Prologue(PetscInt n,PetscReal *a,PetscReal *b,PetscReal gl,PetscReal gr,PetscInt *m,PetscReal *shift,PetscReal *w,PetscReal nw)
104: {
107: PetscReal mu,tol,*a1,*work,*y,*yp,*x,*xp;
108: PetscInt i,k,nwall=0;
111: *m = 0;
112: mu = 0.0;
113: for (i=0;i<n;i++) mu += a[i];
114: mu /= n;
115: tol=n*PETSC_MACHINE_EPSILON*(gr-gl);
116: nwall = 5*n+4;
117: if (w && nw>=nwall) {
118: work = w;
119: nwall = nw;
120: }else {
121: PetscMalloc(nwall*sizeof(PetscReal),&work);
122: PetscMemzero(work,nwall*sizeof(PetscReal));
123: }
124: a1 = work; /* size n */
125: y = work+n; /* size n+1 */
126: yp = y+n+1; /* size n+1. yp is the derivative of y (p for "prime") */
127: x = yp+n+1; /* size n+1 */
128: xp = x+n+1; /* size n+1 */
129: for (i=0;i<n;i++) a1[i] = mu-a[i];
130: x[0] = 1;
131: xp[0] = 0;
132: x[1] = a1[0];
133: xp[1] = 1;
134: for (i=1;i<n;i++) {
135: x[i+1]=a1[i]*x[i]-b[i-1]*x[i-1];
136: xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1];
137: }
138: *shift = mu;
139: if ( PetscAbsReal(x[n])<tol) {
140: /* mu is an eigenvalue */
141: *m = *m+1;
142: if ( PetscAbsReal(xp[n])<tol ) {
143: /* mu is a multiple eigenvalue; Is it the one-point spectrum case? */
144: k = 0;
145: while (PetscAbsReal(xp[n])<tol && k<n-1) {
146: PetscMemcpy(x,y,(n+1)*sizeof(PetscReal));
147: PetscMemcpy(xp,yp,(n+1)*sizeof(PetscReal));
148: x[k] = 0.0;
149: k++;
150: x[k] = 1.0;
151: xp[k] = 0.0;
152: x[k+1] = a1[k] + y[k];
153: xp[k+1] = 1+yp[k];
154: for (i=k+1;i<n;i++) {
155: x[i+1] = a1[i]*x[i]-b[i-1]*x[i-1]+y[i];
156: xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1]+yp[i];
157: }
158: *m = *m+1;
159: }
160: }
161: }
162: if (work != w) {
163: PetscFree(work);
164: }
165: /*
166: When mu is not an eigenvalue or it it an eigenvalue but it is not the one-point spectrum case, we will always have shift=mu
168: Need to check for overflow!
170: After calling Prologue, eigenComplexdqds and eigen3dqds will test if m==n in which case we have the one-point spectrum case;
171: If m!=0, the only output to be used is the shift returned.
172: */
173: return(0);
174: }
179: static PetscErrorCode LUfac(PetscInt n,PetscReal *a,PetscReal *b,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L,PetscReal *U,PetscInt *fail,PetscReal *w,PetscInt nw) {
181: PetscInt nwall,i;
182: PetscReal *work,*a1;
185: nwall = n;
186: if (w && nw>=nwall) {
187: work = w;
188: nwall = nw;
189: } else {
190: PetscMalloc(nwall*sizeof(PetscReal),&work);
191: }
192: a1 = work;
193: for (i=0;i<n;i++) a1[i] = a[i]-shift;
194: *fail = 0;
195: for (i=0;i<n-1;i++) {
196: U[i] = a1[i];
197: L[i] = b[i]/U[i];
198: a1[i+1] = a1[i+1]-L[i];
199: }
200: U[n-1] = a1[n-1];
202: /* Check if there are NaN values */
203: for (i=0;i<n-1 && *fail==0;i++) {
204: if (PetscIsInfOrNanReal(L[i])) *fail=1;
205: if (PetscIsInfOrNanReal(U[i])) *fail=1;
206: }
207: if (*fail==0 && PetscIsInfOrNanReal(U[n-1])) *fail=1;
209: for (i=0;i<n-1 && *fail==0;i++) {
210: if ( PetscAbsReal(L[i])>tol*norm) *fail = 1; /* This demands IEEE arithmetic */
211: if ( PetscAbsReal(U[i])>tol*norm) *fail = 1;
212: }
213: if ( *fail==0 && PetscAbsReal(U[n-1])>tol*norm) *fail = 1;
214:
215: if (work != w) {
216: PetscFree(work);
217: }
218: return(0);
219: }
223: static PetscErrorCode realDQDS(PetscInt n,PetscReal *L,PetscReal *U,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L1,PetscReal *U1,PetscInt *fail)
224: {
225: PetscReal d;
226: PetscInt i;
229: *fail = 0;
230: d = U[0]-shift;
231: for (i=0;i<n-1;i++) {
232: U1[i] = d+L[i];
233: L1[i] = L[i]*(U[i+1]/U1[i]);
234: d = d*(U[i+1]/U1[i])-shift;
235: }
236: U1[n-1]=d;
238: /* The following demands IEEE arithmetic */
239: for (i=0;i<n-1 && *fail==0;i++) {
240: if (PetscIsInfOrNanReal(L1[i])) *fail=1;
241: if (PetscIsInfOrNanReal(U1[i])) *fail=1;
242: }
243: if (*fail==0 && PetscIsInfOrNanReal(U1[n-1])) *fail=1;
244: for (i=0;i<n-1 && *fail==0;i++) {
245: if (PetscAbsReal(L1[i])>tol*norm) *fail=1;
246: if (PetscAbsReal(U1[i])>tol*norm) *fail=1;
247: }
248: if (*fail==0 && PetscAbsReal(U1[n-1])>tol*norm) *fail=1;
249: return(0);
250: }
254: static PetscErrorCode tridqdsZhuang3(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscInt *fail)
255: {
256: PetscReal xl,yl,xr,yr,zr,t;
257: PetscInt i;
260: *fail = 0;
261: xr = 1.0;
262: yr = e[0];
263: zr = 0.0;
264: /* Step 1 */
265: /* the efect of Z1 */
266: xr = xr*q[0]+yr;
267: /* the inverse of L1 */
268: xl = (q[0]+e[0])*(q[0]+e[0])+q[1]*e[0]-sum*(q[0]+e[0])+prod;
269: yl = -(q[2]*e[1]*q[1]*e[0])/xl;
270: xl = -(q[1]*e[0]*(q[0]+e[0]+q[1]+e[1]-sum))/xl;
271: /* the efect of L1 */
272: q[0] = xr-xl;
273: xr = yr-xl;
274: yr = zr-yl-xl*e[1];
275: /*the inverse of Y1 */
276: xr = xr/q[0];
277: yr = yr/q[0];
278: /*the effect of Y1 inverse */
279: e[0] = xl+yr+xr*q[1];
280: xl = yl+zr+yr*q[2]; /* zr=0 when n=3 */
281: /*the effect of Y1 */
282: xr = 1.0-xr;
283: yr = e[1]-yr;
285: /* STEP n-1 */
287: if (PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(q[n-3])) {
288: /* the efect of Zn-1 */
289: xr = xr*q[n-2]+yr;
290: /* the inverse of Ln-1 */
291: xl = -xl/e[n-3];
292: /* the efect of Ln-1 */
293: q[n-2] = xr-xl;
294: xr = yr-xl;
295: /*the inverse of Yn-1 */
296: xr = xr/q[n-2];
297: /*the effect of the inverse of Yn-1 */
298: e[n-2] = xl+xr*q[n-1];
299: /*the effects of Yn-1 */
300: xr = 1.0-xr;
301: /* STEP n */
302: /*the effect of Zn */
303: xr = xr*q[n-1];
304: /*the inverse of Ln=I */
305: /*the effect of Ln */
306: q[n-1] = xr;
307: /* the inverse of Yn-1=I */
309: } else { /* Free deflation */
310: e[n-2] = (e[n-3]+(xr*q[n-2]+yr)+q[n-1])*0.5; /* Sum=trace/2 */
311: q[n-2] = (e[n-3]+q[n-2]*xr)*q[n-1]-xl; /* det */
312: t = ((e[n-3]+(xr*q[n-2]+yr)-q[n-1])*0.5);
313: q[n-1] = t*t+(xl+q[n-1]*yr);
314: *fail = 2;
315: }
317: /* The following demands IEEE arithmetic */
318: for (i=0;i<n-1 && *fail==0;i++) {
319: if (PetscIsInfOrNanReal(e[i])) *fail=1;
320: if (PetscIsInfOrNanReal(q[i])) *fail=1;
321: }
322: if (*fail==0 && PetscIsInfOrNanReal(q[n-1])) *fail=1;
323: for (i=0;i<n-1 && *fail==0;i++) {
324: if (PetscAbsReal(e[i])>tol*norm) *fail=1;
325: if (PetscAbsReal(q[i])>tol*norm) *fail=1;
326: }
327: if (*fail==0 && PetscAbsReal(q[n-1])>tol*norm) *fail=1;
328: return(0);
329: }
333: static PetscErrorCode tridqdsZhuang(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscReal *e1,PetscReal *q1,PetscInt *fail) {
335: PetscInt i;
336: PetscReal xl,yl,xr,yr,zr,t;
339: for (i=0;i<n-1;i++) {
340: e1[i] = e[i];
341: q1[i] = q[i];
342: }
343: q1[n-1] = q[n-1];
344: *fail = 0;
345: if (n>3) { /* For n>3 */
346: *fail = 0;
347: xr = 1;
348: yr = e1[0];
349: zr = 0;
350: /* step 1 */
351: /* the efect of Z1 */
352: xr = xr*q1[0]+yr;
353: /* the inverse of L1 */
354: xl = (q1[0]+e1[0])*(q1[0]+e1[0])+q1[1]*e1[0]-sum*(q1[0]+e1[0])+prod;
355: yl = -(q1[2]*e1[1]*q1[1]*e1[0])/xl;
356: xl = -(q1[1]*e1[0]*(q1[0]+e1[0]+q1[1]+e1[1]-sum))/xl;
357: /* the efect of L1 */
358: q1[0] = xr-xl;
359: xr = yr-xl;
360: yr = zr-yl-xl*e1[1];
361: zr = -yl*e1[2];
362: /* the inverse of Y1 */
363: xr = xr/q1[0];
364: yr = yr/q1[0];
365: zr = zr/q1[0];
366: /* the effect of Y1 inverse */
367: e1[0] = xl+yr+xr*q1[1];
368: xl = yl+zr+yr*q1[2];
369: yl = zr*q1[3];
370: /* the effect of Y1 */
371: xr = 1-xr;
372: yr = e1[1]-yr;
373: zr = -zr;
374: /* step i=2,...,n-3 */
375: for (i=1;i<n-3;i++) {
376: /* the efect of Zi */
377: xr = xr*q1[i]+yr;
378: /* the inverse of Li */
379: xl = -xl/e1[i-1];
380: yl = -yl/e1[i-1];
381: /* the efect of Li */
382: q1[i] = xr-xl;
383: xr = yr-xl;
384: yr = zr-yl-xl*e1[i+1];
385: zr = -yl*e1[i+2];
386: /* the inverse of Yi */
387: xr = xr/q1[i];
388: yr = yr/q1[i];
389: zr = zr/q1[i];
390: /* the effect of the inverse of Yi */
391: e1[i] = xl+yr+xr*q1[i+1];
392: xl = yl+zr+yr*q1[i+2];
393: yl = zr*q1[i+3];
394: /* the effects of Yi */
395: xr = 1.0-xr;
396: yr = e1[i+1]-yr;
397: zr = -zr;
398: }
400: /* STEP n-2 zr is no longer needed */
402: /* the efect of Zn-2 */
403: xr = xr*q1[n-3]+yr;
404: /* the inverse of Ln-2 */
405: xl = -xl/e1[n-4];
406: yl = -yl/e1[n-4];
407: /* the efect of Ln-2 */
408: q1[n-3] = xr-xl;
409: xr = yr-xl;
410: yr = zr-yl-xl*e1[n-2];
411: /* the inverse of Yn-2 */
412: xr = xr/q1[n-3];
413: yr = yr/q1[n-3];
414: /* the effect of the inverse of Yn-2 */
415: e1[n-3] = xl+yr+xr*q1[n-2];
416: xl = yl+yr*q1[n-1];
417: /* the effect of Yn-2 */
418: xr = 1.0-xr;
419: yr = e1[n-2]-yr;
420:
421: /* STEP n-1 yl and yr are no longer needed */
422: /* Testing for EARLY DEFLATION */
424: if (PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(q1[n-3])) {
425: /* the efect of Zn-1 */
426: xr = xr*q1[n-2]+yr;
427: /* the inverse of Ln-1 */
428: xl = -xl/e1[n-3];
429: /* the efect of Ln-1 */
430: q1[n-2] = xr-xl;
431: xr = yr-xl;
432: /*the inverse of Yn-1 */
433: xr = xr/q1[n-2];
434: /*the effect of the inverse of Yn-1 */
435: e1[n-2] = xl+xr*q1[n-1];
436: /*the effects of Yn-1 */
437: xr = 1.0-xr;
438:
439: /* STEP n; xl no longer needed */
440: /* the effect of Zn */
441: xr = xr*q1[n-1];
442: /* the inverse of Ln = I */
443: /* the effect of Ln */
444: q1[n-1] = xr;
445: /* the inverse of Yn-1=I */
447: } else { /* FREE DEFLATION */
448: e1[n-2] = (e1[n-3]+xr*q1[n-2]+yr+q1[n-1])*0.5; /* sum=trace/2 */
449: q1[n-2] = (e1[n-3]+q1[n-2]*xr)*q1[n-1]-xl; /* det */
450: t = (e1[n-3]+xr*q1[n-2]+yr-q1[n-1])*0.5;
451: q1[n-1] = t*t+xl+q1[n-1]*yr;
452: *fail = 2;
453: }
455: for (i=0;i<n-1 && *fail==0;i++) {
456: if (PetscIsInfOrNanReal(e1[i])) *fail=1;
457: if (PetscIsInfOrNanReal(q1[i])) *fail=1;
458: }
459: if (*fail==0 && PetscIsInfOrNanReal(q1[n-1])) *fail=1;
460: for (i=0;i<n-1 && *fail==0;i++) {
461: if (PetscAbsReal(e1[i])>tol*norm) *fail = 1; /* This demands IEEE arithmetic */
462: if (PetscAbsReal(q1[i])>tol*norm) *fail = 1;
463: }
464: if ( *fail==0 && PetscAbsReal(q1[n-1])>tol*norm) *fail = 1;
465:
466: } else { /* The case n=3 */
467: tridqdsZhuang3(n,e1,q1,sum,prod,tol,norm,tolDef,fail);
468: }
469: return(0);
470: }
474: static PetscErrorCode DSGHIEP_Eigen3DQDS(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *c, PetscScalar *wr, PetscScalar *wi,PetscReal *w,PetscInt nw)
475: {
476: PetscInt totalIt=0; /* Total Number of Iterations */
477: PetscInt totalFail=0; /* Total number of failures */
478: PetscInt nFail=0; /* Number of failures per transformation */
479: PetscReal tolZero=1.0/16; /* Tolerance for zero shifts */
480: PetscInt maxIt=10*n; /* Maximum number of iterations */
481: PetscInt maxFail=10*n; /* Maximum number of failures allowed per each transformation */
482: PetscReal tolDef=PETSC_MACHINE_EPSILON; /* Tolerance for deflation eps, 10*eps, 100*eps */
483: PetscReal tolGrowth=100000;
485: PetscInt i,k,nwu=0,nwall,begin,ind,flag,dim,m;
486: PetscReal norm,gr,gl,sigma,delta,meanEig,*work,*U,*L,*U1,*L1,*split;
487: PetscReal acShift,initialShift,shift=0.0,sum,det,disc,prod,x1,x2;
488: PetscInt realSteps,complexSteps,earlyDef,lastSplit,splitCount;
489: PetscBool test1,test2;
492: dim = n;
493: /* Test if the matrix is unreduced */
494: for (i=0;i<n-1;i++) {
495: if (PetscAbsReal(b[i])==0 || PetscAbsReal(c[i])==0) {
496: SETERRQ(PETSC_COMM_SELF,1,"Initial tridiagonal matrix is not unreduced");
497: }
498: }
499: nwall = 9*n+4;
500: if (w && nw>=nwall) {
501: work = w;
502: nwall = nw;
503: } else {
504: PetscMalloc(nwall*sizeof(PetscReal),&work);
505: }
506: U = work;
507: L = work+n;
508: U1 = work+2*n;
509: L1 = work+3*n;
510: nwu = 4*n;
511: if (wi) {
512: PetscMemzero(wi,n*sizeof(PetscScalar));
513: }
514: /* Normalization - the J form of C */
515: for (i=0;i<n-1;i++) b[i] *= c[i]; /* subdiagonal of the J form */
517: /* Scan matrix J ---- Finding a box of inclusion for the eigenvalues */
518: norm = 0.0;
519: for (i=0;i<n-1;i++) {
520: norm = PetscMax(norm,PetscMax(PetscAbsReal(a[i]),PetscAbsReal(b[i])));
521: }
522: norm = PetscMax(norm,PetscMax(1,PetscAbsReal(a[n-1])));
523: ScanJ(n,a,b,&gl,&gr,&sigma);
524: delta = (gr-gl)/n; /* How much to add to the shift, in case of failure (element growth) */
525: meanEig = 0.0;
526: for (i=0;i<n;i++) meanEig += a[i];
527: meanEig /= n; /* shift = initial shift = mean of eigenvalues */
528: Prologue(n,a,b,gl,gr,&m,&shift,work+nwu,nwall-nwu);
529: if (m==n) { /* Multiple eigenvalue, we have the one-point spectrum case */
530: for (i=0;i<dim;i++) {
531: wr[i] = shift;
532: if (wi) wi[i] = 0.0;
533: }
534: return(0);
535: }
536: /* Initial LU Factorization */
537: if ( delta==0 ) shift=0;/* The case when all eigenvalues are pure imaginary */
538: LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu,nwall-nwu); /* flag=1 failure; flag=0 successful transformation*/
539: while (flag==1 && nFail<maxFail) {
540: shift=shift+delta;
541: if (shift>gr || shift<gl) { /* Successive failures */
542: shift=meanEig;
543: delta=-delta;
544: }
545: nFail=nFail+1;
546: LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu,nwall-nwu); /* flag=1 failure; flag=0 successful transformation*/
547: }
548: if (nFail==maxFail) {
549: SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached in Initial LU factorization");
550: }
551: /* Successful Initial transformation */
552: totalFail = totalFail+nFail;
553: nFail = 0;
554: acShift = 0;
555: initialShift = shift;
556: shift = 0;
557: begin = 0;
558: lastSplit = 0;
559: split = work+nwu;
560: nwu += n;
561: split[lastSplit] = begin;
562: splitCount = 0;
563: while (begin!=-1) {
564: while (n-begin>2 && totalIt<maxIt) {
565: /* Check for deflation before performing a transformation */
566: test1 = ((PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])) && (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)) && (PetscAbsReal(L[n-2]*U[n])<tolDef*PetscAbsReal(acShift+U[n-1])) && (PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)<tolDef*PetscAbsReal(acShift+U[n-1])))? PETSC_TRUE: PETSC_FALSE;
567: if (flag==2) { /* Early 2x2 deflation */
568: earlyDef=earlyDef+1;
569: test2 = PETSC_TRUE;
570: } else {
571: if (n-begin>4) {
572: test2 = ((PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])) && (PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3])))? PETSC_TRUE: PETSC_FALSE;
573: } else { /* n-begin+1=3 */
574: test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
575: }
576: }
577: while (test2 || test1) {
578: /* 2x2 deflation */
579: if (test2) {
580: if (flag==2) { /* Early deflation */
581: sum = L[n-2];
582: det = U[n-2];
583: disc = U[n-1];
584: flag = 0;
585: } else {
586: sum = (L[n-2]+(U[n-2]+U[n-1]))/2;
587: disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
588: det = U[n-2]*U[n-1];
589: }
590: if (disc<=0) {
591: #if !defined(PETSC_USE_COMPLEX)
592: wr[--n] = sum+acShift; wi[n] = PetscSqrtReal(-disc);
593: wr[--n] = sum+acShift; wi[n] = -PetscSqrtReal(-disc);
594: #else
595: wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
596: wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
597: #endif
598: } else {
599: if (sum==0) {
600: x1 = PetscSqrtReal(disc);
601: x2 = -x1;
602: } else {
603: x1 = ((sum>=0.0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
604: x2 = det/x1;
605: }
606: wr[--n] = x1+acShift;
607: wr[--n] = x2+acShift;
608: }
609: } else { /* test1 -- 1x1 deflation */
610: x1 = U[n-1]+acShift;
611: wr[--n] = x1;
612: }
613:
614: if (n<=begin+2) {
615: break;
616: } else {
617: test1 = ((PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])) && (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)) && (PetscAbsReal(L[n-2]*U[n-1])<tolDef*PetscAbsReal(acShift+U[n-1])) && (PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)< tolDef*PetscAbsReal(acShift+U[n-1])))? PETSC_TRUE: PETSC_FALSE;
618: if (n-begin>4) {
619: test2 = ((PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])) && (PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3])))? PETSC_TRUE: PETSC_FALSE;
620: } else { /* n-begin+1=3 */
621: test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
622: }
623: }
624: } /* end "WHILE deflations" */
625: /* After deflation */
626: if (n>begin+3) {
627: ind = begin;
628: for (k=n-4;k>=begin+1;k--) {
629: if ( (PetscAbsReal(L[k])<tolDef*PetscAbsReal(U[k]))&&(PetscAbsReal(L[k]*U[k+1]*(U[k+2]+L[k+2])*(U[k-1]+L[k-1]))<tolDef*PetscAbsReal((U[k-1]*(U[k]+L[k])+L[k-1]*L[k])*(U[k+1]*(U[k+2]+L[k+2])+L[k+1]*L[k+2]))) ) {
630: ind=k;
631: break;
632: }
633: }
634: if ( ind>begin || PetscAbsReal(L[begin]) <tolDef*PetscAbsReal(U[begin]) ) {
635: lastSplit = lastSplit+1;
636: split[lastSplit] = begin;
637: L[ind] = acShift; /* Use of L[ind] to save acShift */
638: begin = ind+1;
639: splitCount = splitCount+1;
640: }
641: }
642:
643: if (n>begin+2) {
644: disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
645: if ( (PetscAbsReal(L[n-2])>tolZero) && (PetscAbsReal(L[n-3])>tolZero)) { /* L's are big */
646: shift = 0;
647: sum = 0; /* Needed in case of failure */
648: prod = 0;
649: realDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
650: realSteps++;
651: if (flag) { /* Failure */
652: tridqdsZhuang(n-begin,L+begin,U+begin,0.0,0.0,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
653: shift = 0.0;
654: while (flag==1 && nFail<maxFail) { /* Successive failures */
655: shift = shift+delta;
656: if (shift>gr-acShift || shift<gl-acShift) {
657: shift = meanEig-acShift;
658: delta = -delta;
659: }
660: nFail++;
661: realDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
662: }
663: }
664: } else { /* L's are small */
665: if (disc<0) { /* disc <0 Complex case; Francis shift; 3dqds */
666: sum = U[n-2]+L[n-2]+U[n-1];
667: prod = U[n-2]*U[n-1];
668: tridqdsZhuang(n-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
669: complexSteps++;
670: shift = 0.0; /* Restoring transformation */
671: while (flag==1 && nFail<maxFail) { /* In case of failure */
672: shift = shift+U[n-1]; /* first time shift=0 */
673: realDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
674: nFail++;
675: }
676: } else { /* disc >0 Real case; real Wilkinson shift; dqds */
677: sum = (L[n-2]+U[n-2]+U[n-1])/2;
678: if (sum==0) {
679: x1 = PetscSqrtReal(disc);
680: x2 = -x1;
681: } else {
682: x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
683: x2 = U[n-2]*U[n-1]/x1;
684: }
685: /* Take the eigenvalue closest to UL(n,n) */
686: if (PetscAbsReal(x1-U[n-1])<PetscAbsReal(x2-U[n-1])) {
687: shift = x1;
688: } else {
689: shift = x2;
690: }
691: realDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
692: realSteps++;
693: /* In case of failure */
694: while (flag==1 && nFail<maxFail) {
695: sum = 2*shift;
696: prod = shift*shift;
697: tridqdsZhuang(n-1-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
698: /* In case of successive failures */
699: if (shift==0) {
700: shift = PetscMin(PetscAbsReal(L[n-2]),PetscAbsReal(L[n-3]))*delta;
701: } else {
702: shift=shift+delta;
703: }
704: if (shift>gr-acShift || shift<gl-acShift) {
705: shift = meanEig-acShift;
706: delta = -delta;
707: }
708: if (flag==0) { /* We changed from real dqds to 3dqds */
709: shift=0;
710: }
711: nFail++;
712: }
713: }
714: } /* end "if tolZero" */
715: if (nFail==maxFail) {
716: SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached. No convergence in DQDS");
717: }
718: /* Successful Transformation; flag==0 */
719: totalIt++;
720: acShift = shift+acShift;
721: for (i=begin;i<n-1;i++) {
722: L[i] = L1[i];
723: U[i] = U1[i];
724: }
725: U[n-1] = U1[n-1];
726: totalFail = totalFail+nFail;
727: nFail = 0;
728: } /* end "if n>begin+1" */
729: } /* end WHILE 1 */
730: if (totalIt>=maxIt) {
731: SETERRQ(PETSC_COMM_SELF,1,"Maximun number of iterations reached. No convergence in DQDS");
732: }
733: /* END: n=2 or n=1 % n=begin+1 or n=begin */
734: if (n==begin+2) {
735: sum = (L[n-2]+U[n-2]+U[n-1])/2;
736: disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
737: if (disc<=0) { /* Complex case */
738: /* Deflation 2 */
739: #if !defined(PETSC_USE_COMPLEX)
740: wr[--n] = sum+acShift; wi[n] = PetscSqrtReal(-disc);
741: wr[--n] = sum+acShift; wi[n] = -PetscSqrtReal(-disc);
742: #else
743: wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
744: wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
745: #endif
746: }else { /* Real case */
747: if (sum==0) {
748: x1 = PetscSqrtReal(disc);
749: x2 = -x1;
750: } else {
751: x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
752: x2 = U[n-2]*U[n-1]/x1;
753: }
754: /* Deflation 2 */
755: wr[--n] = x2+acShift;
756: wr[--n] = x1+acShift;
757: }
758: }else { /* n=1 n=begin */
759: /* deflation 1 */
760: x1 = U[n-1]+acShift;
761: wr[--n] = x1;
762: }
763: switch (n) {
764: case 0:
765: begin = -1;
766: break;
767: case 1:
768: acShift = L[begin-1];
769: begin = split[lastSplit];
770: lastSplit--;
771: break;
772: default : /* n>=2 */
773: acShift = L[begin-1];
774: begin = split[lastSplit];
775: lastSplit--;
776: }
777: }/* While begin~=-1 */
778: for (i=0;i<dim;i++) {
779: wr[i] = wr[i]+initialShift;
780: }
781: if (work!=w) {
782: PetscFree(work);
783: }
784: return(0);
785: }
788: PetscErrorCode DSSolve_GHIEP_DQDS_II(DS ds,PetscScalar *wr,PetscScalar *wi)
789: {
791: PetscInt i,off,ld,nwall,nwu;
792: PetscScalar *A,*B,*Q,*vi;
793: PetscReal *d,*e,*s,*a,*b,*c;
796: #if !defined(PETSC_USE_COMPLEX)
798: #endif
799: ld = ds->ld;
800: off = ds->l + ds->l*ld;
801: A = ds->mat[DS_MAT_A];
802: B = ds->mat[DS_MAT_B];
803: Q = ds->mat[DS_MAT_Q];
804: d = ds->rmat[DS_MAT_T];
805: e = ds->rmat[DS_MAT_T] + ld;
806: c = ds->rmat[DS_MAT_T] + 2*ld;
807: s = ds->rmat[DS_MAT_D];
808: /* Quick return if possible */
809: if (ds->n-ds->l == 1) {
810: *(Q+off) = 1;
811: if (!ds->compact) {
812: d[ds->l] = PetscRealPart(A[off]);
813: s[ds->l] = PetscRealPart(B[off]);
814: }
815: wr[ds->l] = d[ds->l]/s[ds->l];
816: if (wi) wi[ds->l] = 0.0;
817: return(0);
818: }
819: nwall = 12*ld+4;
820: DSAllocateWork_Private(ds,0,nwall,0);
821: /* Reduce to pseudotriadiagonal form */
822: DSIntermediate_GHIEP( ds);
823:
824: /* Compute Eigenvalues (DQDS)*/
825: /* Form pseudosymmetric tridiagonal */
826: a = ds->rwork;
827: b = a+ld;
828: c = b+ld;
829: nwu = 3*ld;
830: if (ds->compact) {
831: for (i=ds->l;i<ds->n-1;i++) {
832: a[i] = d[i]*s[i];
833: b[i] = e[i]*s[i+1];
834: c[i] = e[i]*s[i];
835: }
836: a[ds->n-1] = d[ds->n-1]*s[ds->n-1];
837: } else {
838: for (i=ds->l;i<ds->n-1;i++) {
839: a[i] = PetscRealPart(A[i+i*ld]*B[i+i*ld]);
840: b[i] = PetscRealPart(A[i+1+i*ld]*s[i+1]);
841: c[i] = PetscRealPart(A[i+(i+1)*ld]*s[i]);
842: }
843: a[ds->n-1] = PetscRealPart(A[ds->n-1+(ds->n-1)*ld]*B[ds->n-1+(ds->n-1)*ld]);
844: }
845: vi = (wi)?wi+ds->l:PETSC_NULL;
846: DSGHIEP_Eigen3DQDS(ds->n-ds->l,a+ds->l,b+ds->l,c+ds->l,wr+ds->l,vi,ds->rwork+nwu,nwall-nwu);
848: /* Compute Eigenvectors with Inverse Iteration */
849: DSGHIEPPseudoOrthogInverseIteration(ds,wr,wi);
850:
851: /* Recover eigenvalues from diagonal */
852: DSGHIEPComplexEigs(ds, 0, ds->l, wr, wi);
853: #if defined(PETSC_USE_COMPLEX)
854: if (wi) {
855: for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
856: }
857: #endif
858: return(0);
859: }