Actual source code: dsghiep_dqds.c

  1: /*
  2:    DQDS-type dense solver for generalized symmetric-indefinite eigenproblem.
  3:    Based on Matlab code from Carla Ferreira.

  5:    References:

  7:        [1] C. Ferreira, B. Parlett, "Real DQDS for the nonsymmetric tridiagonal
  8:            eigenvalue problem", preprint, 2012.

 10:        [2] C. Ferreira. The unsymmetric tridiagonal eigenvalue problem. Ph.D
 11:            Thesis, University of Minho, 2007.

 13:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 14:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 15:    Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain

 17:    This file is part of SLEPc.
 18:       
 19:    SLEPc is free software: you can redistribute it and/or modify it under  the
 20:    terms of version 3 of the GNU Lesser General Public License as published by
 21:    the Free Software Foundation.

 23:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 24:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 25:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 26:    more details.

 28:    You  should have received a copy of the GNU Lesser General  Public  License
 29:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 30:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 31: */
 32: #include <slepc-private/dsimpl.h>      /*I "slepcds.h" I*/
 33: #include <slepcblaslapack.h>

 35: extern PetscErrorCode DSGHIEPComplexEigs(DS,PetscInt,PetscInt,PetscScalar*,PetscScalar*);
 36: extern PetscErrorCode DSGHIEPPseudoOrthogInverseIteration(DS,PetscScalar*,PetscScalar*);
 37: extern PetscErrorCode DSIntermediate_GHIEP(DS);

 41: /*
 42:   INPUT:
 43:     a ---- diagonal of J
 44:     b ---- subdiagonal of J;
 45:     the superdiagonal of J is all 1's

 47:   OUTPUT:
 48:     For an eigenvalue lambda of J we have:
 49:       gl=<real(lambda)<=gr
 50:       -sigma=<imag(lambda)<=sigma
 51: */
 52: static PetscErrorCode ScanJ(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *gl,PetscReal *gr,PetscReal *sigma)
 53: {
 54:   PetscInt  i;
 55:   PetscReal b0,b1,rad;

 58:   /* For original matrix C, C_bal=T+S; T-symmetric and S=skew-symmetric
 59:    C_bal is the balanced form of C */
 60:   /* Bounds on the imaginary part of C (Gersgorin bound for S)*/
 61:   *sigma = 0.0;
 62:   b0 = 0.0;
 63:   for (i=0;i<n-1;i++) {
 64:     if (b[i]<0.0) b1 = PetscSqrtReal(-b[i]);
 65:     else b1 = 0.0;
 66:     *sigma = PetscMax(*sigma,b1+b0);
 67:     b0 = b1;
 68:   }
 69:   *sigma = PetscMax(*sigma,b0);
 70:   /* Gersgorin bounds for T (=Gersgorin bounds on the real part for C) */
 71:   rad = (b[0]>0.0)?PetscSqrtReal(b[0]):0.0; /* rad = b1+b0, b0 = 0 */
 72:   *gr = a[0]+rad;
 73:   *gl = a[0]-rad;
 74:   b0 = rad;
 75:   for (i=1;i<n-1;i++) {
 76:     b1 = (b[i]>0.0)?PetscSqrtReal(b[i]):0.0;
 77:     rad = b0+b1;
 78:     *gr = PetscMax(*gr,a[i]+rad);
 79:     *gl = PetscMin(*gl,a[i]-rad);
 80:     b0 = b1;
 81:   }
 82:   rad = b0;
 83:   *gr = PetscMax(*gr,a[n-1]+rad);
 84:   *gl = PetscMin(*gl,a[n-1]-rad);

 86:   return(0);
 87: }

 91: /* 
 92:   INPUT: 
 93:     a  - vector with the diagonal elements
 94:     b  - vector with the subdiagonal elements
 95:     gl - Gersgorin left bound (real axis)
 96:     gr - Gersgorin right bound (real axis)
 97:   OUTPUT:
 98:     eigvalue - multiple eigenvalue (if there is an eigenvalue)
 99:     m        - its multiplicity    (m=0 if there isn't a multiple eigenvalue)
100:     X        - matrix of generalized eigenvectors  
101:     shift    
102: */
103: static PetscErrorCode Prologue(PetscInt n,PetscReal *a,PetscReal *b,PetscReal gl,PetscReal gr,PetscInt *m,PetscReal *shift,PetscReal *w,PetscReal nw)
104: {

107:   PetscReal      mu,tol,*a1,*work,*y,*yp,*x,*xp;
108:   PetscInt       i,k,nwall=0;

111:   *m = 0;
112:   mu = 0.0;
113:   for (i=0;i<n;i++) mu += a[i];
114:   mu /= n;
115:   tol=n*PETSC_MACHINE_EPSILON*(gr-gl);
116:   nwall = 5*n+4;
117:   if (w && nw>=nwall) {
118:     work = w;
119:     nwall = nw;
120:   }else {
121:     PetscMalloc(nwall*sizeof(PetscReal),&work);
122:     PetscMemzero(work,nwall*sizeof(PetscReal));
123:   }
124:   a1 = work; /* size n */
125:   y = work+n; /* size n+1 */
126:   yp = y+n+1; /* size n+1. yp is the derivative of y (p for "prime") */
127:   x = yp+n+1; /* size n+1 */
128:   xp = x+n+1; /* size n+1 */
129:   for (i=0;i<n;i++) a1[i] = mu-a[i];
130:   x[0] = 1;
131:   xp[0] = 0;
132:   x[1] = a1[0];
133:   xp[1] = 1;
134:   for (i=1;i<n;i++) {
135:     x[i+1]=a1[i]*x[i]-b[i-1]*x[i-1];
136:     xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1];
137:   }
138:   *shift = mu;
139:   if ( PetscAbsReal(x[n])<tol) {
140:     /* mu is an eigenvalue */
141:     *m = *m+1;
142:     if ( PetscAbsReal(xp[n])<tol ) {
143:       /* mu is a multiple eigenvalue; Is it the one-point spectrum case? */
144:       k = 0;
145:       while (PetscAbsReal(xp[n])<tol && k<n-1) {
146:            PetscMemcpy(x,y,(n+1)*sizeof(PetscReal));
147:            PetscMemcpy(xp,yp,(n+1)*sizeof(PetscReal));
148:            x[k] = 0.0;
149:            k++;
150:            x[k] = 1.0;
151:            xp[k] = 0.0;
152:            x[k+1] = a1[k] + y[k];
153:            xp[k+1] = 1+yp[k];
154:            for (i=k+1;i<n;i++) {
155:              x[i+1] = a1[i]*x[i]-b[i-1]*x[i-1]+y[i];
156:              xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1]+yp[i];
157:            }
158:            *m = *m+1;
159:       }
160:     }
161:   }
162:   if (work != w) {
163:     PetscFree(work);
164:   }
165: /*
166:   When mu is not an eigenvalue or it it an eigenvalue but it is not the one-point spectrum case, we will always have shift=mu

168:   Need to check for overflow!

170:   After calling Prologue, eigenComplexdqds and eigen3dqds will test if m==n in which case we have the one-point spectrum case; 
171:   If m!=0, the only output to be used is the shift returned.
172: */
173:   return(0);
174: }


179: static PetscErrorCode LUfac(PetscInt n,PetscReal *a,PetscReal *b,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L,PetscReal *U,PetscInt *fail,PetscReal *w,PetscInt nw) {
181:   PetscInt       nwall,i;
182:   PetscReal      *work,*a1;

185:   nwall = n;
186:   if (w && nw>=nwall) {
187:     work = w;
188:     nwall = nw;
189:   } else {
190:     PetscMalloc(nwall*sizeof(PetscReal),&work);
191:   }
192:   a1 = work;
193:   for (i=0;i<n;i++) a1[i] = a[i]-shift;
194:   *fail = 0;
195:   for (i=0;i<n-1;i++) {
196:     U[i] = a1[i];
197:     L[i] = b[i]/U[i];
198:     a1[i+1] = a1[i+1]-L[i];
199:   }
200:   U[n-1] = a1[n-1];

202:   /* Check if there are NaN values */
203:   for (i=0;i<n-1 && *fail==0;i++) {
204:     if (PetscIsInfOrNanReal(L[i])) *fail=1;
205:     if (PetscIsInfOrNanReal(U[i])) *fail=1;
206:   }
207:   if (*fail==0 && PetscIsInfOrNanReal(U[n-1])) *fail=1;

209:   for (i=0;i<n-1 && *fail==0;i++) {
210:     if ( PetscAbsReal(L[i])>tol*norm) *fail = 1;  /* This demands IEEE arithmetic */
211:     if ( PetscAbsReal(U[i])>tol*norm) *fail = 1;
212:   }
213:   if ( *fail==0 && PetscAbsReal(U[n-1])>tol*norm) *fail = 1;
214: 
215:   if (work != w) {
216:     PetscFree(work);
217:   }
218:   return(0);
219: }

223: static PetscErrorCode realDQDS(PetscInt n,PetscReal *L,PetscReal *U,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L1,PetscReal *U1,PetscInt *fail)
224: {
225:   PetscReal d;
226:   PetscInt  i;

229:   *fail = 0;
230:   d = U[0]-shift;
231:   for (i=0;i<n-1;i++) {
232:     U1[i] = d+L[i];
233:     L1[i] = L[i]*(U[i+1]/U1[i]);
234:     d = d*(U[i+1]/U1[i])-shift;
235:   }
236:   U1[n-1]=d;

238:   /* The following demands IEEE arithmetic */
239:   for (i=0;i<n-1 && *fail==0;i++) {
240:     if (PetscIsInfOrNanReal(L1[i])) *fail=1;
241:     if (PetscIsInfOrNanReal(U1[i])) *fail=1;
242:   }
243:   if (*fail==0 && PetscIsInfOrNanReal(U1[n-1])) *fail=1;
244:   for (i=0;i<n-1 && *fail==0;i++) {
245:     if (PetscAbsReal(L1[i])>tol*norm) *fail=1;
246:     if (PetscAbsReal(U1[i])>tol*norm) *fail=1;
247:   }
248:   if (*fail==0 && PetscAbsReal(U1[n-1])>tol*norm) *fail=1;
249:   return(0);
250: }

254: static PetscErrorCode tridqdsZhuang3(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscInt *fail)
255: {
256:   PetscReal xl,yl,xr,yr,zr,t;
257:   PetscInt  i;

260:   *fail = 0;
261:   xr = 1.0;
262:   yr = e[0];
263:   zr = 0.0;
264:   /* Step 1 */
265:   /* the efect of Z1 */
266:   xr = xr*q[0]+yr;
267:   /* the inverse of L1 */
268:   xl = (q[0]+e[0])*(q[0]+e[0])+q[1]*e[0]-sum*(q[0]+e[0])+prod;
269:   yl = -(q[2]*e[1]*q[1]*e[0])/xl;
270:   xl = -(q[1]*e[0]*(q[0]+e[0]+q[1]+e[1]-sum))/xl;
271:   /* the efect of L1 */
272:   q[0] = xr-xl;
273:   xr = yr-xl;
274:   yr = zr-yl-xl*e[1];
275:   /*the inverse of Y1 */
276:   xr = xr/q[0];
277:   yr = yr/q[0];
278:   /*the effect of Y1 inverse */
279:   e[0] = xl+yr+xr*q[1];
280:   xl = yl+zr+yr*q[2];      /* zr=0  when n=3 */
281:   /*the effect of Y1 */
282:   xr = 1.0-xr;
283:   yr = e[1]-yr;

285:   /* STEP n-1 */

287:   if (PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(q[n-3])) {
288:     /* the efect of Zn-1 */
289:     xr = xr*q[n-2]+yr;
290:     /* the inverse of Ln-1 */
291:     xl = -xl/e[n-3];
292:     /* the efect of Ln-1 */
293:     q[n-2] = xr-xl;
294:     xr = yr-xl;
295:     /*the inverse of Yn-1 */
296:     xr = xr/q[n-2];
297:     /*the effect of the inverse of Yn-1 */
298:     e[n-2] = xl+xr*q[n-1];
299:     /*the effects of Yn-1 */
300:     xr = 1.0-xr;
301:     /* STEP n */
302:     /*the effect of Zn */
303:     xr = xr*q[n-1];
304:     /*the inverse of Ln=I */
305:     /*the effect of Ln */
306:     q[n-1] = xr;
307:     /* the inverse of  Yn-1=I */

309:   } else { /* Free deflation */
310:     e[n-2] = (e[n-3]+(xr*q[n-2]+yr)+q[n-1])*0.5;       /* Sum=trace/2 */
311:     q[n-2] = (e[n-3]+q[n-2]*xr)*q[n-1]-xl;             /* det */
312:     t = ((e[n-3]+(xr*q[n-2]+yr)-q[n-1])*0.5);
313:     q[n-1] = t*t+(xl+q[n-1]*yr);
314:     *fail = 2;
315:   }

317:   /* The following demands IEEE arithmetic */
318:   for (i=0;i<n-1 && *fail==0;i++) {
319:     if (PetscIsInfOrNanReal(e[i])) *fail=1;
320:     if (PetscIsInfOrNanReal(q[i])) *fail=1;
321:   }
322:   if (*fail==0 && PetscIsInfOrNanReal(q[n-1])) *fail=1;
323:   for (i=0;i<n-1 && *fail==0;i++) {
324:     if (PetscAbsReal(e[i])>tol*norm) *fail=1;
325:     if (PetscAbsReal(q[i])>tol*norm) *fail=1;
326:   }
327:   if (*fail==0 && PetscAbsReal(q[n-1])>tol*norm) *fail=1;
328:   return(0);
329: }

333: static PetscErrorCode tridqdsZhuang(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscReal *e1,PetscReal *q1,PetscInt *fail) {
335:   PetscInt       i;
336:   PetscReal      xl,yl,xr,yr,zr,t;

339:   for (i=0;i<n-1;i++) {
340:     e1[i] = e[i];
341:     q1[i] = q[i];
342:   }
343:   q1[n-1] = q[n-1];
344:   *fail = 0;
345:   if (n>3) {   /* For n>3 */
346:     *fail = 0;
347:     xr = 1;
348:     yr = e1[0];
349:     zr = 0;
350:     /* step 1 */
351:     /* the efect of Z1 */
352:     xr = xr*q1[0]+yr;
353:     /* the inverse of L1 */
354:     xl = (q1[0]+e1[0])*(q1[0]+e1[0])+q1[1]*e1[0]-sum*(q1[0]+e1[0])+prod;
355:     yl = -(q1[2]*e1[1]*q1[1]*e1[0])/xl;
356:     xl = -(q1[1]*e1[0]*(q1[0]+e1[0]+q1[1]+e1[1]-sum))/xl;
357:     /* the efect of L1 */
358:     q1[0] = xr-xl;
359:     xr = yr-xl;
360:     yr = zr-yl-xl*e1[1];
361:     zr = -yl*e1[2];
362:     /* the inverse of Y1 */
363:     xr = xr/q1[0];
364:     yr = yr/q1[0];
365:     zr = zr/q1[0];
366:     /* the effect of Y1 inverse */
367:     e1[0] = xl+yr+xr*q1[1];
368:     xl = yl+zr+yr*q1[2];
369:     yl = zr*q1[3];
370:     /* the effect of Y1 */
371:     xr = 1-xr;
372:     yr = e1[1]-yr;
373:     zr = -zr;
374:     /* step i=2,...,n-3 */
375:     for (i=1;i<n-3;i++) {
376:       /* the efect of Zi */
377:       xr = xr*q1[i]+yr;
378:       /* the inverse of Li */
379:       xl = -xl/e1[i-1];
380:       yl = -yl/e1[i-1];
381:       /* the efect of Li */
382:       q1[i] = xr-xl;
383:       xr = yr-xl;
384:       yr = zr-yl-xl*e1[i+1];
385:       zr = -yl*e1[i+2];
386:       /* the inverse of Yi */
387:       xr = xr/q1[i];
388:       yr = yr/q1[i];
389:       zr = zr/q1[i];
390:       /* the effect of the inverse of Yi */
391:       e1[i] = xl+yr+xr*q1[i+1];
392:       xl = yl+zr+yr*q1[i+2];
393:       yl = zr*q1[i+3];
394:       /* the effects of Yi */
395:       xr = 1.0-xr;
396:       yr = e1[i+1]-yr;
397:       zr = -zr;
398:     }

400:     /* STEP n-2            zr is no longer needed */

402:     /* the efect of Zn-2 */
403:     xr = xr*q1[n-3]+yr;
404:     /* the inverse of Ln-2 */
405:     xl = -xl/e1[n-4];
406:     yl = -yl/e1[n-4];
407:     /* the efect of Ln-2 */
408:     q1[n-3] = xr-xl;
409:     xr = yr-xl;
410:     yr = zr-yl-xl*e1[n-2];
411:     /* the inverse of Yn-2 */
412:     xr = xr/q1[n-3];
413:     yr = yr/q1[n-3];
414:     /* the effect of the inverse of Yn-2 */
415:     e1[n-3] = xl+yr+xr*q1[n-2];
416:     xl = yl+yr*q1[n-1];
417:     /* the effect of Yn-2 */
418:     xr = 1.0-xr;
419:     yr = e1[n-2]-yr;
420: 
421:     /* STEP n-1           yl and yr are no longer needed */
422:     /* Testing for EARLY DEFLATION */

424:     if (PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(q1[n-3])) {
425:       /* the efect of Zn-1 */
426:       xr = xr*q1[n-2]+yr;
427:       /* the inverse of Ln-1 */
428:       xl = -xl/e1[n-3];
429:       /* the efect of Ln-1 */
430:       q1[n-2] = xr-xl;
431:       xr = yr-xl;
432:       /*the inverse of Yn-1 */
433:       xr = xr/q1[n-2];
434:       /*the effect of the inverse of Yn-1 */
435:       e1[n-2] = xl+xr*q1[n-1];
436:       /*the effects of Yn-1 */
437:       xr = 1.0-xr;
438: 
439:       /* STEP n;     xl no longer needed */
440:       /* the effect of Zn */
441:       xr = xr*q1[n-1];
442:       /* the inverse of Ln = I */
443:       /* the effect of Ln */
444:       q1[n-1] = xr;
445:       /* the inverse of  Yn-1=I */

447:     } else {  /* FREE DEFLATION */
448:       e1[n-2] = (e1[n-3]+xr*q1[n-2]+yr+q1[n-1])*0.5;     /* sum=trace/2 */
449:       q1[n-2] = (e1[n-3]+q1[n-2]*xr)*q1[n-1]-xl;         /* det */
450:       t = (e1[n-3]+xr*q1[n-2]+yr-q1[n-1])*0.5;
451:       q1[n-1] = t*t+xl+q1[n-1]*yr;
452:       *fail = 2;
453:     }

455:     for (i=0;i<n-1 && *fail==0;i++) {
456:       if (PetscIsInfOrNanReal(e1[i])) *fail=1;
457:       if (PetscIsInfOrNanReal(q1[i])) *fail=1;
458:     }
459:     if (*fail==0 && PetscIsInfOrNanReal(q1[n-1])) *fail=1;
460:     for (i=0;i<n-1 && *fail==0;i++) {
461:       if (PetscAbsReal(e1[i])>tol*norm) *fail = 1;  /* This demands IEEE arithmetic */
462:       if (PetscAbsReal(q1[i])>tol*norm) *fail = 1;
463:     }
464:     if ( *fail==0 && PetscAbsReal(q1[n-1])>tol*norm) *fail = 1;
465: 
466:   } else {  /* The case n=3 */
467:     tridqdsZhuang3(n,e1,q1,sum,prod,tol,norm,tolDef,fail);
468:   }
469:   return(0);
470: }

474: static PetscErrorCode DSGHIEP_Eigen3DQDS(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *c, PetscScalar *wr, PetscScalar *wi,PetscReal *w,PetscInt nw)
475: {
476:   PetscInt       totalIt=0;       /* Total Number of Iterations  */
477:   PetscInt       totalFail=0;     /* Total number of failures */
478:   PetscInt       nFail=0;         /* Number of failures per transformation */
479:   PetscReal      tolZero=1.0/16;  /* Tolerance for zero shifts */
480:   PetscInt       maxIt=10*n;      /* Maximum number of iterations */
481:   PetscInt       maxFail=10*n;    /* Maximum number of failures allowed per each transformation */
482:   PetscReal      tolDef=PETSC_MACHINE_EPSILON;  /* Tolerance for deflation eps, 10*eps, 100*eps */
483:   PetscReal      tolGrowth=100000;
485:   PetscInt       i,k,nwu=0,nwall,begin,ind,flag,dim,m;
486:   PetscReal      norm,gr,gl,sigma,delta,meanEig,*work,*U,*L,*U1,*L1,*split;
487:   PetscReal      acShift,initialShift,shift=0.0,sum,det,disc,prod,x1,x2;
488:   PetscInt       realSteps,complexSteps,earlyDef,lastSplit,splitCount;
489:   PetscBool      test1,test2;

492:   dim = n;
493:   /* Test if the matrix is unreduced */
494:   for (i=0;i<n-1;i++) {
495:     if (PetscAbsReal(b[i])==0 || PetscAbsReal(c[i])==0) {
496:       SETERRQ(PETSC_COMM_SELF,1,"Initial tridiagonal matrix is not unreduced");
497:     }
498:   }
499:   nwall = 9*n+4;
500:   if (w && nw>=nwall) {
501:     work = w;
502:     nwall = nw;
503:   } else {
504:     PetscMalloc(nwall*sizeof(PetscReal),&work);
505:   }
506:   U = work;
507:   L = work+n;
508:   U1 = work+2*n;
509:   L1 = work+3*n;
510:   nwu = 4*n;
511:   if (wi) {
512:     PetscMemzero(wi,n*sizeof(PetscScalar));
513:   }
514:   /* Normalization - the J form of C */
515:   for (i=0;i<n-1;i++) b[i] *= c[i]; /* subdiagonal of the J form */

517:   /* Scan matrix J  ---- Finding a box of inclusion for the eigenvalues */
518:   norm = 0.0;
519:   for (i=0;i<n-1;i++) {
520:     norm = PetscMax(norm,PetscMax(PetscAbsReal(a[i]),PetscAbsReal(b[i])));
521:   }
522:   norm = PetscMax(norm,PetscMax(1,PetscAbsReal(a[n-1])));
523:   ScanJ(n,a,b,&gl,&gr,&sigma);
524:   delta = (gr-gl)/n; /* How much to add to the shift, in case of failure (element growth) */
525:   meanEig = 0.0;
526:   for (i=0;i<n;i++) meanEig += a[i];
527:   meanEig /= n; /* shift = initial shift = mean of eigenvalues */
528:   Prologue(n,a,b,gl,gr,&m,&shift,work+nwu,nwall-nwu);
529:   if (m==n) { /* Multiple eigenvalue, we have the one-point spectrum case */
530:     for (i=0;i<dim;i++) {
531:       wr[i] = shift;
532:       if (wi) wi[i] = 0.0;
533:     }
534:     return(0);
535:   }
536:   /* Initial LU Factorization */
537:   if ( delta==0 ) shift=0;/* The case when all eigenvalues are pure imaginary */
538:   LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu,nwall-nwu); /* flag=1 failure; flag=0 successful transformation*/
539:   while (flag==1 && nFail<maxFail) {
540:     shift=shift+delta;
541:     if (shift>gr || shift<gl) { /* Successive failures */
542:       shift=meanEig;
543:       delta=-delta;
544:     }
545:     nFail=nFail+1;
546:     LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu,nwall-nwu); /* flag=1 failure; flag=0 successful transformation*/
547:   }
548:   if (nFail==maxFail) {
549:     SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached in Initial LU factorization");
550:   }
551:   /* Successful Initial transformation */
552:   totalFail = totalFail+nFail;
553:   nFail = 0;
554:   acShift = 0;
555:   initialShift = shift;
556:   shift = 0;
557:   begin = 0;
558:   lastSplit = 0;
559:   split = work+nwu;
560:   nwu += n;
561:   split[lastSplit] = begin;
562:   splitCount = 0;
563:   while (begin!=-1) {
564:     while (n-begin>2 && totalIt<maxIt) {
565:       /* Check for deflation before performing a transformation */
566:       test1 = ((PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])) && (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)) && (PetscAbsReal(L[n-2]*U[n])<tolDef*PetscAbsReal(acShift+U[n-1])) && (PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)<tolDef*PetscAbsReal(acShift+U[n-1])))? PETSC_TRUE: PETSC_FALSE;
567:       if (flag==2) {  /* Early 2x2 deflation */
568:         earlyDef=earlyDef+1;
569:         test2 = PETSC_TRUE;
570:       } else {
571:         if (n-begin>4) {
572:           test2 = ((PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])) && (PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3])))? PETSC_TRUE: PETSC_FALSE;
573:         } else { /* n-begin+1=3 */
574:           test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
575:         }
576:       }
577:       while (test2 || test1) {
578:         /* 2x2 deflation */
579:         if (test2) {
580:           if (flag==2) { /* Early deflation */
581:             sum = L[n-2];
582:             det = U[n-2];
583:             disc = U[n-1];
584:             flag = 0;
585:           } else {
586:             sum = (L[n-2]+(U[n-2]+U[n-1]))/2;
587:             disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
588:             det = U[n-2]*U[n-1];
589:           }
590:           if (disc<=0) {
591: #if !defined(PETSC_USE_COMPLEX)
592:             wr[--n] = sum+acShift; wi[n] = PetscSqrtReal(-disc);
593:             wr[--n] = sum+acShift; wi[n] = -PetscSqrtReal(-disc);
594: #else
595:             wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
596:             wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
597: #endif
598:           } else {
599:             if (sum==0) {
600:               x1 = PetscSqrtReal(disc);
601:               x2 = -x1;
602:             } else {
603:               x1 = ((sum>=0.0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
604:               x2 = det/x1;
605:             }
606:             wr[--n] = x1+acShift;
607:             wr[--n] = x2+acShift;
608:           }
609:         } else { /* test1 -- 1x1 deflation */
610:           x1 = U[n-1]+acShift;
611:           wr[--n] = x1;
612:         }
613: 
614:         if (n<=begin+2) {
615:           break;
616:         } else {
617:           test1 = ((PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])) && (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)) && (PetscAbsReal(L[n-2]*U[n-1])<tolDef*PetscAbsReal(acShift+U[n-1])) && (PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)< tolDef*PetscAbsReal(acShift+U[n-1])))? PETSC_TRUE: PETSC_FALSE;
618:           if (n-begin>4) {
619:             test2 = ((PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])) && (PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3])))? PETSC_TRUE: PETSC_FALSE;
620:           } else { /* n-begin+1=3 */
621:             test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
622:           }
623:         }
624:       } /* end "WHILE deflations" */
625:       /* After deflation */
626:       if (n>begin+3) {
627:         ind = begin;
628:         for (k=n-4;k>=begin+1;k--) {
629:           if ( (PetscAbsReal(L[k])<tolDef*PetscAbsReal(U[k]))&&(PetscAbsReal(L[k]*U[k+1]*(U[k+2]+L[k+2])*(U[k-1]+L[k-1]))<tolDef*PetscAbsReal((U[k-1]*(U[k]+L[k])+L[k-1]*L[k])*(U[k+1]*(U[k+2]+L[k+2])+L[k+1]*L[k+2]))) ) {
630:              ind=k;
631:              break;
632:           }
633:         }
634:         if ( ind>begin || PetscAbsReal(L[begin]) <tolDef*PetscAbsReal(U[begin]) ) {
635:           lastSplit = lastSplit+1;
636:           split[lastSplit] = begin;
637:           L[ind] = acShift; /* Use of L[ind] to save acShift */
638:           begin = ind+1;
639:           splitCount = splitCount+1;
640:         }
641:       }
642: 
643:       if (n>begin+2) {
644:         disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
645:         if ( (PetscAbsReal(L[n-2])>tolZero) && (PetscAbsReal(L[n-3])>tolZero)) { /* L's are big */
646:           shift = 0;
647:           sum = 0; /* Needed in case of failure */
648:           prod = 0;
649:           realDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
650:           realSteps++;
651:           if (flag) {  /* Failure */
652:             tridqdsZhuang(n-begin,L+begin,U+begin,0.0,0.0,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
653:             shift = 0.0;
654:             while (flag==1 && nFail<maxFail) {  /* Successive failures */
655:               shift = shift+delta;
656:               if (shift>gr-acShift || shift<gl-acShift) {
657:                 shift = meanEig-acShift;
658:                 delta = -delta;
659:               }
660:               nFail++;
661:               realDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
662:             }
663:           }
664:         } else { /* L's are small */
665:           if (disc<0) {  /* disc <0   Complex case; Francis shift; 3dqds */
666:             sum = U[n-2]+L[n-2]+U[n-1];
667:             prod = U[n-2]*U[n-1];
668:             tridqdsZhuang(n-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
669:             complexSteps++;
670:             shift = 0.0; /* Restoring transformation */
671:             while (flag==1 && nFail<maxFail) { /* In case of failure */
672:               shift = shift+U[n-1];  /* first time shift=0 */
673:               realDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
674:               nFail++;
675:             }
676:           } else  { /* disc >0  Real case; real Wilkinson shift; dqds */
677:             sum = (L[n-2]+U[n-2]+U[n-1])/2;
678:             if (sum==0) {
679:               x1 = PetscSqrtReal(disc);
680:               x2 = -x1;
681:             } else {
682:               x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
683:               x2 = U[n-2]*U[n-1]/x1;
684:             }
685:             /* Take the eigenvalue closest to UL(n,n) */
686:             if (PetscAbsReal(x1-U[n-1])<PetscAbsReal(x2-U[n-1])) {
687:               shift = x1;
688:             } else {
689:               shift = x2;
690:             }
691:             realDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
692:             realSteps++;
693:             /* In case of failure */
694:             while (flag==1 && nFail<maxFail) {
695:               sum = 2*shift;
696:               prod = shift*shift;
697:               tridqdsZhuang(n-1-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
698:               /* In case of successive failures */
699:               if (shift==0) {
700:                 shift = PetscMin(PetscAbsReal(L[n-2]),PetscAbsReal(L[n-3]))*delta;
701:               } else {
702:                 shift=shift+delta;
703:               }
704:               if (shift>gr-acShift || shift<gl-acShift) {
705:                 shift = meanEig-acShift;
706:                 delta = -delta;
707:               }
708:               if (flag==0) { /* We changed from real dqds to 3dqds */
709:                 shift=0;
710:               }
711:               nFail++;
712:             }
713:           }
714:         } /* end "if tolZero" */
715:         if (nFail==maxFail) {
716:           SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached. No convergence in DQDS");
717:         }
718:         /* Successful Transformation; flag==0 */
719:         totalIt++;
720:         acShift = shift+acShift;
721:         for (i=begin;i<n-1;i++) {
722:           L[i] = L1[i];
723:           U[i] = U1[i];
724:         }
725:         U[n-1] = U1[n-1];
726:         totalFail = totalFail+nFail;
727:         nFail = 0;
728:       }  /* end "if n>begin+1" */
729:     }  /* end WHILE 1 */
730:     if (totalIt>=maxIt) {
731:       SETERRQ(PETSC_COMM_SELF,1,"Maximun number of iterations reached. No convergence in DQDS");
732:     }
733:     /* END: n=2 or n=1  % n=begin+1 or n=begin */
734:     if (n==begin+2) {
735:       sum = (L[n-2]+U[n-2]+U[n-1])/2;
736:       disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
737:         if (disc<=0)  {  /* Complex case */
738:         /* Deflation 2 */
739: #if !defined(PETSC_USE_COMPLEX)
740:         wr[--n] = sum+acShift; wi[n] = PetscSqrtReal(-disc);
741:         wr[--n] = sum+acShift; wi[n] = -PetscSqrtReal(-disc);
742: #else
743:         wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
744:         wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
745: #endif
746:       }else  { /* Real case */
747:         if (sum==0) {
748:           x1 = PetscSqrtReal(disc);
749:           x2 = -x1;
750:         } else {
751:           x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
752:           x2 = U[n-2]*U[n-1]/x1;
753:         }
754:         /* Deflation 2 */
755:         wr[--n] = x2+acShift;
756:         wr[--n] = x1+acShift;
757:       }
758:     }else { /* n=1   n=begin */
759:       /* deflation 1 */
760:       x1 = U[n-1]+acShift;
761:       wr[--n] = x1;
762:     }
763:     switch (n) {
764:       case 0:
765:         begin = -1;
766:         break;
767:       case 1:
768:         acShift = L[begin-1];
769:         begin = split[lastSplit];
770:         lastSplit--;
771:         break;
772:       default : /* n>=2 */
773:         acShift = L[begin-1];
774:         begin = split[lastSplit];
775:         lastSplit--;
776:     }
777:   }/* While begin~=-1 */
778:   for (i=0;i<dim;i++) {
779:     wr[i] = wr[i]+initialShift;
780:   }
781:   if (work!=w) {
782:     PetscFree(work);
783:   }
784:   return(0);
785: }
788: PetscErrorCode DSSolve_GHIEP_DQDS_II(DS ds,PetscScalar *wr,PetscScalar *wi)
789: {
791:   PetscInt       i,off,ld,nwall,nwu;
792:   PetscScalar    *A,*B,*Q,*vi;
793:   PetscReal      *d,*e,*s,*a,*b,*c;

796: #if !defined(PETSC_USE_COMPLEX)
798: #endif
799:   ld = ds->ld;
800:   off = ds->l + ds->l*ld;
801:   A = ds->mat[DS_MAT_A];
802:   B = ds->mat[DS_MAT_B];
803:   Q = ds->mat[DS_MAT_Q];
804:   d = ds->rmat[DS_MAT_T];
805:   e = ds->rmat[DS_MAT_T] + ld;
806:   c = ds->rmat[DS_MAT_T] + 2*ld;
807:   s = ds->rmat[DS_MAT_D];
808:   /* Quick return if possible */
809:   if (ds->n-ds->l == 1) {
810:     *(Q+off) = 1;
811:     if (!ds->compact) {
812:       d[ds->l] = PetscRealPart(A[off]);
813:       s[ds->l] = PetscRealPart(B[off]);
814:     }
815:     wr[ds->l] = d[ds->l]/s[ds->l];
816:     if (wi) wi[ds->l] = 0.0;
817:     return(0);
818:   }
819:   nwall = 12*ld+4;
820:   DSAllocateWork_Private(ds,0,nwall,0);
821:   /* Reduce to pseudotriadiagonal form */
822:   DSIntermediate_GHIEP( ds);
823: 
824:   /* Compute Eigenvalues (DQDS)*/
825:   /* Form pseudosymmetric tridiagonal */
826:   a = ds->rwork;
827:   b = a+ld;
828:   c = b+ld;
829:   nwu = 3*ld;
830:   if (ds->compact) {
831:     for (i=ds->l;i<ds->n-1;i++) {
832:       a[i] = d[i]*s[i];
833:       b[i] = e[i]*s[i+1];
834:       c[i] = e[i]*s[i];
835:     }
836:     a[ds->n-1] = d[ds->n-1]*s[ds->n-1];
837:   } else {
838:     for (i=ds->l;i<ds->n-1;i++) {
839:       a[i] = PetscRealPart(A[i+i*ld]*B[i+i*ld]);
840:       b[i] = PetscRealPart(A[i+1+i*ld]*s[i+1]);
841:       c[i] = PetscRealPart(A[i+(i+1)*ld]*s[i]);
842:     }
843:     a[ds->n-1] = PetscRealPart(A[ds->n-1+(ds->n-1)*ld]*B[ds->n-1+(ds->n-1)*ld]);
844:   }
845:   vi = (wi)?wi+ds->l:PETSC_NULL;
846:   DSGHIEP_Eigen3DQDS(ds->n-ds->l,a+ds->l,b+ds->l,c+ds->l,wr+ds->l,vi,ds->rwork+nwu,nwall-nwu);

848:   /* Compute Eigenvectors with Inverse Iteration */
849:   DSGHIEPPseudoOrthogInverseIteration(ds,wr,wi);
850: 
851:   /* Recover eigenvalues from diagonal */
852:   DSGHIEPComplexEigs(ds, 0, ds->l, wr, wi);
853: #if defined(PETSC_USE_COMPLEX)
854:   if (wi) {
855:     for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
856:   }
857: #endif
858:   return(0);
859: }