Actual source code: dsghiep_dqds.c

  1: /*
  2:    DQDS-type dense solver for generalized symmetric-indefinite eigenproblem.
  3:    Based on Matlab code from Carla Ferreira.

  5:    References:

  7:        [1] C. Ferreira, B. Parlett, "Real DQDS for the nonsymmetric tridiagonal
  8:            eigenvalue problem", preprint, 2012.

 10:        [2] C. Ferreira. The unsymmetric tridiagonal eigenvalue problem. Ph.D
 11:            Thesis, University of Minho, 2007.

 13:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 14:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 15:    Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain

 17:    This file is part of SLEPc.
 18:       
 19:    SLEPc is free software: you can redistribute it and/or modify it under  the
 20:    terms of version 3 of the GNU Lesser General Public License as published by
 21:    the Free Software Foundation.

 23:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 24:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 25:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 26:    more details.

 28:    You  should have received a copy of the GNU Lesser General  Public  License
 29:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 30:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 31: */
 32: #include <slepc-private/dsimpl.h>      /*I "slepcds.h" I*/
 33: #include <slepcblaslapack.h>

 35: extern PetscErrorCode DSGHIEPComplexEigs(DS,PetscInt,PetscInt,PetscScalar*,PetscScalar*);
 36: extern PetscErrorCode DSGHIEPPseudoOrthogInverseIteration(DS,PetscScalar*,PetscScalar*);
 37: extern PetscErrorCode DSIntermediate_GHIEP(DS);

 41: /*
 42:   INPUT:
 43:     a ---- diagonal of J
 44:     b ---- subdiagonal of J;
 45:     the superdiagonal of J is all 1's

 47:   OUTPUT:
 48:     For an eigenvalue lambda of J we have:
 49:       gl=<real(lambda)<=gr
 50:       -sigma=<imag(lambda)<=sigma
 51: */
 52: static PetscErrorCode ScanJ(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *gl,PetscReal *gr,PetscReal *sigma)
 53: {
 54:   PetscInt  i;
 55:   PetscReal b0,b1,rad;

 58:   /* For original matrix C, C_bal=T+S; T-symmetric and S=skew-symmetric
 59:    C_bal is the balanced form of C */
 60:   /* Bounds on the imaginary part of C (Gersgorin bound for S)*/
 61:   *sigma = 0.0;
 62:   b0 = 0.0;
 63:   for (i=0;i<n-1;i++) {
 64:     if (b[i]<0.0) b1 = PetscSqrtReal(-b[i]);
 65:     else b1 = 0.0;
 66:     *sigma = PetscMax(*sigma,b1+b0);
 67:     b0 = b1;
 68:   }
 69:   *sigma = PetscMax(*sigma,b0);
 70:   /* Gersgorin bounds for T (=Gersgorin bounds on the real part for C) */
 71:   rad = (b[0]>0.0)?PetscSqrtReal(b[0]):0.0; /* rad = b1+b0, b0 = 0 */
 72:   *gr = a[0]+rad;
 73:   *gl = a[0]-rad;
 74:   b0 = rad;
 75:   for (i=1;i<n-1;i++) {
 76:     b1 = (b[i]>0.0)?PetscSqrtReal(b[i]):0.0;
 77:     rad = b0+b1;
 78:     *gr = PetscMax(*gr,a[i]+rad);
 79:     *gl = PetscMin(*gl,a[i]-rad);
 80:     b0 = b1;
 81:   }
 82:   rad = b0;
 83:   *gr = PetscMax(*gr,a[n-1]+rad);
 84:   *gl = PetscMin(*gl,a[n-1]-rad);

 86:   return(0);
 87: }

 91: /* 
 92:   INPUT: 
 93:     a  - vector with the diagonal elements
 94:     b  - vector with the subdiagonal elements
 95:     gl - Gersgorin left bound (real axis)
 96:     gr - Gersgorin right bound (real axis)
 97:   OUTPUT:
 98:     eigvalue - multiple eigenvalue (if there is an eigenvalue)
 99:     m        - its multiplicity    (m=0 if there isn't a multiple eigenvalue)
100:     X        - matrix of generalized eigenvectors  
101:     shift    
102: */
103: static PetscErrorCode Prologue(PetscInt n,PetscReal *a,PetscReal *b,PetscReal gl,PetscReal gr,PetscInt *m,PetscReal *shift,PetscReal *w,PetscReal nw)
104: {

107:   PetscReal      mu,tol,*a1,*work,*y,*yp,*x,*xp;
108:   PetscInt       i,k,nwall=0;

111:   *m = 0;
112:   mu = 0.0;
113:   for (i=0;i<n;i++) mu += a[i];
114:   mu /= n;
115:   tol=n*PETSC_MACHINE_EPSILON*(gr-gl);
116:   nwall = 5*n+4;
117:   if (w && nw>=nwall) {
118:     work = w;
119:     nwall = nw;
120:   }else {
121:     PetscMalloc(nwall*sizeof(PetscReal),&work);
122:     PetscMemzero(work,nwall*sizeof(PetscReal));
123:   }
124:   a1 = work; /* size n */
125:   y = work+n; /* size n+1 */
126:   yp = y+n+1; /* size n+1. yp is the derivative of y (p for "prime") */
127:   x = yp+n+1; /* size n+1 */
128:   xp = x+n+1; /* size n+1 */
129:   for (i=0;i<n;i++) a1[i] = mu-a[i];
130:   x[0] = 1;
131:   xp[0] = 0;
132:   x[1] = a1[0];
133:   xp[1] = 1;
134:   for (i=1;i<n;i++) {
135:     x[i+1]=a1[i]*x[i]-b[i-1]*x[i-1];
136:     xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1];
137:   }
138:   *shift = mu;
139:   if ( PetscAbsReal(x[n])<tol) {
140:     /* mu is an eigenvalue */
141:     *m = *m+1;
142:     if ( PetscAbsReal(xp[n])<tol ) {
143:       /* mu is a multiple eigenvalue; Is it the one-point spectrum case? */
144:       k = 0;
145:       while (PetscAbsReal(xp[n])<tol && k<n-1) {
146:            PetscMemcpy(x,y,(n+1)*sizeof(PetscReal));
147:            PetscMemcpy(xp,yp,(n+1)*sizeof(PetscReal));
148:            x[k] = 0.0;
149:            k++;
150:            x[k] = 1.0;
151:            xp[k] = 0.0;
152:            x[k+1] = a1[k] + y[k];
153:            xp[k+1] = 1+yp[k];
154:            for (i=k+1;i<n;i++) {
155:              x[i+1] = a1[i]*x[i]-b[i-1]*x[i-1]+y[i];
156:              xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1]+yp[i];
157:            }
158:            *m = *m+1;
159:       }
160:     }
161:   }
162:   if (work != w) {
163:     PetscFree(work);
164:   }
165: /*
166:   When mu is not an eigenvalue or it it an eigenvalue but it is not the one-point spectrum case, we will always have shift=mu

168:   Need to check for overflow!

170:   After calling Prologue, eigenComplexdqds and eigen3dqds will test if m==n in which case we have the one-point spectrum case; 
171:   If m!=0, the only output to be used is the shift returned.
172: */
173:   return(0);
174: }


179: static PetscErrorCode LUfac(PetscInt n,PetscReal *a,PetscReal *b,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L,PetscReal *U,PetscInt *fail,PetscReal *w,PetscInt nw) {
181:   PetscInt       nwall,i;
182:   PetscReal      *work,*a1;

185:   nwall = n;
186:   if (w && nw>=nwall) {
187:     work = w;
188:     nwall = nw;
189:   } else {
190:     PetscMalloc(nwall*sizeof(PetscReal),&work);
191:   }
192:   a1 = work;
193:   for (i=0;i<n;i++) a1[i] = a[i]-shift;
194:   *fail = 0;
195:   for (i=0;i<n-1;i++) {
196:     U[i] = a1[i];
197:     L[i] = b[i]/U[i];
198:     a1[i+1] = a1[i+1]-L[i];
199:   }
200:   U[n-1] = a1[n-1];

202:   /* Check if there are NaN values */
203:   for (i=0;i<n-1 && *fail==0;i++) {
204:     if (PetscIsInfOrNanReal(L[i])) *fail=1;
205:     if (PetscIsInfOrNanReal(U[i])) *fail=1;
206:   }
207:   if (*fail==0 && PetscIsInfOrNanReal(U[n-1])) *fail=1;

209:   for (i=0;i<n-1 && *fail==0;i++) {
210:     if ( PetscAbsReal(L[i])>tol*norm) *fail = 1;  /* This demands IEEE arithmetic */
211:     if ( PetscAbsReal(U[i])>tol*norm) *fail = 1;
212:   }
213:   if ( *fail==0 && PetscAbsReal(U[n-1])>tol*norm) *fail = 1;
214: 
215:   if (work != w) {
216:     PetscFree(work);
217:   }
218:   return(0);
219: }

223: static PetscErrorCode realDQDS(PetscInt n,PetscReal *L,PetscReal *U,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L1,PetscReal *U1,PetscInt *fail)
224: {
225:   PetscReal d;
226:   PetscInt  i;

229:   *fail = 0;
230:   d = U[0]-shift;
231:   for (i=0;i<n-1;i++) {
232:     U1[i] = d+L[i];
233:     L1[i] = L[i]*(U[i+1]/U1[i]);
234:     d = d*(U[i+1]/U1[i])-shift;
235:   }
236:   U1[n-1]=d;

238:   /* The following demands IEEE arithmetic */
239:   for (i=0;i<n-1 && *fail==0;i++) {
240:     if (PetscIsInfOrNanReal(L1[i])) *fail=1;
241:     if (PetscIsInfOrNanReal(U1[i])) *fail=1;
242:   }
243:   if (*fail==0 && PetscIsInfOrNanReal(U1[n-1])) *fail=1;
244:   for (i=0;i<n-1 && *fail==0;i++) {
245:     if (PetscAbsReal(L1[i])>tol*norm) *fail=1;
246:     if (PetscAbsReal(U1[i])>tol*norm) *fail=1;
247:   }
248:   if (*fail==0 && PetscAbsReal(U1[n-1])>tol*norm) *fail=1;
249:   return(0);
250: }

254: static PetscErrorCode tridqdsZhuang3(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscInt *fail)
255: {
256:   PetscReal xl,yl,xr,yr,zr,t;
257:   PetscInt  i;

260:   *fail = 0;
261:   xr = 1.0;
262:   yr = e[0];
263:   zr = 0.0;
264:   /* Step 1 */
265:   /* the efect of Z1 */
266:   xr = xr*q[0]+yr;
267:   /* the inverse of L1 */
268:   xl = (q[0]+e[0])*(q[0]+e[0])+q[1]*e[0]-sum*(q[0]+e[0])+prod;
269:   yl = -(q[2]*e[1]*q[1]*e[0])/xl;
270:   xl = -(q[1]*e[0]*(q[0]+e[0]+q[1]+e[1]-sum))/xl;
271:   /* the efect of L1 */
272:   q[0] = xr-xl;
273:   xr = yr-xl;
274:   yr = zr-yl-xl*e[1];
275:   /*the inverse of Y1 */
276:   xr = xr/q[0];
277:   yr = yr/q[0];
278:   /*the effect of Y1 inverse */
279:   e[0] = xl+yr+xr*q[1];
280:   xl = yl+zr+yr*q[2];      /* zr=0  when n=3 */
281:   /*the effect of Y1 */
282:   xr = 1.0-xr;
283:   yr = e[1]-yr;

285:   /* STEP n-1 */

287:   if (PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(q[n-3])) {
288:     /* the efect of Zn-1 */
289:     xr = xr*q[n-2]+yr;
290:     /* the inverse of Ln-1 */
291:     xl = -xl/e[n-3];
292:     /* the efect of Ln-1 */
293:     q[n-2] = xr-xl;
294:     xr = yr-xl;
295:     /*the inverse of Yn-1 */
296:     xr = xr/q[n-2];
297:     /*the effect of the inverse of Yn-1 */
298:     e[n-2] = xl+xr*q[n-1];
299:     /*the effects of Yn-1 */
300:     xr = 1.0-xr;
301:     /* STEP n */
302:     /*the effect of Zn */
303:     xr = xr*q[n-1];
304:     /*the inverse of Ln=I */
305:     /*the effect of Ln */
306:     q[n-1] = xr;
307:     /* the inverse of  Yn-1=I */

309:   } else { /* Free deflation */
310:     e[n-2] = (e[n-3]+(xr*q[n-2]+yr)+q[n-1])*0.5;       /* Sum=trace/2 */
311:     q[n-2] = (e[n-3]+q[n-2]*xr)*q[n-1]-xl;             /* det */
312:     t = ((e[n-3]+(xr*q[n-2]+yr)-q[n-1])*0.5);
313:     q[n-1] = t*t+(xl+q[n-1]*yr);
314:     *fail = 2;
315:   }

317:   /* The following demands IEEE arithmetic */
318:   for (i=0;i<n-1 && *fail==0;i++) {
319:     if (PetscIsInfOrNanReal(e[i])) *fail=1;
320:     if (PetscIsInfOrNanReal(q[i])) *fail=1;
321:   }
322:   if (*fail==0 && PetscIsInfOrNanReal(q[n-1])) *fail=1;
323:   for (i=0;i<n-1 && *fail==0;i++) {
324:     if (PetscAbsReal(e[i])>tol*norm) *fail=1;
325:     if (PetscAbsReal(q[i])>tol*norm) *fail=1;
326:   }
327:   if (*fail==0 && PetscAbsReal(q[n-1])>tol*norm) *fail=1;
328:   return(0);
329: }

333: static PetscErrorCode tridqdsZhuang(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscReal *e1,PetscReal *q1,PetscInt *fail) {
335:   PetscInt       i;
336:   PetscReal      xl,yl,xr,yr,zr,t;

339:   for (i=0;i<n-1;i++) {
340:     e1[i] = e[i];
341:     q1[i] = q[i];
342:   }
343:   q1[n-1] = q[n-1];
344:   *fail = 0;
345:   if (n>3) {   /* For n>3 */
346:     *fail = 0;
347:     xr = 1;
348:     yr = e1[0];
349:     zr = 0;
350:     /* step 1 */
351:     /* the efect of Z1 */
352:     xr = xr*q1[0]+yr;
353:     /* the inverse of L1 */
354:     xl = (q1[0]+e1[0])*(q1[0]+e1[0])+q1[1]*e1[0]-sum*(q1[0]+e1[0])+prod;
355:     yl = -(q1[2]*e1[1]*q1[1]*e1[0])/xl;
356:     xl = -(q1[1]*e1[0]*(q1[0]+e1[0]+q1[1]+e1[1]-sum))/xl;
357:     /* the efect of L1 */
358:     q1[0] = xr-xl;
359:     xr = yr-xl;
360:     yr = zr-yl-xl*e1[1];
361:     zr = -yl*e1[2];
362:     /* the inverse of Y1 */
363:     xr = xr/q1[0];
364:     yr = yr/q1[0];
365:     zr = zr/q1[0];
366:     /* the effect of Y1 inverse */
367:     e1[0] = xl+yr+xr*q1[1];
368:     xl = yl+zr+yr*q1[2];
369:     yl = zr*q1[3];
370:     /* the effect of Y1 */
371:     xr = 1-xr;
372:     yr = e1[1]-yr;
373:     zr = -zr;
374:     /* step i=2,...,n-3 */
375:     for (i=1;i<n-3;i++) {
376:       /* the efect of Zi */
377:       xr = xr*q1[i]+yr;
378:       /* the inverse of Li */
379:       xl = -xl/e1[i-1];
380:       yl = -yl/e1[i-1];
381:       /* the efect of Li */
382:       q1[i] = xr-xl;
383:       xr = yr-xl;
384:       yr = zr-yl-xl*e1[i+1];
385:       zr = -yl*e1[i+2];
386:       /* the inverse of Yi */
387:       xr = xr/q1[i];
388:       yr = yr/q1[i];
389:       zr = zr/q1[i];
390:       /* the effect of the inverse of Yi */
391:       e1[i] = xl+yr+xr*q1[i+1];
392:       xl = yl+zr+yr*q1[i+2];
393:       yl = zr*q1[i+3];
394:       /* the effects of Yi */
395:       xr = 1.0-xr;
396:       yr = e1[i+1]-yr;
397:       zr = -zr;
398:     }

400:     /* STEP n-2            zr is no longer needed */

402:     /* the efect of Zn-2 */
403:     xr = xr*q1[n-3]+yr;
404:     /* the inverse of Ln-2 */
405:     xl = -xl/e1[n-4];
406:     yl = -yl/e1[n-4];
407:     /* the efect of Ln-2 */
408:     q1[n-3] = xr-xl;
409:     xr = yr-xl;
410:     yr = zr-yl-xl*e1[n-2];
411:     /* the inverse of Yn-2 */
412:     xr = xr/q1[n-3];
413:     yr = yr/q1[n-3];
414:     /* the effect of the inverse of Yn-2 */
415:     e1[n-3] = xl+yr+xr*q1[n-2];
416:     xl = yl+yr*q1[n-1];
417:     /* the effect of Yn-2 */
418:     xr = 1.0-xr;
419:     yr = e1[n-2]-yr;
420: 
421:     /* STEP n-1           yl and yr are no longer needed */
422:     /* Testing for EARLY DEFLATION */

424:     if (PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(q1[n-3])) {
425:       /* the efect of Zn-1 */
426:       xr = xr*q1[n-2]+yr;
427:       /* the inverse of Ln-1 */
428:       xl = -xl/e1[n-3];
429:       /* the efect of Ln-1 */
430:       q1[n-2] = xr-xl;
431:       xr = yr-xl;
432:       /*the inverse of Yn-1 */
433:       xr = xr/q1[n-2];
434:       /*the effect of the inverse of Yn-1 */
435:       e1[n-2] = xl+xr*q1[n-1];
436:       /*the effects of Yn-1 */
437:       xr = 1.0-xr;
438: 
439:       /* STEP n;     xl no longer needed */
440:       /* the effect of Zn */
441:       xr = xr*q1[n-1];
442:       /* the inverse of Ln = I */
443:       /* the effect of Ln */
444:       q1[n-1] = xr;
445:       /* the inverse of  Yn-1=I */

447:     } else {  /* FREE DEFLATION */
448:       e1[n-2] = (e1[n-3]+xr*q1[n-2]+yr+q1[n-1])*0.5;     /* sum=trace/2 */
449:       q1[n-2] = (e1[n-3]+q1[n-2]*xr)*q1[n-1]-xl;         /* det */
450:       t = (e1[n-3]+xr*q1[n-2]+yr-q1[n-1])*0.5;
451:       q1[n-1] = t*t+xl+q1[n-1]*yr;
452:       *fail = 2;
453:     }

455:     for (i=0;i<n-1 && *fail==0;i++) {
456:       if (PetscIsInfOrNanReal(e1[i])) *fail=1;
457:       if (PetscIsInfOrNanReal(q1[i])) *fail=1;
458:     }
459:     if (*fail==0 && PetscIsInfOrNanReal(q1[n-1])) *fail=1;
460:     for (i=0;i<n-1 && *fail==0;i++) {
461:       if (PetscAbsReal(e1[i])>tol*norm) *fail = 1;  /* This demands IEEE arithmetic */
462:       if (PetscAbsReal(q1[i])>tol*norm) *fail = 1;
463:     }
464:     if ( *fail==0 && PetscAbsReal(q1[n-1])>tol*norm) *fail = 1;
465: 
466:   } else {  /* The case n=3 */
467:     tridqdsZhuang3(n,e1,q1,sum,prod,tol,norm,tolDef,fail);
468:   }
469:   return(0);
470: }

474: static PetscErrorCode DSGHIEP_Eigen3DQDS(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *c, PetscScalar *wr, PetscScalar *wi,PetscReal *w,PetscInt nw)
475: {
476:   PetscInt       totalIt=0;       /* Total Number of Iterations  */
477:   PetscInt       totalFail=0;     /* Total number of failures */
478:   PetscInt       nFail=0;         /* Number of failures per transformation */
479:   PetscReal      tolZero=1.0/16;  /* Tolerance for zero shifts */
480:   PetscInt       maxIt=10*n;      /* Maximum number of iterations */
481:   PetscInt       maxFail=10*n;    /* Maximum number of failures allowed per each transformation */
482:   PetscReal      tolDef=PETSC_MACHINE_EPSILON;  /* Tolerance for deflation eps, 10*eps, 100*eps */
483:   PetscReal      tolGrowth=100000;
485:   PetscInt       i,k,nwu=0,nwall,begin,ind,flag,dim,m;
486:   PetscReal      norm,gr,gl,sigma,delta,meanEig,*work,*U,*L,*U1,*L1,*split;
487:   PetscReal      acShift,initialShift,shift=0.0,sum,det,disc,prod,x1,x2;
488:   PetscInt       realSteps,complexSteps,earlyDef,lastSplit,splitCount;
489:   PetscBool      test1,test2;

492:   dim = n;
493:   /* Test if the matrix is unreduced */
494:   for (i=0;i<n-1;i++) {
495:     if (PetscAbsReal(b[i])==0 || PetscAbsReal(c[i])==0) {
496:       SETERRQ(PETSC_COMM_SELF,1,"Initial tridiagonal matrix is not unreduced");
497:     }
498:   }
499:   nwall = 9*n+4;
500:   if (w && nw>=nwall) {
501:     work = w;
502:     nwall = nw;
503:   } else {
504:     PetscMalloc(nwall*sizeof(PetscReal),&work);
505:   }
506:   U = work;
507:   L = work+n;
508:   U1 = work+2*n;
509:   L1 = work+3*n;
510:   nwu = 4*n;
511:   PetscMemzero(wi,n*sizeof(PetscScalar));
512:   /* Normalization - the J form of C */
513:   for (i=0;i<n-1;i++) b[i] *= c[i]; /* subdiagonal of the J form */

515:   /* Scan matrix J  ---- Finding a box of inclusion for the eigenvalues */
516:   norm = 0.0;
517:   for (i=0;i<n-1;i++) {
518:     norm = PetscMax(norm,PetscMax(PetscAbsReal(a[i]),PetscAbsReal(b[i])));
519:   }
520:   norm = PetscMax(norm,PetscMax(1,PetscAbsReal(a[n-1])));
521:   ScanJ(n,a,b,&gl,&gr,&sigma);
522:   delta = (gr-gl)/n; /* How much to add to the shift, in case of failure (element growth) */
523:   meanEig = 0.0;
524:   for (i=0;i<n;i++) meanEig += a[i];
525:   meanEig /= n; /* shift = initial shift = mean of eigenvalues */
526:   Prologue(n,a,b,gl,gr,&m,&shift,work+nwu,nwall-nwu);
527:   if (m==n) { /* Multiple eigenvalue, we have the one-point spectrum case */
528:     for (i=0;i<dim;i++) {
529:       wr[i] = shift;
530:       wi[i] = 0.0;
531:     }
532:     return(0);
533:   }
534:   /* Initial LU Factorization */
535:   if ( delta==0 ) shift=0;/* The case when all eigenvalues are pure imaginary */
536:   LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu,nwall-nwu); /* flag=1 failure; flag=0 successful transformation*/
537:   while (flag==1 && nFail<maxFail) {
538:     shift=shift+delta;
539:     if (shift>gr || shift<gl) { /* Successive failures */
540:       shift=meanEig;
541:       delta=-delta;
542:     }
543:     nFail=nFail+1;
544:     LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu,nwall-nwu); /* flag=1 failure; flag=0 successful transformation*/
545:   }
546:   if (nFail==maxFail) {
547:     SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached in Initial LU factorization");
548:   }
549:   /* Successful Initial transformation */
550:   totalFail = totalFail+nFail;
551:   nFail = 0;
552:   acShift = 0;
553:   initialShift = shift;
554:   shift = 0;
555:   begin = 0;
556:   lastSplit = 0;
557:   split = work+nwu;
558:   nwu += n;
559:   split[lastSplit] = begin;
560:   splitCount = 0;
561:   while (begin!=-1) {
562:     while (n-begin>2 && totalIt<maxIt) {
563:       /* Check for deflation before performing a transformation */
564:       test1 = ((PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])) && (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)) && (PetscAbsReal(L[n-2]*U[n])<tolDef*PetscAbsReal(acShift+U[n-1])) && (PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)<tolDef*PetscAbsReal(acShift+U[n-1])))? PETSC_TRUE: PETSC_FALSE;
565:       if (flag==2) {  /* Early 2x2 deflation */
566:         earlyDef=earlyDef+1;
567:         test2 = PETSC_TRUE;
568:       } else {
569:         if (n-begin>4) {
570:           test2 = ((PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])) && (PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3])))? PETSC_TRUE: PETSC_FALSE;
571:         } else { /* n-begin+1=3 */
572:           test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
573:         }
574:       }
575:       while (test2 || test1) {
576:         /* 2x2 deflation */
577:         if (test2) {
578:           if (flag==2) { /* Early deflation */
579:             sum = L[n-2];
580:             det = U[n-2];
581:             disc = U[n-1];
582:             flag = 0;
583:           } else {
584:             sum = (L[n-2]+(U[n-2]+U[n-1]))/2;
585:             disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
586:             det = U[n-2]*U[n-1];
587:           }
588:           if (disc<=0) {
589: #if !defined(PETSC_USE_COMPLEX)
590:             wr[--n] = sum+acShift; wi[n] = PetscSqrtReal(-disc);
591:             wr[--n] = sum+acShift; wi[n] = -PetscSqrtReal(-disc);
592: #else
593:             wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; wi[n] = 0.0;
594:             wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; wi[n] = 0.0;
595: #endif
596:           } else {
597:             if (sum==0) {
598:               x1 = PetscSqrtReal(disc);
599:               x2 = -x1;
600:             } else {
601:               x1 = ((sum>=0.0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
602:               x2 = det/x1;
603:             }
604:             wr[--n] = x1+acShift;
605:             wr[--n] = x2+acShift;
606:           }
607:         } else { /* test1 -- 1x1 deflation */
608:           x1 = U[n-1]+acShift;
609:           wr[--n] = x1;
610:         }
611: 
612:         if (n<=begin+2) {
613:           break;
614:         } else {
615:           test1 = ((PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])) && (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)) && (PetscAbsReal(L[n-2]*U[n-1])<tolDef*PetscAbsReal(acShift+U[n-1])) && (PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)< tolDef*PetscAbsReal(acShift+U[n-1])))? PETSC_TRUE: PETSC_FALSE;
616:           if (n-begin>4) {
617:             test2 = ((PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])) && (PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3])))? PETSC_TRUE: PETSC_FALSE;
618:           } else { /* n-begin+1=3 */
619:             test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
620:           }
621:         }
622:       } /* end "WHILE deflations" */
623:       /* After deflation */
624:       if (n>begin+3) {
625:         ind = begin;
626:         for (k=n-4;k>=begin+1;k--) {
627:           if ( (PetscAbsReal(L[k])<tolDef*PetscAbsReal(U[k]))&&(PetscAbsReal(L[k]*U[k+1]*(U[k+2]+L[k+2])*(U[k-1]+L[k-1]))<tolDef*PetscAbsReal((U[k-1]*(U[k]+L[k])+L[k-1]*L[k])*(U[k+1]*(U[k+2]+L[k+2])+L[k+1]*L[k+2]))) ) {
628:              ind=k;
629:              break;
630:           }
631:         }
632:         if ( ind>begin || PetscAbsReal(L[begin]) <tolDef*PetscAbsReal(U[begin]) ) {
633:           lastSplit = lastSplit+1;
634:           split[lastSplit] = begin;
635:           L[ind] = acShift; /* Use of L[ind] to save acShift */
636:           begin = ind+1;
637:           splitCount = splitCount+1;
638:         }
639:       }
640: 
641:       if (n>begin+2) {
642:         disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
643:         if ( (PetscAbsReal(L[n-2])>tolZero) && (PetscAbsReal(L[n-3])>tolZero)) { /* L's are big */
644:           shift = 0;
645:           sum = 0; /* Needed in case of failure */
646:           prod = 0;
647:           realDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
648:           realSteps++;
649:           if (flag) {  /* Failure */
650:             tridqdsZhuang(n-begin,L+begin,U+begin,0.0,0.0,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
651:             shift = 0.0;
652:             while (flag==1 && nFail<maxFail) {  /* Successive failures */
653:               shift = shift+delta;
654:               if (shift>gr-acShift || shift<gl-acShift) {
655:                 shift = meanEig-acShift;
656:                 delta = -delta;
657:               }
658:               nFail++;
659:               realDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
660:             }
661:           }
662:         } else { /* L's are small */
663:           if (disc<0) {  /* disc <0   Complex case; Francis shift; 3dqds */
664:             sum = U[n-2]+L[n-2]+U[n-1];
665:             prod = U[n-2]*U[n-1];
666:             tridqdsZhuang(n-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
667:             complexSteps++;
668:             shift = 0.0; /* Restoring transformation */
669:             while (flag==1 && nFail<maxFail) { /* In case of failure */
670:               shift = shift+U[n-1];  /* first time shift=0 */
671:               realDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
672:               nFail++;
673:             }
674:           } else  { /* disc >0  Real case; real Wilkinson shift; dqds */
675:             sum = (L[n-2]+U[n-2]+U[n-1])/2;
676:             if (sum==0) {
677:               x1 = PetscSqrtReal(disc);
678:               x2 = -x1;
679:             } else {
680:               x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
681:               x2 = U[n-2]*U[n-1]/x1;
682:             }
683:             /* Take the eigenvalue closest to UL(n,n) */
684:             if (PetscAbsReal(x1-U[n-1])<PetscAbsReal(x2-U[n-1])) {
685:               shift = x1;
686:             } else {
687:               shift = x2;
688:             }
689:             realDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
690:             realSteps++;
691:             /* In case of failure */
692:             while (flag==1 && nFail<maxFail) {
693:               sum = 2*shift;
694:               prod = shift*shift;
695:               tridqdsZhuang(n-1-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
696:               /* In case of successive failures */
697:               if (shift==0) {
698:                 shift = PetscMin(PetscAbsReal(L[n-2]),PetscAbsReal(L[n-3]))*delta;
699:               } else {
700:                 shift=shift+delta;
701:               }
702:               if (shift>gr-acShift || shift<gl-acShift) {
703:                 shift = meanEig-acShift;
704:                 delta = -delta;
705:               }
706:               if (flag==0) { /* We changed from real dqds to 3dqds */
707:                 shift=0;
708:               }
709:               nFail++;
710:             }
711:           }
712:         } /* end "if tolZero" */
713:         if (nFail==maxFail) {
714:           SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached. No convergence in DQDS");
715:         }
716:         /* Successful Transformation; flag==0 */
717:         totalIt++;
718:         acShift = shift+acShift;
719:         for (i=begin;i<n-1;i++) {
720:           L[i] = L1[i];
721:           U[i] = U1[i];
722:         }
723:         U[n-1] = U1[n-1];
724:         totalFail = totalFail+nFail;
725:         nFail = 0;
726:       }  /* end "if n>begin+1" */
727:     }  /* end WHILE 1 */
728:     if (totalIt>=maxIt) {
729:       SETERRQ(PETSC_COMM_SELF,1,"Maximun number of iterations reached. No convergence in DQDS");
730:     }
731:     /* END: n=2 or n=1  % n=begin+1 or n=begin */
732:     if (n==begin+2) {
733:       sum = (L[n-2]+U[n-2]+U[n-1])/2;
734:       disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
735:         if (disc<=0)  {  /* Complex case */
736:         /* Deflation 2 */
737: #if !defined(PETSC_USE_COMPLEX)
738:         wr[--n] = sum+acShift; wi[n] = PetscSqrtReal(-disc);
739:         wr[--n] = sum+acShift; wi[n] = -PetscSqrtReal(-disc);
740: #else
741:         wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; wi[n] = 0.0;
742:         wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; wi[n] = 0.0;
743: #endif
744:       }else  { /* Real case */
745:         if (sum==0) {
746:           x1 = PetscSqrtReal(disc);
747:           x2 = -x1;
748:         } else {
749:           x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
750:           x2 = U[n-2]*U[n-1]/x1;
751:         }
752:         /* Deflation 2 */
753:         wr[--n] = x2+acShift;
754:         wr[--n] = x1+acShift;
755:       }
756:     }else { /* n=1   n=begin */
757:       /* deflation 1 */
758:       x1 = U[n-1]+acShift;
759:       wr[--n] = x1;
760:     }
761:     switch (n) {
762:       case 0:
763:         begin = -1;
764:         break;
765:       case 1:
766:         acShift = L[begin-1];
767:         begin = split[lastSplit];
768:         lastSplit--;
769:         break;
770:       default : /* n>=2 */
771:         acShift = L[begin-1];
772:         begin = split[lastSplit];
773:         lastSplit--;
774:     }
775:   }/* While begin~=-1 */
776:   for (i=0;i<dim;i++) {
777:     wr[i] = wr[i]+initialShift;
778:   }
779:   if (work!=w) {
780:     PetscFree(work);
781:   }
782:   return(0);
783: }
786: PetscErrorCode DSSolve_GHIEP_DQDS_II(DS ds,PetscScalar *wr,PetscScalar *wi)
787: {
789:   PetscInt       i,off,ld,nwall,nwu;
790:   PetscScalar    *A,*B,*Q;
791:   PetscReal      *d,*e,*s,*a,*b,*c;

794:   ld = ds->ld;
795:   off = ds->l + ds->l*ld;
796:   A = ds->mat[DS_MAT_A];
797:   B = ds->mat[DS_MAT_B];
798:   Q = ds->mat[DS_MAT_Q];
799:   d = ds->rmat[DS_MAT_T];
800:   e = ds->rmat[DS_MAT_T] + ld;
801:   c = ds->rmat[DS_MAT_T] + 2*ld;
802:   s = ds->rmat[DS_MAT_D];
803:   /* Quick return if possible */
804:   if (ds->n-ds->l == 1) {
805:     *(Q+off) = 1;
806:     if (ds->compact) {
807:       wr[ds->l] = d[ds->l]/s[ds->l];
808:       wi[ds->l] = 0.0;
809:     } else {
810:       d[ds->l] = PetscRealPart(A[off]);
811:       s[ds->l] = PetscRealPart(B[off]);
812:       wr[ds->l] = d[ds->l]/s[ds->l];
813:       wi[ds->l] = 0.0;
814:     }
815:     return(0);
816:   }
817:   nwall = 12*ld+4;
818:   DSAllocateWork_Private(ds,0,nwall,0);
819:   /* Reduce to pseudotriadiagonal form */
820:   DSIntermediate_GHIEP( ds);
821: 
822:   /* Compute Eigenvalues (DQDS)*/
823:   /* Form pseudosymmetric tridiagonal */
824:   a = ds->rwork;
825:   b = a+ld;
826:   c = b+ld;
827:   nwu = 3*ld;
828:   if (ds->compact) {
829:     for (i=ds->l;i<ds->n-1;i++) {
830:       a[i] = d[i]*s[i];
831:       b[i] = e[i]*s[i+1];
832:       c[i] = e[i]*s[i];
833:     }
834:     a[ds->n-1] = d[ds->n-1]*s[ds->n-1];
835:   } else {
836:     for (i=ds->l;i<ds->n-1;i++) {
837:       a[i] = PetscRealPart(A[i+i*ld]*B[i+i*ld]);
838:       b[i] = PetscRealPart(A[i+1+i*ld]*s[i+1]);
839:       c[i] = PetscRealPart(A[i+(i+1)*ld]*s[i]);
840:     }
841:     a[ds->n-1] = PetscRealPart(A[ds->n-1+(ds->n-1)*ld]*B[ds->n-1+(ds->n-1)*ld]);
842:   }
843:   DSGHIEP_Eigen3DQDS(ds->n-ds->l,a+ds->l,b+ds->l,c+ds->l,wr+ds->l,wi+ds->l,ds->rwork+nwu,nwall-nwu);

845:   /* Compute Eigenvectors with Inverse Iteration */
846:   DSGHIEPPseudoOrthogInverseIteration(ds,wr,wi);
847: 
848:   /* Recover eigenvalues from diagonal */
849:   DSGHIEPComplexEigs(ds, 0, ds->l, wr, wi);
850:   return(0);
851: }