Actual source code: dsghiep_hz.c

  1: /*
  2:    HZ iteration for generalized symmetric-indefinite eigenproblem.
  3:    Based on Matlab code from David Watkins.

  5:    References:

  7:        [1] D.S. Watkins, The Matrix Eigenvalue Problem: GR and Krylov Subspace
  8:            Methods, SIAM, 2007.

 10:        [2] M.A. Brebner, J. Grad, "Eigenvalues of Ax = lambda Bx for real
 11:            symmetric matrices A and B computed by reduction to pseudosymmetric
 12:            form and the HR process", Linear Alg. Appl. 43, pp. 99-118, 1982.

 14:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 15:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 16:    Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain

 18:    This file is part of SLEPc.
 19:       
 20:    SLEPc is free software: you can redistribute it and/or modify it under  the
 21:    terms of version 3 of the GNU Lesser General Public License as published by
 22:    the Free Software Foundation.

 24:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 25:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 26:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 27:    more details.

 29:    You  should have received a copy of the GNU Lesser General  Public  License
 30:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 31:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 32: */
 33: #include <slepc-private/dsimpl.h>      /*I "slepcds.h" I*/
 34: #include <slepcblaslapack.h>

 36: extern PetscErrorCode DSSwitchFormat_GHIEP(DS,PetscBool);
 37: extern PetscErrorCode DSGHIEPComplexEigs(DS,PetscInt,PetscInt,PetscScalar*,PetscScalar*);
 38: extern PetscErrorCode HRApply(PetscInt,PetscScalar*,PetscInt,PetscScalar*,PetscInt,PetscReal,PetscReal);
 39: extern PetscErrorCode DSIntermediate_GHIEP(DS);
 40: extern PetscErrorCode DSGHIEPRealBlocks(DS);

 44: /*
 45:    Sets up a 2-by-2 matrix to eliminate y in the vector [x y]'.
 46:    Transformation is rotator if sygn = 1 and hyperbolic if sygn = -1.
 47: */
 48: static PetscErrorCode UnifiedRotation(PetscReal x,PetscReal y,PetscReal sygn,PetscReal *rot,PetscReal *rcond,PetscBool *swap)
 49: {
 50:   PetscReal nrm,c,s;

 53:   *swap = PETSC_FALSE;
 54:   if (y == 0) {
 55:     rot[0] = 1.0; rot[1] = 0.0; rot[2] = 0.0; rot[3] = 1.0;
 56:     *rcond = 1.0;
 57:   } else {
 58:     nrm = PetscMax(PetscAbs(x),PetscAbs(y));
 59:     c = x/nrm; s = y/nrm;
 60:     if (sygn == 1.0) {  /* set up a rotator */
 61:       nrm = PetscSqrtReal(c*c+s*s);
 62:       c = c/nrm; s = s/nrm;
 63:       /* rot = [c s; -s c]; */
 64:       rot[0] = c; rot[1] = -s; rot[2] = s; rot[3] = c;
 65:       *rcond = 1.0;
 66:     } else if (sygn == -1) {  /* set up a hyperbolic transformation */
 67:       nrm = c*c-s*s;
 68:       if (nrm > 0) nrm = PetscSqrtReal(nrm);
 69:       else if (nrm < 0) {
 70:         nrm = PetscSqrtReal(-nrm);
 71:         *swap = PETSC_TRUE;
 72:       } else {  /* breakdown */
 73:         SETERRQ(PETSC_COMM_SELF,1,"Breakdown in construction of hyperbolic transformation");
 74:         rot[0] = 1.0; rot[1] = 0.0; rot[2] = 0.0; rot[3] = 1.0;
 75:         *rcond = 0.0;
 76:         return(0);
 77:       }
 78:       c = c/nrm; s = s/nrm;
 79:       /* rot = [c -s; -s c]; */
 80:       rot[0] = c; rot[1] = -s; rot[2] = -s; rot[3] = c;
 81:       *rcond = PetscAbs(PetscAbs(s)-PetscAbs(c))/(PetscAbs(s)+PetscAbs(c));
 82:     } else SETERRQ(PETSC_COMM_SELF,1,"Value of sygn sent to transetup must be 1 or -1");
 83:   }
 84:   return(0);
 85: }

 89: static PetscErrorCode HZStep(PetscBLASInt ntop,PetscBLASInt nn,PetscReal tr,PetscReal dt,PetscReal *aa,PetscReal *bb,PetscReal *dd,PetscScalar *uu,PetscInt n,PetscInt ld,PetscBool *flag)
 90: {
 92:   PetscBLASInt   one=1;
 93:   PetscInt       k,jj;
 94:   PetscBLASInt   n_;
 95:   PetscReal      bulge10,bulge20,bulge30,bulge31,bulge41,bulge42;
 96:   PetscReal      sygn,rcond=1.0,worstcond,rot[4],buf[2];
 97:   PetscScalar    rtmp;
 98:   PetscBool      swap;

101:   worstcond = 1.0;
102:   n_ = PetscBLASIntCast(n);

104:   /* Build initial bulge that sets step in motion */
105:   bulge10 = dd[ntop+1]*(aa[ntop]*(aa[ntop] - dd[ntop]*tr) + dt*dd[ntop]*dd[ntop]) + dd[ntop]*bb[ntop]*bb[ntop];
106:   bulge20 = bb[ntop]*(dd[ntop+1]*aa[ntop] + dd[ntop]*aa[ntop+1] - dd[ntop]*dd[ntop+1]*tr);
107:   bulge30 = bb[ntop]*bb[ntop+1]*dd[ntop];
108:   bulge31 = 0.0;
109:   bulge41 = 0.0;
110:   bulge42 = 0.0;

112:   /* Chase the bulge */
113:   for (jj=ntop;jj<nn-1;jj++) {
114: 
115:     /* Check for trivial bulge */
116:     if (jj>ntop && PetscMax(PetscMax(PetscAbs(bulge10),PetscAbs(bulge20)),PetscAbs(bulge30))<PETSC_MACHINE_EPSILON*(PetscAbs(aa[jj]) + PetscAbs(aa[jj+1]))) {
117:       bb[jj-1] = 0.0;  /* deflate and move on */
118: 
119:     } else { /* carry out the step */

121:       /* Annihilate tip entry bulge30 */
122:       if (bulge30 != 0.0) {
123: 
124:         /* Make an interchange if necessary to ensure that the 
125:            first transformation is othogonal, not hyperbolic.  */
126:         if (dd[jj+1] != dd[jj+2]) { /* make an interchange */
127:           if (dd[jj] != dd[jj+1]) {  /* interchange 1st and 2nd */
128:             buf[0] = bulge20; bulge20 = bulge10; bulge10 = buf[0];
129:             buf[0] = aa[jj]; aa[jj] = aa[jj+1]; aa[jj+1] = buf[0];
130:             buf[0] = bb[jj+1]; bb[jj+1] = bulge31; bulge31 = buf[0];
131:             buf[0] = dd[jj]; dd[jj] = dd[jj+1]; dd[jj+1] = buf[0];
132:             for (k=0;k<n;k++) {
133:               rtmp = uu[k+jj*ld]; uu[k+jj*ld] = uu[k+(jj+1)*ld]; uu[k+(jj+1)*ld] = rtmp;
134:             }
135:           } else {  /* interchange 1st and 3rd */
136:             buf[0] = bulge30; bulge30 = bulge10; bulge10 = buf[0];
137:             buf[0] = aa[jj]; aa[jj] = aa[jj+2]; aa[jj+2] = buf[0];
138:             buf[0] = bb[jj]; bb[jj] = bb[jj+1]; bb[jj+1] = buf[0];
139:             buf[0] = dd[jj]; dd[jj] = dd[jj+2]; dd[jj+2] = buf[0];
140:             if (jj + 2 < nn-1) {
141:               bulge41 = bb[jj+2];
142:               bb[jj+2] = 0;
143:             }
144:             for (k=0;k<n;k++) {
145:               rtmp = uu[k+jj*ld]; uu[k+jj*ld] = uu[k+(jj+2)*ld]; uu[k+(jj+2)*ld] = rtmp;
146:             }
147:           }
148:         }
149: 
150:         /* Set up transforming matrix rot. */
151:         UnifiedRotation(bulge20,bulge30,1,rot,&rcond,&swap);

153:         /* Apply transforming matrix rot to T. */
154:         bulge20 = rot[0]*bulge20 + rot[2]*bulge30;
155:         buf[0] = rot[0]*bb[jj] + rot[2]*bulge31;
156:         buf[1] = rot[1]*bb[jj] + rot[3]*bulge31;
157:         bb[jj] = buf[0];
158:         bulge31 = buf[1];
159:         buf[0] = rot[0]*rot[0]*aa[jj+1] + 2.0*rot[0]*rot[2]*bb[jj+1] + rot[2]*rot[2]*aa[jj+2];
160:         buf[1] = rot[1]*rot[1]*aa[jj+1] + 2.0*rot[3]*rot[1]*bb[jj+1] + rot[3]*rot[3]*aa[jj+2];
161:         bb[jj+1] = rot[1]*rot[0]*aa[jj+1] + rot[3]*rot[2]*aa[jj+2] + (rot[3]*rot[0] + rot[1]*rot[2])*bb[jj+1];
162:         aa[jj+1] = buf[0];
163:         aa[jj+2] = buf[1];
164:         if (jj + 2 < nn-1) {
165:           bulge42 = bb[jj+2]*rot[2];
166:           bb[jj+2] = bb[jj+2]*rot[3];
167:         }

169:         /* Accumulate transforming matrix */
170:         BLASrot_(&n_,uu+(jj+1)*ld,&one,uu+(jj+2)*ld,&one,&rot[0],&rot[2]);
171:       }

173:       /* Annihilate inner entry bulge20 */
174:       if (bulge20 != 0.0) {

176:         /* Begin by setting up transforming matrix rot */
177:         sygn = dd[jj]*dd[jj+1];
178:         UnifiedRotation(bulge10,bulge20,sygn,rot,&rcond,&swap);
179:         if (rcond<PETSC_MACHINE_EPSILON) {
180:           SETERRQ1(PETSC_COMM_SELF,0,"Transforming matrix is numerically singular rcond=%g",rcond);
181:           *flag = PETSC_TRUE;
182:           return(0);
183:         }
184:         if (rcond < worstcond) worstcond = rcond;

186:         /* Apply transforming matrix rot to T */
187:         if (jj > ntop) bb[jj-1] = rot[0]*bulge10 + rot[2]*bulge20;
188:         buf[0] = rot[0]*rot[0]*aa[jj] + 2*rot[0]*rot[2]*bb[jj] + rot[2]*rot[2]*aa[jj+1];
189:         buf[1] = rot[1]*rot[1]*aa[jj] + 2*rot[3]*rot[1]*bb[jj] + rot[3]*rot[3]*aa[jj+1];
190:         bb[jj] = rot[1]*rot[0]*aa[jj] + rot[3]*rot[2]*aa[jj+1] + (rot[3]*rot[0] + rot[1]*rot[2])*bb[jj];
191:         aa[jj] = buf[0];
192:         aa[jj+1] = buf[1];
193:         if (jj + 1 < nn-1) {
194:           /* buf = [ bulge31 bb(jj+1) ] * rot' */
195:           buf[0] = rot[0]*bulge31 + rot[2]*bb[jj+1];
196:           buf[1] = rot[1]*bulge31 + rot[3]*bb[jj+1];
197:           bulge31 = buf[0];
198:           bb[jj+1] = buf[1];
199:         }
200:         if (jj + 2 < nn-1) {
201:           /* buf = [bulge41 bulge42] * rot' */
202:           buf[0] = rot[0]*bulge41 + rot[2]*bulge42;
203:           buf[1] = rot[1]*bulge41 + rot[3]*bulge42;
204:           bulge41 = buf[0];
205:           bulge42 = buf[1];
206:         }

208:         /* Apply transforming matrix rot to D */
209:         if (swap == 1) {
210:           buf[0] = dd[jj]; dd[jj] = dd[jj+1]; dd[jj+1] = buf[0];
211:         }

213:         /* Accumulate transforming matrix, uu(jj:jj+1,:) = rot*uu(jj:jj+1,:) */
214:         if (sygn==1) {
215:           BLASrot_(&n_,uu+jj*ld,&one,uu+(jj+1)*ld,&one,&rot[0],&rot[2]);
216:         } else {
217:           HRApply(n,uu+jj*ld,1,uu+(jj+1)*ld,1,rot[0],rot[1]);
218:         }
219:       }
220:     }

222:     /* Adjust bulge for next step */
223:     bulge10 = bb[jj];
224:     bulge20 = bulge31;
225:     bulge30 = bulge41;
226:     bulge31 = bulge42;
227:     bulge41 = 0.0;
228:     bulge42 = 0.0;
229:   }
230:   return(0);
231: }

235: static PetscErrorCode HZIteration(PetscBLASInt nn,PetscBLASInt cgd,PetscReal *aa,PetscReal *bb,PetscReal *dd,PetscScalar *uu,PetscBLASInt ld)
236: {
238:   PetscBLASInt   j2,one=1;
239:   PetscInt       its,nits,nstop,jj,ntop,nbot,ntry;
240:   PetscReal      htr,det,dis,dif,tn,kt,c,s,tr,dt;
241:   PetscBool      flag=PETSC_FALSE;

244:   its = 0;
245:   nbot = nn-1;
246:   nits = 0;
247:   nstop = 40*(nn - cgd);

249:   while (nbot >= cgd && nits < nstop) {

251:     /* Check for zeros on the subdiagonal */
252:     jj = nbot - 1;
253:     while (jj>=cgd && PetscAbs(bb[jj])>PETSC_MACHINE_EPSILON*(PetscAbs(aa[jj])+PetscAbs(aa[jj+1]))) jj = jj-1;
254:     if (jj>=cgd) bb[jj]=0;
255:     ntop = jj + 1;  /* starting point for step */
256:     if (ntop == nbot) {  /* isolate single eigenvalue */
257:       nbot = ntop - 1;
258:       its = 0;
259:     } else if (ntop+1 == nbot) {  /* isolate pair of eigenvalues */
260:       htr = 0.5*(aa[ntop]*dd[ntop] + aa[nbot]*dd[nbot]);
261:       det = dd[ntop]*dd[nbot]*(aa[ntop]*aa[nbot]-bb[ntop]*bb[ntop]);
262:       dis = htr*htr - det;
263:       if (dis > 0) {  /* distinct real eigenvalues */
264:         if (dd[ntop] == dd[nbot]) {  /* separate the eigenvalues by a Jacobi rotator */
265:           dif = aa[ntop]-aa[nbot];
266:           if (2.0*PetscAbs(bb[ntop])<=dif) {
267:             tn = 2*bb[ntop]/dif;
268:             tn = tn/(1.0 + PetscSqrtScalar(1.0+tn*tn));
269:           } else {
270:             kt = dif/(2.0*bb[ntop]);
271:             tn = PetscSign(kt)/(PetscAbs(kt)+PetscSqrtScalar(1.0+kt*kt));
272:           }
273:           c = 1.0/PetscSqrtScalar(1.0 + tn*tn);
274:           s = c*tn;
275:           aa[ntop] = aa[ntop] + tn*bb[ntop];
276:           aa[nbot] = aa[nbot] - tn*bb[ntop];
277:           bb[ntop] = 0;
278:           j2 = nn-cgd;
279:           BLASrot_(&j2,uu+ntop*ld+cgd,&one,uu+nbot*ld+cgd,&one,&c,&s);
280:         } else {
281:           dis = PetscSqrtScalar(dis);
282:           if (htr < 0) dis = -dis;
283:         }
284:       }
285:       nbot = ntop - 1;
286:     } else {  /* Do an HZ iteration */
287:       its = its + 1;
288:       nits = nits + 1;
289:       tr = aa[nbot-1]*dd[nbot-1] + aa[nbot]*dd[nbot];
290:       dt = dd[nbot-1]*dd[nbot]*(aa[nbot-1]*aa[nbot]-bb[nbot-1]*bb[nbot-1]);
291:       for (ntry=1;ntry<=6;ntry++) {
292:         HZStep(ntop,nbot+1,tr,dt,aa,bb,dd,uu,nn,ld,&flag);
293:         if (!flag) break;
294:         else if (ntry == 6)
295:           SETERRQ(PETSC_COMM_SELF,1,"Unable to complete hz step on six tries");
296:         else {
297:           tr = 0.9*tr; dt = 0.81*dt;
298:         }
299:       }
300:     }
301:   }
302:   return(0);
303: }

307: PetscErrorCode DSSolve_GHIEP_HZ(DS ds,PetscScalar *wr,PetscScalar *wi)
308: {
310:   PetscInt       off;
311:   PetscBLASInt   n1,ld;
312:   PetscScalar    *A,*B,*Q;
313:   PetscReal      *d,*e,*s;
314: #if defined(PETSC_USE_COMPLEX)
315:   PetscInt       i;
316: #endif

319: #if !defined(PETSC_USE_COMPLEX)
321: #endif
322:   n1  = ds->n - ds->l;
323:   ld = PetscBLASIntCast(ds->ld);
324:   off = ds->l + ds->l*ld;
325:   A  = ds->mat[DS_MAT_A];
326:   B  = ds->mat[DS_MAT_B];
327:   Q = ds->mat[DS_MAT_Q];
328:   d = ds->rmat[DS_MAT_T];
329:   e = ds->rmat[DS_MAT_T] + ld;
330:   s  = ds->rmat[DS_MAT_D];
331:   /* Quick return */
332:   if (n1 == 1) {
333:     *(Q+off) = 1;
334:     if (ds->compact) {
335:       wr[ds->l] = d[ds->l]/s[ds->l]; wi[ds->l] = 0.0;
336:     }else{
337:       d[ds->l] = PetscRealPart(A[off]); s[ds->l] = PetscRealPart(B[off]);
338:       wr[ds->l] = d[ds->l]/s[ds->l]; wi[ds->l] = 0.0;
339:     }
340:     return(0);
341:   }
342:   /* Reduce to pseudotriadiagonal form */
343:   DSIntermediate_GHIEP(ds);
344:   HZIteration(ds->n,ds->l,d,e,s,Q,ld);
345:   if (!ds->compact) {
346:     DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
347:   }
348:   /* Undo from diagonal the blocks whith real eigenvalues*/
349:    DSGHIEPRealBlocks(ds);

351:   /* Recover eigenvalues from diagonal */
352:   DSGHIEPComplexEigs(ds, 0, ds->n, wr, wi);
353: #if defined(PETSC_USE_COMPLEX)
354:   if (wi) {
355:     for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
356:   }
357: #endif
358:   return(0);
359: }