Actual source code: hz.c

slepc-3.21.1 2024-04-26
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    HZ iteration for generalized symmetric-indefinite eigenproblem.
 12:    Based on Matlab code from David Watkins.

 14:    References:

 16:        [1] D.S. Watkins, The Matrix Eigenvalue Problem: GR and Krylov Subspace
 17:            Methods, SIAM, 2007.

 19:        [2] M.A. Brebner, J. Grad, "Eigenvalues of Ax = lambda Bx for real
 20:            symmetric matrices A and B computed by reduction to pseudosymmetric
 21:            form and the HR process", Linear Alg. Appl. 43:99-118, 1982.
 22: */

 24: #include <slepc/private/dsimpl.h>
 25: #include <slepcblaslapack.h>

 27: /*
 28:    Sets up a 2-by-2 matrix to eliminate y in the vector [x y]'.
 29:    Transformation is rotator if sygn = 1 and hyperbolic if sygn = -1.
 30: */
 31: static PetscErrorCode UnifiedRotation(PetscReal x,PetscReal y,PetscReal sygn,PetscReal *rot,PetscReal *rcond,PetscBool *swap)
 32: {
 33:   PetscReal nrm,c,s;

 35:   PetscFunctionBegin;
 36:   *swap = PETSC_FALSE;
 37:   if (y == 0) {
 38:     rot[0] = 1.0; rot[1] = 0.0; rot[2] = 0.0; rot[3] = 1.0;
 39:     *rcond = 1.0;
 40:   } else {
 41:     nrm = PetscMax(PetscAbs(x),PetscAbs(y));
 42:     c = x/nrm; s = y/nrm;
 43:     PetscCheck(sygn==1.0 || sygn==-1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Value of sygn sent to transetup must be 1 or -1");
 44:     if (sygn == 1.0) {  /* set up a rotator */
 45:       nrm = SlepcAbs(c,s);
 46:       c = c/nrm; s = s/nrm;
 47:       /* rot = [c s; -s c]; */
 48:       rot[0] = c; rot[1] = -s; rot[2] = s; rot[3] = c;
 49:       *rcond = 1.0;
 50:     } else {  /* sygn == -1, set up a hyperbolic transformation */
 51:       nrm = c*c-s*s;
 52:       if (nrm > 0) nrm = PetscSqrtReal(nrm);
 53:       else {
 54:         PetscCheck(nrm<0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Breakdown in construction of hyperbolic transformation");
 55:         nrm = PetscSqrtReal(-nrm);
 56:         *swap = PETSC_TRUE;
 57:       }
 58:       c = c/nrm; s = s/nrm;
 59:       /* rot = [c -s; -s c]; */
 60:       rot[0] = c; rot[1] = -s; rot[2] = -s; rot[3] = c;
 61:       *rcond = PetscAbs(PetscAbs(s)-PetscAbs(c))/(PetscAbs(s)+PetscAbs(c));
 62:     }
 63:   }
 64:   PetscFunctionReturn(PETSC_SUCCESS);
 65: }

 67: static PetscErrorCode HZStep(PetscBLASInt ntop,PetscBLASInt nn,PetscReal tr,PetscReal dt,PetscReal *aa,PetscReal *bb,PetscReal *dd,PetscScalar *uu,PetscInt n,PetscInt ld,PetscBool *flag)
 68: {
 69:   PetscBLASInt   one=1;
 70:   PetscInt       k,jj,ii;
 71:   PetscBLASInt   n_;
 72:   PetscReal      bulge10,bulge20,bulge30,bulge31,bulge41,bulge42;
 73:   PetscReal      sygn,rcond=1.0,worstcond,rot[4],buf[2],t;
 74:   PetscScalar    rtmp;
 75:   PetscBool      swap;

 77:   PetscFunctionBegin;
 78:   *flag = PETSC_FALSE;
 79:   worstcond = 1.0;
 80:   PetscCall(PetscBLASIntCast(n,&n_));

 82:   /* Build initial bulge that sets step in motion */
 83:   bulge10 = dd[ntop+1]*(aa[ntop]*(aa[ntop] - dd[ntop]*tr) + dt*dd[ntop]*dd[ntop]) + dd[ntop]*bb[ntop]*bb[ntop];
 84:   bulge20 = bb[ntop]*(dd[ntop+1]*aa[ntop] + dd[ntop]*aa[ntop+1] - dd[ntop]*dd[ntop+1]*tr);
 85:   bulge30 = bb[ntop]*bb[ntop+1]*dd[ntop];
 86:   bulge31 = 0.0;
 87:   bulge41 = 0.0;
 88:   bulge42 = 0.0;

 90:   /* Chase the bulge */
 91:   for (jj=ntop;jj<nn-1;jj++) {

 93:     /* Check for trivial bulge */
 94:     if (jj>ntop && PetscMax(PetscMax(PetscAbs(bulge10),PetscAbs(bulge20)),PetscAbs(bulge30))<PETSC_MACHINE_EPSILON*(PetscAbs(aa[jj]) + PetscAbs(aa[jj+1]))) {
 95:       bb[jj-1] = 0.0;  /* deflate and move on */

 97:     } else { /* carry out the step */

 99:       /* Annihilate tip entry bulge30 */
100:       if (bulge30 != 0.0) {

102:         /* Make an interchange if necessary to ensure that the
103:            first transformation is othogonal, not hyperbolic.  */
104:         if (dd[jj+1] != dd[jj+2]) { /* make an interchange */
105:           if (dd[jj] != dd[jj+1]) {  /* interchange 1st and 2nd */
106:             buf[0] = bulge20; bulge20 = bulge10; bulge10 = buf[0];
107:             buf[0] = aa[jj]; aa[jj] = aa[jj+1]; aa[jj+1] = buf[0];
108:             buf[0] = bb[jj+1]; bb[jj+1] = bulge31; bulge31 = buf[0];
109:             buf[0] = dd[jj]; dd[jj] = dd[jj+1]; dd[jj+1] = buf[0];
110:             for (k=0;k<n;k++) {
111:               rtmp = uu[k+jj*ld]; uu[k+jj*ld] = uu[k+(jj+1)*ld]; uu[k+(jj+1)*ld] = rtmp;
112:             }
113:           } else {  /* interchange 1st and 3rd */
114:             buf[0] = bulge30; bulge30 = bulge10; bulge10 = buf[0];
115:             buf[0] = aa[jj]; aa[jj] = aa[jj+2]; aa[jj+2] = buf[0];
116:             buf[0] = bb[jj]; bb[jj] = bb[jj+1]; bb[jj+1] = buf[0];
117:             buf[0] = dd[jj]; dd[jj] = dd[jj+2]; dd[jj+2] = buf[0];
118:             if (jj + 2 < nn-1) {
119:               bulge41 = bb[jj+2];
120:               bb[jj+2] = 0;
121:             }
122:             for (k=0;k<n;k++) {
123:               rtmp = uu[k+jj*ld]; uu[k+jj*ld] = uu[k+(jj+2)*ld]; uu[k+(jj+2)*ld] = rtmp;
124:             }
125:           }
126:         }

128:         /* Set up transforming matrix rot. */
129:         PetscCall(UnifiedRotation(bulge20,bulge30,1,rot,&rcond,&swap));

131:         /* Apply transforming matrix rot to T. */
132:         bulge20 = rot[0]*bulge20 + rot[2]*bulge30;
133:         buf[0] = rot[0]*bb[jj] + rot[2]*bulge31;
134:         buf[1] = rot[1]*bb[jj] + rot[3]*bulge31;
135:         bb[jj] = buf[0];
136:         bulge31 = buf[1];
137:         buf[0] = rot[0]*rot[0]*aa[jj+1] + 2.0*rot[0]*rot[2]*bb[jj+1] + rot[2]*rot[2]*aa[jj+2];
138:         buf[1] = rot[1]*rot[1]*aa[jj+1] + 2.0*rot[3]*rot[1]*bb[jj+1] + rot[3]*rot[3]*aa[jj+2];
139:         bb[jj+1] = rot[1]*rot[0]*aa[jj+1] + rot[3]*rot[2]*aa[jj+2] + (rot[3]*rot[0] + rot[1]*rot[2])*bb[jj+1];
140:         aa[jj+1] = buf[0];
141:         aa[jj+2] = buf[1];
142:         if (jj + 2 < nn-1) {
143:           bulge42 = bb[jj+2]*rot[2];
144:           bb[jj+2] = bb[jj+2]*rot[3];
145:         }

147:         /* Accumulate transforming matrix */
148:         PetscCallBLAS("BLASrot",BLASMIXEDrot_(&n_,uu+(jj+1)*ld,&one,uu+(jj+2)*ld,&one,&rot[0],&rot[2]));
149:       }

151:       /* Annihilate inner entry bulge20 */
152:       if (bulge20 != 0.0) {

154:         /* Begin by setting up transforming matrix rot */
155:         sygn = dd[jj]*dd[jj+1];
156:         PetscCall(UnifiedRotation(bulge10,bulge20,sygn,rot,&rcond,&swap));
157:         if (rcond<PETSC_MACHINE_EPSILON) {
158:           *flag = PETSC_TRUE;
159:           PetscFunctionReturn(PETSC_SUCCESS);
160:         }
161:         if (rcond < worstcond) worstcond = rcond;

163:         /* Apply transforming matrix rot to T */
164:         if (jj > ntop) bb[jj-1] = rot[0]*bulge10 + rot[2]*bulge20;
165:         buf[0] = rot[0]*rot[0]*aa[jj] + 2*rot[0]*rot[2]*bb[jj] + rot[2]*rot[2]*aa[jj+1];
166:         buf[1] = rot[1]*rot[1]*aa[jj] + 2*rot[3]*rot[1]*bb[jj] + rot[3]*rot[3]*aa[jj+1];
167:         bb[jj] = rot[1]*rot[0]*aa[jj] + rot[3]*rot[2]*aa[jj+1] + (rot[3]*rot[0] + rot[1]*rot[2])*bb[jj];
168:         aa[jj] = buf[0];
169:         aa[jj+1] = buf[1];
170:         if (jj + 1 < nn-1) {
171:           /* buf = [ bulge31 bb(jj+1) ] * rot' */
172:           buf[0] = rot[0]*bulge31 + rot[2]*bb[jj+1];
173:           buf[1] = rot[1]*bulge31 + rot[3]*bb[jj+1];
174:           bulge31 = buf[0];
175:           bb[jj+1] = buf[1];
176:         }
177:         if (jj + 2 < nn-1) {
178:           /* buf = [bulge41 bulge42] * rot' */
179:           buf[0] = rot[0]*bulge41 + rot[2]*bulge42;
180:           buf[1] = rot[1]*bulge41 + rot[3]*bulge42;
181:           bulge41 = buf[0];
182:           bulge42 = buf[1];
183:         }

185:         /* Apply transforming matrix rot to D */
186:         if (swap == 1) {
187:           buf[0] = dd[jj]; dd[jj] = dd[jj+1]; dd[jj+1] = buf[0];
188:         }

190:         /* Accumulate transforming matrix, uu(jj:jj+1,:) = rot*uu(jj:jj+1,:) */
191:         if (sygn==1) {
192:           PetscCallBLAS("BLASrot",BLASMIXEDrot_(&n_,uu+jj*ld,&one,uu+(jj+1)*ld,&one,&rot[0],&rot[2]));
193:         } else {
194:           if (PetscAbsReal(rot[0])>PetscAbsReal(rot[1])) { /* Type I */
195:             t = rot[1]/rot[0];
196:             for (ii=0;ii<n;ii++) {
197:               uu[jj*ld+ii] = rot[0]*uu[jj*ld+ii] + rot[1]*uu[(jj+1)*ld+ii];
198:               uu[(jj+1)*ld+ii] = t*uu[jj*ld+ii] + uu[(jj+1)*ld+ii]/rot[0];
199:             }
200:           } else { /* Type II */
201:             t = rot[0]/rot[1];
202:             for (ii=0;ii<n;ii++) {
203:               rtmp = uu[jj*ld+ii];
204:               uu[jj*ld+ii] = rot[0]*uu[jj*ld+ii] + rot[1]*uu[(jj+1)*ld+ii];
205:               uu[(jj+1)*ld+ii] = t*uu[jj*ld+ii] + rtmp/rot[1];
206:             }
207:           }
208:         }
209:       }
210:     }

212:     /* Adjust bulge for next step */
213:     bulge10 = bb[jj];
214:     bulge20 = bulge31;
215:     bulge30 = bulge41;
216:     bulge31 = bulge42;
217:     bulge41 = 0.0;
218:     bulge42 = 0.0;
219:   }
220:   PetscFunctionReturn(PETSC_SUCCESS);
221: }

223: static PetscErrorCode HZIteration(PetscBLASInt nn,PetscBLASInt cgd,PetscReal *aa,PetscReal *bb,PetscReal *dd,PetscScalar *uu,PetscBLASInt ld)
224: {
225:   PetscBLASInt   j2,one=1;
226:   PetscInt       its,nits,nstop,jj,ntop,nbot,ntry;
227:   PetscReal      htr,det,dis,dif,tn,kt,c,s,tr,dt;
228:   PetscBool      flag=PETSC_FALSE;

230:   PetscFunctionBegin;
231:   its = 0;
232:   nbot = nn-1;
233:   nits = 0;
234:   nstop = 40*(nn - cgd);

236:   while (nbot >= cgd && nits < nstop) {

238:     /* Check for zeros on the subdiagonal */
239:     jj = nbot - 1;
240:     while (jj>=cgd && PetscAbs(bb[jj])>PETSC_MACHINE_EPSILON*(PetscAbs(aa[jj])+PetscAbs(aa[jj+1]))) jj = jj-1;
241:     if (jj>=cgd) bb[jj]=0;
242:     ntop = jj + 1;  /* starting point for step */
243:     if (ntop == nbot) {  /* isolate single eigenvalue */
244:       nbot = ntop - 1;
245:       its = 0;
246:     } else if (ntop+1 == nbot) {  /* isolate pair of eigenvalues */
247:       htr = 0.5*(aa[ntop]*dd[ntop] + aa[nbot]*dd[nbot]);
248:       det = dd[ntop]*dd[nbot]*(aa[ntop]*aa[nbot]-bb[ntop]*bb[ntop]);
249:       dis = htr*htr - det;
250:       if (dis > 0) {  /* distinct real eigenvalues */
251:         if (dd[ntop] == dd[nbot]) {  /* separate the eigenvalues by a Jacobi rotator */
252:           dif = aa[ntop]-aa[nbot];
253:           if (2.0*PetscAbs(bb[ntop])<=dif) {
254:             tn = 2*bb[ntop]/dif;
255:             tn = tn/(1.0 + PetscSqrtReal(1.0+tn*tn));
256:           } else {
257:             kt = dif/(2.0*bb[ntop]);
258:             tn = PetscSign(kt)/(PetscAbsReal(kt)+PetscSqrtReal(1.0+kt*kt));
259:           }
260:           c = 1.0/PetscSqrtReal(1.0 + tn*tn);
261:           s = c*tn;
262:           aa[ntop] = aa[ntop] + tn*bb[ntop];
263:           aa[nbot] = aa[nbot] - tn*bb[ntop];
264:           bb[ntop] = 0;
265:           j2 = nn-cgd;
266:           PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,uu+ntop*ld+cgd,&one,uu+nbot*ld+cgd,&one,&c,&s));
267:         }
268:       }
269:       nbot = ntop - 1;
270:     } else {  /* Do an HZ iteration */
271:       its = its + 1;
272:       nits = nits + 1;
273:       tr = aa[nbot-1]*dd[nbot-1] + aa[nbot]*dd[nbot];
274:       dt = dd[nbot-1]*dd[nbot]*(aa[nbot-1]*aa[nbot]-bb[nbot-1]*bb[nbot-1]);
275:       for (ntry=1;ntry<=6;ntry++) {
276:         PetscCall(HZStep(ntop,nbot+1,tr,dt,aa,bb,dd,uu,nn,ld,&flag));
277:         if (!flag) break;
278:         PetscCheck(ntry<6,PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"Unable to complete hz step after six tries");
279:         tr = 0.9*tr; dt = 0.81*dt;
280:       }
281:     }
282:   }
283:   PetscFunctionReturn(PETSC_SUCCESS);
284: }

286: PetscErrorCode DSSolve_GHIEP_HZ(DS ds,PetscScalar *wr,PetscScalar *wi)
287: {
288:   PetscInt          i,off;
289:   PetscBLASInt      n1,ld = 0;
290:   const PetscScalar *A,*B;
291:   PetscScalar       *Q;
292:   PetscReal         *d,*e,*s;

294:   PetscFunctionBegin;
295: #if !defined(PETSC_USE_COMPLEX)
296:   PetscAssertPointer(wi,3);
297: #endif
298:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
299:   n1  = ds->n - ds->l;
300:   off = ds->l + ds->l*ld;
301:   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
302:   PetscCall(DSGetArrayReal(ds,DS_MAT_D,&s));
303:   e = d + ld;
304: #if defined(PETSC_USE_DEBUG)
305:   /* Check signature */
306:   PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_B],&B));
307:   for (i=0;i<ds->n;i++) {
308:     PetscReal de = (ds->compact)?s[i]:PetscRealPart(B[i*ld+i]);
309:     PetscCheck(de==1.0 || de==-1.0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Diagonal elements of the signature matrix must be 1 or -1");
310:   }
311:   PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_B],&B));
312: #endif
313:   /* Quick return */
314:   if (n1 == 1) {
315:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
316:     for (i=0;i<=ds->l;i++) Q[i+i*ld] = 1.0;
317:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
318:     PetscCall(DSGHIEPComplexEigs(ds,0,ds->l,wr,wi));
319:     if (ds->compact) {
320:       wr[ds->l] = d[ds->l]/s[ds->l]; wi[ds->l] = 0.0;
321:     } else {
322:       PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_A],&A));
323:       PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_B],&B));
324:       d[ds->l] = PetscRealPart(A[off]);
325:       s[ds->l] = PetscRealPart(B[off]);
326:       PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_A],&A));
327:       PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_B],&B));
328:       wr[ds->l] = d[ds->l]/s[ds->l]; wi[ds->l] = 0.0;
329:     }
330:     PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
331:     PetscCall(DSRestoreArrayReal(ds,DS_MAT_D,&s));
332:     PetscFunctionReturn(PETSC_SUCCESS);
333:   }
334:   /* Reduce to pseudotriadiagonal form */
335:   PetscCall(DSIntermediate_GHIEP(ds));
336:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
337:   PetscCall(HZIteration(ds->n,ds->l,d,e,s,Q,ld));
338:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
339:   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
340:   PetscCall(DSRestoreArrayReal(ds,DS_MAT_D,&s));
341:   if (!ds->compact) PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_FALSE));
342:   /* Undo from diagonal the blocks with real eigenvalues*/
343:   PetscCall(DSGHIEPRealBlocks(ds));

345:   /* Recover eigenvalues from diagonal */
346:   PetscCall(DSGHIEPComplexEigs(ds,0,ds->n,wr,wi));
347: #if defined(PETSC_USE_COMPLEX)
348:   if (wi) {
349:     for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
350:   }
351: #endif
352:   ds->t = ds->n;
353:   PetscFunctionReturn(PETSC_SUCCESS);
354: }