Actual source code: dsghiep_hz.c
1: /*
2: HZ iteration for generalized symmetric-indefinite eigenproblem.
3: Based on Matlab code from David Watkins.
5: References:
7: [1] D.S. Watkins, The Matrix Eigenvalue Problem: GR and Krylov Subspace
8: Methods, SIAM, 2007.
10: [2] M.A. Brebner, J. Grad, "Eigenvalues of Ax = lambda Bx for real
11: symmetric matrices A and B computed by reduction to pseudosymmetric
12: form and the HR process", Linear Alg. Appl. 43, pp. 99-118, 1982.
14: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
15: SLEPc - Scalable Library for Eigenvalue Problem Computations
16: Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain
18: This file is part of SLEPc.
19:
20: SLEPc is free software: you can redistribute it and/or modify it under the
21: terms of version 3 of the GNU Lesser General Public License as published by
22: the Free Software Foundation.
24: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
25: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
26: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
27: more details.
29: You should have received a copy of the GNU Lesser General Public License
30: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
31: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
32: */
33: #include <slepc-private/dsimpl.h> /*I "slepcds.h" I*/
34: #include <slepcblaslapack.h>
36: extern PetscErrorCode DSSwitchFormat_GHIEP(DS,PetscBool);
37: extern PetscErrorCode DSGHIEPComplexEigs(DS,PetscInt,PetscInt,PetscScalar*,PetscScalar*);
38: extern PetscErrorCode HRApply(PetscInt,PetscScalar*,PetscInt,PetscScalar*,PetscInt,PetscReal,PetscReal);
39: extern PetscErrorCode DSIntermediate_GHIEP(DS);
40: extern PetscErrorCode DSGHIEPRealBlocks(DS);
44: /*
45: Sets up a 2-by-2 matrix to eliminate y in the vector [x y]'.
46: Transformation is rotator if sygn = 1 and hyperbolic if sygn = -1.
47: */
48: static PetscErrorCode UnifiedRotation(PetscReal x,PetscReal y,PetscReal sygn,PetscReal *rot,PetscReal *rcond,PetscBool *swap)
49: {
50: PetscReal nrm,c,s;
53: *swap = PETSC_FALSE;
54: if (y == 0) {
55: rot[0] = 1.0; rot[1] = 0.0; rot[2] = 0.0; rot[3] = 1.0;
56: *rcond = 1.0;
57: } else {
58: nrm = PetscMax(PetscAbs(x),PetscAbs(y));
59: c = x/nrm; s = y/nrm;
60: if (sygn == 1.0) { /* set up a rotator */
61: nrm = PetscSqrtReal(c*c+s*s);
62: c = c/nrm; s = s/nrm;
63: /* rot = [c s; -s c]; */
64: rot[0] = c; rot[1] = -s; rot[2] = s; rot[3] = c;
65: *rcond = 1.0;
66: } else if (sygn == -1) { /* set up a hyperbolic transformation */
67: nrm = c*c-s*s;
68: if (nrm > 0) nrm = PetscSqrtReal(nrm);
69: else if (nrm < 0) {
70: nrm = PetscSqrtReal(-nrm);
71: *swap = PETSC_TRUE;
72: } else { /* breakdown */
73: SETERRQ(PETSC_COMM_SELF,1,"Breakdown in construction of hyperbolic transformation");
74: rot[0] = 1.0; rot[1] = 0.0; rot[2] = 0.0; rot[3] = 1.0;
75: *rcond = 0.0;
76: return(0);
77: }
78: c = c/nrm; s = s/nrm;
79: /* rot = [c -s; -s c]; */
80: rot[0] = c; rot[1] = -s; rot[2] = -s; rot[3] = c;
81: *rcond = PetscAbs(PetscAbs(s)-PetscAbs(c))/(PetscAbs(s)+PetscAbs(c));
82: } else SETERRQ(PETSC_COMM_SELF,1,"Value of sygn sent to transetup must be 1 or -1");
83: }
84: return(0);
85: }
89: static PetscErrorCode HZStep(PetscBLASInt ntop,PetscBLASInt nn,PetscReal tr,PetscReal dt,PetscReal *aa,PetscReal *bb,PetscReal *dd,PetscScalar *uu,PetscInt n,PetscInt ld,PetscBool *flag)
90: {
92: PetscBLASInt one=1;
93: PetscInt k,jj;
94: PetscBLASInt n_;
95: PetscReal bulge10,bulge20,bulge30,bulge31,bulge41,bulge42;
96: PetscReal sygn,rcond=1.0,worstcond,rot[4],buf[2];
97: PetscScalar rtmp;
98: PetscBool swap;
101: worstcond = 1.0;
102: n_ = PetscBLASIntCast(n);
104: /* Build initial bulge that sets step in motion */
105: bulge10 = dd[ntop+1]*(aa[ntop]*(aa[ntop] - dd[ntop]*tr) + dt*dd[ntop]*dd[ntop]) + dd[ntop]*bb[ntop]*bb[ntop];
106: bulge20 = bb[ntop]*(dd[ntop+1]*aa[ntop] + dd[ntop]*aa[ntop+1] - dd[ntop]*dd[ntop+1]*tr);
107: bulge30 = bb[ntop]*bb[ntop+1]*dd[ntop];
108: bulge31 = 0.0;
109: bulge41 = 0.0;
110: bulge42 = 0.0;
112: /* Chase the bulge */
113: for (jj=ntop;jj<nn-1;jj++) {
114:
115: /* Check for trivial bulge */
116: if (jj>ntop && PetscMax(PetscMax(PetscAbs(bulge10),PetscAbs(bulge20)),PetscAbs(bulge30))<PETSC_MACHINE_EPSILON*(PetscAbs(aa[jj]) + PetscAbs(aa[jj+1]))) {
117: bb[jj-1] = 0.0; /* deflate and move on */
118:
119: } else { /* carry out the step */
121: /* Annihilate tip entry bulge30 */
122: if (bulge30 != 0.0) {
123:
124: /* Make an interchange if necessary to ensure that the
125: first transformation is othogonal, not hyperbolic. */
126: if (dd[jj+1] != dd[jj+2]) { /* make an interchange */
127: if (dd[jj] != dd[jj+1]) { /* interchange 1st and 2nd */
128: buf[0] = bulge20; bulge20 = bulge10; bulge10 = buf[0];
129: buf[0] = aa[jj]; aa[jj] = aa[jj+1]; aa[jj+1] = buf[0];
130: buf[0] = bb[jj+1]; bb[jj+1] = bulge31; bulge31 = buf[0];
131: buf[0] = dd[jj]; dd[jj] = dd[jj+1]; dd[jj+1] = buf[0];
132: for (k=0;k<n;k++) {
133: rtmp = uu[k+jj*ld]; uu[k+jj*ld] = uu[k+(jj+1)*ld]; uu[k+(jj+1)*ld] = rtmp;
134: }
135: } else { /* interchange 1st and 3rd */
136: buf[0] = bulge30; bulge30 = bulge10; bulge10 = buf[0];
137: buf[0] = aa[jj]; aa[jj] = aa[jj+2]; aa[jj+2] = buf[0];
138: buf[0] = bb[jj]; bb[jj] = bb[jj+1]; bb[jj+1] = buf[0];
139: buf[0] = dd[jj]; dd[jj] = dd[jj+2]; dd[jj+2] = buf[0];
140: if (jj + 2 < nn-1) {
141: bulge41 = bb[jj+2];
142: bb[jj+2] = 0;
143: }
144: for (k=0;k<n;k++) {
145: rtmp = uu[k+jj*ld]; uu[k+jj*ld] = uu[k+(jj+2)*ld]; uu[k+(jj+2)*ld] = rtmp;
146: }
147: }
148: }
149:
150: /* Set up transforming matrix rot. */
151: UnifiedRotation(bulge20,bulge30,1,rot,&rcond,&swap);
153: /* Apply transforming matrix rot to T. */
154: bulge20 = rot[0]*bulge20 + rot[2]*bulge30;
155: buf[0] = rot[0]*bb[jj] + rot[2]*bulge31;
156: buf[1] = rot[1]*bb[jj] + rot[3]*bulge31;
157: bb[jj] = buf[0];
158: bulge31 = buf[1];
159: buf[0] = rot[0]*rot[0]*aa[jj+1] + 2.0*rot[0]*rot[2]*bb[jj+1] + rot[2]*rot[2]*aa[jj+2];
160: buf[1] = rot[1]*rot[1]*aa[jj+1] + 2.0*rot[3]*rot[1]*bb[jj+1] + rot[3]*rot[3]*aa[jj+2];
161: bb[jj+1] = rot[1]*rot[0]*aa[jj+1] + rot[3]*rot[2]*aa[jj+2] + (rot[3]*rot[0] + rot[1]*rot[2])*bb[jj+1];
162: aa[jj+1] = buf[0];
163: aa[jj+2] = buf[1];
164: if (jj + 2 < nn-1) {
165: bulge42 = bb[jj+2]*rot[2];
166: bb[jj+2] = bb[jj+2]*rot[3];
167: }
169: /* Accumulate transforming matrix */
170: BLASrot_(&n_,uu+(jj+1)*ld,&one,uu+(jj+2)*ld,&one,&rot[0],&rot[2]);
171: }
173: /* Annihilate inner entry bulge20 */
174: if (bulge20 != 0.0) {
176: /* Begin by setting up transforming matrix rot */
177: sygn = dd[jj]*dd[jj+1];
178: UnifiedRotation(bulge10,bulge20,sygn,rot,&rcond,&swap);
179: if (rcond<PETSC_MACHINE_EPSILON) {
180: SETERRQ1(PETSC_COMM_SELF,0,"Transforming matrix is numerically singular rcond=%g",rcond);
181: *flag = PETSC_TRUE;
182: return(0);
183: }
184: if (rcond < worstcond) worstcond = rcond;
186: /* Apply transforming matrix rot to T */
187: if (jj > ntop) bb[jj-1] = rot[0]*bulge10 + rot[2]*bulge20;
188: buf[0] = rot[0]*rot[0]*aa[jj] + 2*rot[0]*rot[2]*bb[jj] + rot[2]*rot[2]*aa[jj+1];
189: buf[1] = rot[1]*rot[1]*aa[jj] + 2*rot[3]*rot[1]*bb[jj] + rot[3]*rot[3]*aa[jj+1];
190: bb[jj] = rot[1]*rot[0]*aa[jj] + rot[3]*rot[2]*aa[jj+1] + (rot[3]*rot[0] + rot[1]*rot[2])*bb[jj];
191: aa[jj] = buf[0];
192: aa[jj+1] = buf[1];
193: if (jj + 1 < nn-1) {
194: /* buf = [ bulge31 bb(jj+1) ] * rot' */
195: buf[0] = rot[0]*bulge31 + rot[2]*bb[jj+1];
196: buf[1] = rot[1]*bulge31 + rot[3]*bb[jj+1];
197: bulge31 = buf[0];
198: bb[jj+1] = buf[1];
199: }
200: if (jj + 2 < nn-1) {
201: /* buf = [bulge41 bulge42] * rot' */
202: buf[0] = rot[0]*bulge41 + rot[2]*bulge42;
203: buf[1] = rot[1]*bulge41 + rot[3]*bulge42;
204: bulge41 = buf[0];
205: bulge42 = buf[1];
206: }
208: /* Apply transforming matrix rot to D */
209: if (swap == 1) {
210: buf[0] = dd[jj]; dd[jj] = dd[jj+1]; dd[jj+1] = buf[0];
211: }
213: /* Accumulate transforming matrix, uu(jj:jj+1,:) = rot*uu(jj:jj+1,:) */
214: if (sygn==1) {
215: BLASrot_(&n_,uu+jj*ld,&one,uu+(jj+1)*ld,&one,&rot[0],&rot[2]);
216: } else {
217: HRApply(n,uu+jj*ld,1,uu+(jj+1)*ld,1,rot[0],rot[1]);
218: }
219: }
220: }
222: /* Adjust bulge for next step */
223: bulge10 = bb[jj];
224: bulge20 = bulge31;
225: bulge30 = bulge41;
226: bulge31 = bulge42;
227: bulge41 = 0.0;
228: bulge42 = 0.0;
229: }
230: return(0);
231: }
235: static PetscErrorCode HZIteration(PetscBLASInt nn,PetscBLASInt cgd,PetscReal *aa,PetscReal *bb,PetscReal *dd,PetscScalar *uu,PetscBLASInt ld)
236: {
238: PetscBLASInt j2,one=1;
239: PetscInt its,nits,nstop,jj,ntop,nbot,ntry;
240: PetscReal htr,det,dis,dif,tn,kt,c,s,tr,dt;
241: PetscBool flag=PETSC_FALSE;
244: its = 0;
245: nbot = nn-1;
246: nits = 0;
247: nstop = 40*(nn - cgd);
249: while (nbot >= cgd && nits < nstop) {
251: /* Check for zeros on the subdiagonal */
252: jj = nbot - 1;
253: while (jj>=cgd && PetscAbs(bb[jj])>PETSC_MACHINE_EPSILON*(PetscAbs(aa[jj])+PetscAbs(aa[jj+1]))) jj = jj-1;
254: if (jj>=cgd) bb[jj]=0;
255: ntop = jj + 1; /* starting point for step */
256: if (ntop == nbot) { /* isolate single eigenvalue */
257: nbot = ntop - 1;
258: its = 0;
259: } else if (ntop+1 == nbot) { /* isolate pair of eigenvalues */
260: htr = 0.5*(aa[ntop]*dd[ntop] + aa[nbot]*dd[nbot]);
261: det = dd[ntop]*dd[nbot]*(aa[ntop]*aa[nbot]-bb[ntop]*bb[ntop]);
262: dis = htr*htr - det;
263: if (dis > 0) { /* distinct real eigenvalues */
264: if (dd[ntop] == dd[nbot]) { /* separate the eigenvalues by a Jacobi rotator */
265: dif = aa[ntop]-aa[nbot];
266: if (2.0*PetscAbs(bb[ntop])<=dif) {
267: tn = 2*bb[ntop]/dif;
268: tn = tn/(1.0 + PetscSqrtScalar(1.0+tn*tn));
269: } else {
270: kt = dif/(2.0*bb[ntop]);
271: tn = PetscSign(kt)/(PetscAbs(kt)+PetscSqrtScalar(1.0+kt*kt));
272: }
273: c = 1.0/PetscSqrtScalar(1.0 + tn*tn);
274: s = c*tn;
275: aa[ntop] = aa[ntop] + tn*bb[ntop];
276: aa[nbot] = aa[nbot] - tn*bb[ntop];
277: bb[ntop] = 0;
278: j2 = nn-cgd;
279: BLASrot_(&j2,uu+ntop*ld+cgd,&one,uu+nbot*ld+cgd,&one,&c,&s);
280: } else {
281: dis = PetscSqrtScalar(dis);
282: if (htr < 0) dis = -dis;
283: }
284: }
285: nbot = ntop - 1;
286: } else { /* Do an HZ iteration */
287: its = its + 1;
288: nits = nits + 1;
289: tr = aa[nbot-1]*dd[nbot-1] + aa[nbot]*dd[nbot];
290: dt = dd[nbot-1]*dd[nbot]*(aa[nbot-1]*aa[nbot]-bb[nbot-1]*bb[nbot-1]);
291: for (ntry=1;ntry<=6;ntry++) {
292: HZStep(ntop,nbot+1,tr,dt,aa,bb,dd,uu,nn,ld,&flag);
293: if (!flag) break;
294: else if (ntry == 6)
295: SETERRQ(PETSC_COMM_SELF,1,"Unable to complete hz step on six tries");
296: else {
297: tr = 0.9*tr; dt = 0.81*dt;
298: }
299: }
300: }
301: }
302: return(0);
303: }
307: PetscErrorCode DSSolve_GHIEP_HZ(DS ds,PetscScalar *wr,PetscScalar *wi)
308: {
310: PetscInt off;
311: PetscBLASInt n1,ld;
312: PetscScalar *A,*B,*Q;
313: PetscReal *d,*e,*s;
314: #if defined(PETSC_USE_COMPLEX)
315: PetscInt i;
316: #endif
319: #if !defined(PETSC_USE_COMPLEX)
321: #endif
322: n1 = ds->n - ds->l;
323: ld = PetscBLASIntCast(ds->ld);
324: off = ds->l + ds->l*ld;
325: A = ds->mat[DS_MAT_A];
326: B = ds->mat[DS_MAT_B];
327: Q = ds->mat[DS_MAT_Q];
328: d = ds->rmat[DS_MAT_T];
329: e = ds->rmat[DS_MAT_T] + ld;
330: s = ds->rmat[DS_MAT_D];
331: /* Quick return */
332: if (n1 == 1) {
333: *(Q+off) = 1;
334: if (ds->compact) {
335: wr[ds->l] = d[ds->l]/s[ds->l]; wi[ds->l] = 0.0;
336: }else{
337: d[ds->l] = PetscRealPart(A[off]); s[ds->l] = PetscRealPart(B[off]);
338: wr[ds->l] = d[ds->l]/s[ds->l]; wi[ds->l] = 0.0;
339: }
340: return(0);
341: }
342: /* Reduce to pseudotriadiagonal form */
343: DSIntermediate_GHIEP(ds);
344: HZIteration(ds->n,ds->l,d,e,s,Q,ld);
345: if (!ds->compact) {
346: DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
347: }
348: /* Undo from diagonal the blocks whith real eigenvalues*/
349: DSGHIEPRealBlocks(ds);
351: /* Recover eigenvalues from diagonal */
352: DSGHIEPComplexEigs(ds, 0, ds->n, wr, wi);
353: #if defined(PETSC_USE_COMPLEX)
354: if (wi) {
355: for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
356: }
357: #endif
358: return(0);
359: }