Actual source code: dsghiep.c
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7:
8: SLEPc is free software: you can redistribute it and/or modify it under the
9: terms of version 3 of the GNU Lesser General Public License as published by
10: the Free Software Foundation.
12: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
13: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
15: more details.
17: You should have received a copy of the GNU Lesser General Public License
18: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20: */
21: #include <slepc-private/dsimpl.h> /*I "slepcds.h" I*/
22: #include <slepcblaslapack.h>
26: PetscErrorCode DSAllocate_GHIEP(DS ds,PetscInt ld)
27: {
31: DSAllocateMat_Private(ds,DS_MAT_A);
32: DSAllocateMat_Private(ds,DS_MAT_B);
33: DSAllocateMat_Private(ds,DS_MAT_Q);
34: DSAllocateMatReal_Private(ds,DS_MAT_T);
35: DSAllocateMatReal_Private(ds,DS_MAT_D);
36: PetscFree(ds->perm);
37: PetscMalloc(ld*sizeof(PetscInt),&ds->perm);
38: PetscLogObjectMemory(ds,ld*sizeof(PetscInt));
39: return(0);
40: }
44: PetscErrorCode DSSwitchFormat_GHIEP(DS ds,PetscBool tocompact)
45: {
47: PetscReal *T,*S;
48: PetscScalar *A,*B;
49: PetscInt i,n,ld;
52: A = ds->mat[DS_MAT_A];
53: B = ds->mat[DS_MAT_B];
54: T = ds->rmat[DS_MAT_T];
55: S = ds->rmat[DS_MAT_D];
56: n = ds->n;
57: ld = ds->ld;
58: if (tocompact) { /* switch from dense (arrow) to compact storage */
59: PetscMemzero(T,3*ld*sizeof(PetscReal));
60: PetscMemzero(S,ld*sizeof(PetscReal));
61: for (i=0;i<n-1;i++) {
62: T[i] = PetscRealPart(A[i+i*ld]);
63: T[ld+i] = PetscRealPart(A[i+1+i*ld]);
64: S[i] = PetscRealPart(B[i+i*ld]);
65: }
66: T[n-1] = PetscRealPart(A[n-1+(n-1)*ld]);
67: S[n-1] = PetscRealPart(B[n-1+(n-1)*ld]);
68: for (i=ds->l;i< ds->k;i++) T[2*ld+i] = PetscRealPart(A[ds->k+i*ld]);
69: }else { /* switch from compact (arrow) to dense storage */
70: PetscMemzero(A,ld*ld*sizeof(PetscScalar));
71: PetscMemzero(B,ld*ld*sizeof(PetscScalar));
72: for (i=0;i<n-1;i++) {
73: A[i+i*ld] = T[i];
74: A[i+1+i*ld] = T[ld+i];
75: A[i+(i+1)*ld] = T[ld+i];
76: B[i+i*ld] = S[i];
77: }
78: A[n-1+(n-1)*ld] = T[n-1];
79: B[n-1+(n-1)*ld] = S[n-1];
80: for (i=ds->l;i<ds->k;i++) {
81: A[ds->k+i*ld] = T[2*ld+i];
82: A[i+ds->k*ld] = T[2*ld+i];
83: }
84: }
85: return(0);
86: }
90: PetscErrorCode DSView_GHIEP(DS ds,PetscViewer viewer)
91: {
92: PetscErrorCode ierr;
93: PetscViewerFormat format;
94: PetscInt i,j;
95: PetscReal value;
96: const char *methodname[] = {
97: "HR method",
98: "QR + Inverse Iteration",
99: "QR",
100: "DQDS + Inverse Iteration "
101: };
104: PetscViewerGetFormat(viewer,&format);
105: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
106: PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
107: return(0);
108: }
109: if (ds->compact) {
110: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
111: if (format == PETSC_VIEWER_ASCII_MATLAB) {
112: PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",ds->n,ds->n);
113: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",3*ds->n);
114: PetscViewerASCIIPrintf(viewer,"zzz = [\n");
115: for (i=0;i<ds->n;i++) {
116: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,i+1,*(ds->rmat[DS_MAT_T]+i));
117: }
118: for (i=0;i<ds->n-1;i++) {
119: if (*(ds->rmat[DS_MAT_T]+ds->ld+i) !=0 && i!=ds->k-1) {
120: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+2,i+1,*(ds->rmat[DS_MAT_T]+ds->ld+i));
121: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,i+2,*(ds->rmat[DS_MAT_T]+ds->ld+i));
122: }
123: }
124: for (i = ds->l;i<ds->k;i++) {
125: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",ds->k+1,i+1,*(ds->rmat[DS_MAT_T]+2*ds->ld+i));
126: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,ds->k+1,*(ds->rmat[DS_MAT_T]+2*ds->ld+i));
127: }
128: PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_A]);
129:
130: PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",ds->n,ds->n);
131: PetscViewerASCIIPrintf(viewer,"omega = zeros(%D,3);\n",3*ds->n);
132: PetscViewerASCIIPrintf(viewer,"omega = [\n");
133: for (i=0;i<ds->n;i++) {
134: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,i+1,*(ds->rmat[DS_MAT_D]+i));
135: }
136: PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(omega);\n",DSMatName[DS_MAT_B]);
138: } else {
139: PetscViewerASCIIPrintf(viewer,"T\n");
140: for (i=0;i<ds->n;i++) {
141: for (j=0;j<ds->n;j++) {
142: if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
143: else if (i==j+1 || j==i+1) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
144: else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+2*ds->ld+PetscMin(i,j));
145: else value = 0.0;
146: PetscViewerASCIIPrintf(viewer," %18.16e ",value);
147: }
148: PetscViewerASCIIPrintf(viewer,"\n");
149: }
150: PetscViewerASCIIPrintf(viewer,"omega\n");
151: for (i=0;i<ds->n;i++) {
152: for (j=0;j<ds->n;j++) {
153: if (i==j) value = *(ds->rmat[DS_MAT_D]+i);
154: else value = 0.0;
155: PetscViewerASCIIPrintf(viewer," %18.16e ",value);
156: }
157: PetscViewerASCIIPrintf(viewer,"\n");
158: }
159: }
160: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
161: PetscViewerFlush(viewer);
162: } else {
163: DSViewMat_Private(ds,viewer,DS_MAT_A);
164: DSViewMat_Private(ds,viewer,DS_MAT_B);
165: }
166: if (ds->state>DS_STATE_INTERMEDIATE) {
167: DSViewMat_Private(ds,viewer,DS_MAT_Q);
168: }
169: return(0);
170: }
174: static PetscErrorCode DSVectors_GHIEP_Eigen_Some(DS ds,PetscInt *idx,PetscReal *rnorm)
175: {
177: PetscReal b[4],M[4],d1,d2,s1,s2,e;
178: PetscReal scal1,scal2,wr1,wr2,wi,ep,norm;
179: PetscScalar *Q,*X,Y[4],alpha,zeroS = 0.0;
180: PetscInt k;
181: PetscBLASInt two = 2,n_,ld,one=1;
182: #if !defined(PETSC_USE_COMPLEX)
183: PetscBLASInt four=4;
184: #endif
185:
187: X = ds->mat[DS_MAT_X];
188: Q = ds->mat[DS_MAT_Q];
189: k = *idx;
190: n_ = PetscBLASIntCast(ds->n);
191: ld = PetscBLASIntCast(ds->ld);
192: if (k < ds->n-1) {
193: e = (ds->compact)?*(ds->rmat[DS_MAT_T]+ld+k):PetscRealPart(*(ds->mat[DS_MAT_A]+(k+1)+ld*k));
194: } else e = 0.0;
195: if (e == 0.0) {/* Real */
196: if (ds->state>=DS_STATE_CONDENSED) {
197: PetscMemcpy(X+k*ld,Q+k*ld,ld*sizeof(PetscScalar));
198: } else {
199: PetscMemzero(X+k*ds->ld,ds->ld*sizeof(PetscScalar));
200: X[k+k*ds->ld] = 1.0;
201: }
202: if (rnorm) {
203: *rnorm = PetscAbsScalar(X[ds->n-1+k*ld]);
204: }
205: } else { /* 2x2 block */
206: if (ds->compact) {
207: s1 = *(ds->rmat[DS_MAT_D]+k);
208: d1 = *(ds->rmat[DS_MAT_T]+k);
209: s2 = *(ds->rmat[DS_MAT_D]+k+1);
210: d2 = *(ds->rmat[DS_MAT_T]+k+1);
211: } else {
212: s1 = PetscRealPart(*(ds->mat[DS_MAT_B]+k*ld+k));
213: d1 = PetscRealPart(*(ds->mat[DS_MAT_A]+k+k*ld));
214: s2 = PetscRealPart(*(ds->mat[DS_MAT_B]+(k+1)*ld+k+1));
215: d2 = PetscRealPart(*(ds->mat[DS_MAT_A]+k+1+(k+1)*ld));
216: }
217: M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
218: b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
219: ep = LAPACKlamch_("S");
220: /* Compute eigenvalues of the block */
221: LAPACKlag2_(M, &two, b, &two, &ep, &scal1, &scal2, &wr1, &wr2, &wi);
222: if (wi==0.0) { /* Real eigenvalues */
223: SETERRQ(PETSC_COMM_SELF,1,"Real block in DSVectors_GHIEP");
224: } else { /* Complex eigenvalues */
225: if (scal1<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
226: wr1 /= scal1; wi /= scal1;
227: #if !defined(PETSC_USE_COMPLEX)
228: if ( SlepcAbs(s1*d1-wr1,wi)<SlepcAbs(s2*d2-wr1,wi)) {
229: Y[0] = wr1-s2*d2; Y[1] = s2*e; Y[2] = wi; Y[3] = 0.0;
230: } else {
231: Y[0] = s1*e; Y[1] = wr1-s1*d1; Y[2] = 0.0; Y[3] = wi;
232: }
233: norm = BLASnrm2_(&four,Y,&one);
234: norm = 1/norm;
235: if (ds->state >= DS_STATE_CONDENSED) {
236: alpha = norm;
237: BLASgemm_("N","N",&n_,&two,&two,&alpha,ds->mat[DS_MAT_Q]+k*ld,&ld,Y,&two,&zeroS,X+k*ld,&ld);
238: if (rnorm) *rnorm = SlepcAbsEigenvalue(X[ds->n-1+k*ld],X[ds->n-1+(k+1)*ld]);
239: } else {
240: PetscMemzero(X+k*ld,2*ld*sizeof(PetscScalar));
241: X[k*ld+k] = Y[0]*norm; X[k*ld+k+1] = Y[1]*norm;
242: X[(k+1)*ld+k] = Y[2]*norm; X[(k+1)*ld+k+1] = Y[3]*norm;
243: }
244: #else
245: if ( SlepcAbs(s1*d1-wr1,wi)<SlepcAbs(s2*d2-wr1,wi)) {
246: Y[0] = wr1-s2*d2+PETSC_i*wi; Y[1] = s2*e;
247: } else {
248: Y[0] = s1*e; Y[1] = wr1-s1*d1+PETSC_i*wi;
249: }
250: norm = BLASnrm2_(&two,Y,&one);
251: norm = 1/norm;
252: if (ds->state >= DS_STATE_CONDENSED) {
253: alpha = norm;
254: BLASgemv_("N",&n_,&two,&alpha,ds->mat[DS_MAT_Q]+k*ld,&ld,Y,&one,&zeroS,X+k*ld,&one);
255: if (rnorm) *rnorm = PetscAbsScalar(X[ds->n-1+k*ld]);
256: } else {
257: PetscMemzero(X+k*ld,2*ld*sizeof(PetscScalar));
258: X[k*ld+k] = Y[0]*norm; X[k*ld+k+1] = Y[1]*norm;
259: }
260: X[(k+1)*ld+k] = PetscConj(X[k*ld+k]); X[(k+1)*ld+k+1] = PetscConj(X[k*ld+k+1]);
261: #endif
262: (*idx)++;
263: }
264: }
265: return(0);
266: }
270: PetscErrorCode DSVectors_GHIEP(DS ds,DSMatType mat,PetscInt *k,PetscReal *rnorm)
271: {
272: PetscInt i;
273: PetscReal e;
277: switch (mat) {
278: case DS_MAT_X:
279: if (k) {
280: DSVectors_GHIEP_Eigen_Some(ds,k,rnorm);
281: } else {
282: for (i=0; i<ds->n; i++) {
283: e = (ds->compact)?*(ds->rmat[DS_MAT_T]+ds->ld+i):PetscRealPart(*(ds->mat[DS_MAT_A]+(i+1)+ds->ld*i));
284: if (e == 0.0) {/* real */
285: if (ds->state >= DS_STATE_CONDENSED) {
286: PetscMemcpy(ds->mat[mat]+i*ds->ld,ds->mat[DS_MAT_Q]+i*ds->ld,ds->ld*sizeof(PetscScalar));
287: } else {
288: PetscMemzero(ds->mat[mat]+i*ds->ld,ds->ld*sizeof(PetscScalar));
289: *(ds->mat[mat]+i+i*ds->ld) = 1.0;
290: }
291: } else {
292: DSVectors_GHIEP_Eigen_Some(ds,&i,rnorm);
293: }
294: }
295: }
296: break;
297: case DS_MAT_Y:
298: case DS_MAT_U:
299: case DS_MAT_VT:
300: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
301: break;
302: default:
303: SETERRQ(((PetscObject)ds)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
304: }
305: return(0);
306: }
310: /*
311: Extract the eigenvalues contained in the block-diagonal of the indefinite problem.
312: Only the index range n0..n1 is processed.
313: */
314: PetscErrorCode DSGHIEPComplexEigs(DS ds, PetscInt n0, PetscInt n1, PetscScalar *wr, PetscScalar *wi)
315: {
316: PetscInt k,ld;
317: PetscBLASInt two=2;
318: PetscScalar *A,*B;
319: PetscReal *D,*T;
320: PetscReal b[4],M[4],d1,d2,s1,s2,e;
321: PetscReal scal1,scal2,ep,wr1,wr2,wi1;
324: ld = ds->ld;
325: A = ds->mat[DS_MAT_A];
326: B = ds->mat[DS_MAT_B];
327: D = ds->rmat[DS_MAT_D];
328: T = ds->rmat[DS_MAT_T];
329: for (k=n0;k<n1;k++) {
330: if (k < n1-1) {
331: e = (ds->compact)?T[ld+k]:PetscRealPart(A[(k+1)+ld*k]);
332: }else e = 0.0;
333: if (e==0.0) {
334: /* real eigenvalue */
335: wr[k] = (ds->compact)?T[k]/D[k]:A[k+k*ld]/B[k+k*ld];
336: #if !defined(PETSC_USE_COMPLEX)
337: wi[k] = 0.0 ;
338: #endif
339: } else {
340: /* diagonal block */
341: if (ds->compact) {
342: s1 = D[k];
343: d1 = T[k];
344: s2 = D[k+1];
345: d2 = T[k+1];
346: } else {
347: s1 = PetscRealPart(B[k*ld+k]);
348: d1 = PetscRealPart(A[k+k*ld]);
349: s2 = PetscRealPart(B[(k+1)*ld+k+1]);
350: d2 = PetscRealPart(A[k+1+(k+1)*ld]);
351: }
352: M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
353: b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
354: ep = LAPACKlamch_("S");
355: /* Compute eigenvalues of the block */
356: LAPACKlag2_(M, &two, b, &two, &ep, &scal1, &scal2, &wr1, &wr2, &wi1);
357: if (scal1<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
358: wr[k] = wr1/scal1;
359: if (wi1==0.0) { /* Real eigenvalues */
360: if (scal2<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
361: wr[k+1] = wr2/scal2;
362: #if !defined(PETSC_USE_COMPLEX)
363: wi[k] = 0.0;
364: wi[k+1] = 0.0;
365: #endif
366: } else { /* Complex eigenvalues */
367: #if !defined(PETSC_USE_COMPLEX)
368: wr[k+1] = wr[k];
369: wi[k] = wi1/scal1;
370: wi[k+1] = -wi[k];
371: #else
372: wr[k] += PETSC_i*wi1/scal1;
373: wr[k+1] = PetscConj(wr[k]);
374: #endif
375: }
376: k++;
377: }
378: }
379: #if defined(PETSC_USE_COMPLEX)
380: if (wi) {
381: for (k=n0;k<n1;k++) wi[k] = 0.0;
382: }
383: #endif
384: return(0);
385: }
389: PetscErrorCode DSSort_GHIEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
390: {
392: PetscInt n,i,*perm;
393: PetscReal *d,*e,*s;
396: #if !defined(PETSC_USE_COMPLEX)
398: #endif
399: n = ds->n;
400: d = ds->rmat[DS_MAT_T];
401: e = d + ds->ld;
402: s = ds->rmat[DS_MAT_D];
403: DSAllocateWork_Private(ds,ds->ld,ds->ld,0);
404: perm = ds->perm;
405: if (!rr) {
406: rr = wr;
407: ri = wi;
408: }
409: DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_TRUE);
410: if (!ds->compact) {DSSwitchFormat_GHIEP(ds,PETSC_TRUE);}
411: PetscMemcpy(ds->work,wr,n*sizeof(PetscScalar));
412: for (i=ds->l;i<n;i++) {
413: wr[i] = *(ds->work + perm[i]);
414: }
415: #if !defined(PETSC_USE_COMPLEX)
416: PetscMemcpy(ds->work,wi,n*sizeof(PetscScalar));
417: for (i=ds->l;i<n;i++) {
418: wi[i] = *(ds->work + perm[i]);
419: }
420: #endif
421: PetscMemcpy(ds->rwork,s,n*sizeof(PetscReal));
422: for (i=ds->l;i<n;i++) {
423: s[i] = *(ds->rwork+perm[i]);
424: }
425: PetscMemcpy(ds->rwork,d,n*sizeof(PetscReal));
426: for (i=ds->l;i<n;i++) {
427: d[i] = *(ds->rwork + perm[i]);
428: }
429: PetscMemcpy(ds->rwork,e,(n-1)*sizeof(PetscReal));
430: PetscMemzero(e+ds->l,(n-1-ds->l)*sizeof(PetscScalar));
431: for (i=ds->l;i<n-1;i++) {
432: if (perm[i]<n-1) e[i] = *(ds->rwork + perm[i]);
433: }
434: if (!ds->compact) { DSSwitchFormat_GHIEP(ds,PETSC_FALSE);}
435: DSPermuteColumns_Private(ds,ds->l,n,DS_MAT_Q,perm);
436: return(0);
437: }
441: /*
442: Generates a hyperbolic rotation
443: if x1*x1 - x2*x2 != 0
444: r = sqrt( |x1*x1 - x2*x2| )
445: c = x1/r s = x2/r
446:
447: | c -s||x1| |d*r|
448: |-s c||x2| = | 0 |
449: where d = 1 for type==1 and -1 for type==2
450: Returns the condition number of the reduction
451: */
452: static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
453: {
454: PetscReal t,n2,xa,xb;
455: PetscInt type_;
458: if (x2==0) {
459: *r = PetscAbsReal(x1);
460: *c = (x1>=0)?1.0:-1.0;
461: *s = 0.0;
462: if (type) *type = 1;
463: return(0);
464: }
465: if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
466: /* hyperbolic rotation doesn't exist */
467: *c = 0;
468: *s = 0;
469: *r = 0;
470: if (type) *type = 0;
471: *cond = PETSC_MAX_REAL;
472: return(0);
473: }
474:
475: if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
476: xa = x1; xb = x2; type_ = 1;
477: } else {
478: xa = x2; xb = x1; type_ = 2;
479: }
480: t = xb/xa;
481: n2 = PetscAbsReal(1 - t*t);
482: *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
483: *c = x1/(*r);
484: *s = x2/(*r);
485: if (type_ == 2) *r *= -1;
486: if (type) *type = type_;
487: if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
488: return(0);
489: }
493: /*
494: |c s|
495: Applies an hyperbolic rotator |s c|
496: |c s|
497: [x1 x2]|s c|
498: */
499: PetscErrorCode HRApply(PetscInt n, PetscScalar *x1,PetscInt inc1, PetscScalar *x2, PetscInt inc2,PetscReal c, PetscReal s)
500: {
501: PetscInt i;
502: PetscReal t;
503: PetscScalar tmp;
504:
506: if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
507: t = s/c;
508: for (i=0;i<n;i++) {
509: x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
510: x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
511: }
512: } else { /* Type II */
513: t = c/s;
514: for (i=0;i<n;i++) {
515: tmp = x1[i*inc1];
516: x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
517: x2[i*inc2] = t*x1[i*inc1] + tmp/s;
518: }
519: }
520: return(0);
521: }
525: /*
526: Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).
528: Input:
529: A symmetric (only lower triangular part is refered)
530: s vector +1 and -1 (signature matrix)
531: Output:
532: d,e
533: s
534: Q s-orthogonal matrix whith Q^T*A*Q = T (symmetric tridiagonal matrix)
535: */
536: static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *w,PetscInt lw)
537: {
538: #if defined(PETSC_MISSING_LAPACK_LARFG) || defined(PETSC_MISSING_LAPACK_LARF)
540: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
541: #else
543: PetscInt i,j,k,*ii,*jj,i0=0,ik=0,tmp,type,*perm,nwall,nwu;
544: PetscReal *ss,cond=1.0,cs,sn,r;
545: PetscScalar *work,tau,t,*AA;
546: PetscBLASInt n0,n1,ni,inc=1,m,n_,lda_,ldq_;
547: PetscBool breakdown = PETSC_TRUE;
548:
550: if (n<3) {
551: if (n==1)Q[0]=1;
552: if (n==2) {Q[0] = Q[1+ldq] = 1; Q[1] = Q[ldq] = 0;}
553: return(0);
554: }
555: lda_ = PetscBLASIntCast(lda);
556: n_ = PetscBLASIntCast(n);
557: ldq_ = PetscBLASIntCast(ldq);
558: nwall = n*n+n;
559: nwu = 0;
560: if (!w || lw < nwall) {
561: PetscMalloc(nwall*sizeof(PetscScalar),&work);
562: }else work = w;
563: PetscMalloc(n*sizeof(PetscReal),&ss);
564: PetscMalloc(n*sizeof(PetscInt),&perm);
565: AA = work;
566: for (i=0;i<n;i++) {
567: PetscMemcpy(AA+i*n,A+i*lda,n*sizeof(PetscScalar));
568: }
569: nwu += n*n;
570: k=0;
571: while (breakdown && k<n) {
572: breakdown = PETSC_FALSE;
573: /* Classify (and flip) A and s according to sign */
574: if (flip) {
575: for (i=0;i<n;i++) {
576: perm[i] = n-1-perm_[i];
577: if (perm[i]==0) i0 = i;
578: if (perm[i]==k) ik = i;
579: }
580: } else {
581: for (i=0;i<n;i++) {
582: perm[i] = perm_[i];
583: if (perm[i]==0) i0 = i;
584: if (perm[i]==k) ik = i;
585: }
586: }
587: perm[ik] = 0;
588: perm[i0] = k;
589: i=1;
590: while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
591: if (s[perm[i]]!=s[perm[0]]) {
592: j=i+1;
593: while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
594: tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
595: }
596: i++;
597: }
598: for (i=0;i<n;i++) {
599: ss[i] = s[perm[i]];
600: }
601: if (flip) { ii = &j; jj = &i;} else { ii = &i; jj = &j;}
602: for (i=0;i<n;i++)
603: for (j=0;j<n;j++)
604: A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
605: /* Initialize Q */
606: for (i=0;i<n;i++) {
607: PetscMemzero(Q+i*ldq,n*sizeof(PetscScalar));
608: Q[perm[i]+i*ldq] = 1.0;
609: }
610: for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
611: n0 = ni-1; n1 = PetscBLASIntCast(n)-ni;
612: for (j=0;j<n-2;j++) {
613: m = PetscBLASIntCast(n-j-1);
614: /* Forming and applying reflectors */
615: if ( n0 > 1 ) {
616: LAPACKlarfg_(&n0, A+ni-n0+j*lda, A+ni-n0+j*lda+1,&inc,&tau);
617: /* Apply reflector */
618: if ( PetscAbsScalar(tau) != 0.0 ) {
619: t=*( A+ni-n0+j*lda); *(A+ni-n0+j*lda)=1.0;
620: LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu);
621: LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu);
622: /* Update Q */
623: LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu);
624: *(A+ni-n0+j*lda) = t;
625: for (i=1;i<n0;i++) {
626: *(A+ni-n0+j*lda+i) = 0.0; *(A+j+(ni-n0+i)*lda) = 0.0;
627: }
628: *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
629: }
630: }
631: if ( n1 > 1 ) {
632: LAPACKlarfg_(&n1, A+n-n1+j*lda, A+n-n1+j*lda+1,&inc,&tau);
633: /* Apply reflector */
634: if ( PetscAbsScalar(tau) != 0.0 ) {
635: t=*( A+n-n1+j*lda); *(A+n-n1+j*lda)=1.0;
636: LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu);
637: LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu);
638: /* Update Q */
639: LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu);
640: *(A+n-n1+j*lda) = t;
641: for (i=1;i<n1;i++) {
642: *(A+n-n1+i+j*lda) = 0.0; *(A+j+(n-n1+i)*lda) = 0.0;
643: }
644: *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
645: }
646: }
647: /* Hyperbolic rotation */
648: if ( n0 > 0 && n1 > 0) {
649: HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond);
650: /* Check condition number */
651: if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
652: breakdown = PETSC_TRUE;
653: k++;
654: if (k==n || flip)
655: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Breakdown in construction of hyperbolic transformation");
656: break;
657: }
658: A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
659: A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
660: /* Apply to A */
661: HRApply(m, A+j+1+(ni-n0)*lda,1, A+j+1+(n-n1)*lda,1, cs, -sn);
662: HRApply(m, A+ni-n0+(j+1)*lda,lda, A+n-n1+(j+1)*lda,lda, cs, -sn);
663:
664: /* Update Q */
665: HRApply(n, Q+(ni-n0)*ldq,1, Q+(n-n1)*ldq,1, cs, -sn);
666: if (type==2) {
667: ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
668: n0++;ni++;n1--;
669: }
670: }
671: if (n0>0) n0--;else n1--;
672: }
673: }
675: /* flip matrices */
676: if (flip) {
677: for (i=0;i<n-1;i++) {
678: d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
679: e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
680: s[i] = ss[n-i-1];
681: }
682: s[n-1] = ss[0];
683: d[n-1] = PetscRealPart(A[0]);
684: for (i=0;i<n;i++) {
685: ierr=PetscMemcpy(work+i*n,Q+i*ldq,n*sizeof(PetscScalar));
686: }
687: for (i=0;i<n;i++)
688: for (j=0;j<n;j++)
689: Q[i+j*ldq] = work[i+(n-j-1)*n];
690: } else {
691: for (i=0;i<n-1;i++) {
692: d[i] = PetscRealPart(A[i+i*lda]);
693: e[i] = PetscRealPart(A[i+1+i*lda]);
694: s[i] = ss[i];
695: }
696: s[n-1] = ss[n-1];
697: d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
698: }
700: PetscFree(ss);
701: PetscFree(perm);
702: return(0);
703: #endif
704: }
708: /*
709: compute x = x - y*ss^{-1}*y^T*s*x where ss=y^T*s*y
710: s diagonal (signature matrix)
711: */
712: static PetscErrorCode IndefOrthog(PetscReal *s, PetscScalar *y, PetscReal ss, PetscScalar *x, PetscScalar *h,PetscInt n)
713: {
714: PetscInt i;
715: PetscScalar h_,r;
718: if (y) {
719: h_ = 0.0; /* h_=(y^Tdiag(s)*y)^{-1}*y^T*diag(s)*x*/
720: for (i=0;i<n;i++) { h_+=y[i]*s[i]*x[i];}
721: h_ /= ss;
722: for (i=0;i<n;i++) {x[i] -= h_*y[i];} /* x = x-h_*y */
723: /* repeat */
724: r = 0.0;
725: for (i=0;i<n;i++) { r+=y[i]*s[i]*x[i];}
726: r /= ss;
727: for (i=0;i<n;i++) {x[i] -= r*y[i];}
728: h_ += r;
729: }else h_ = 0.0;
730: if (h) *h = h_;
731: return(0);
732: }
736: /*
737: normalization with a indefinite norm
738: */
739: static PetscErrorCode IndefNorm(PetscReal *s,PetscScalar *x, PetscReal *norm,PetscInt n)
740: {
741: PetscInt i;
742: PetscReal norm_;
745: /* s-normalization */
746: norm_ = 0.0;
747: for (i=0;i<n;i++) {norm_ += PetscRealPart(x[i]*s[i]*x[i]);}
748: if (norm_<0) {norm_ = -PetscSqrtReal(-norm_);}
749: else {norm_ = PetscSqrtReal(norm_);}
750: for (i=0;i<n;i++)x[i] /= norm_;
751: if (norm) *norm = norm_;
752: return(0);
753: }
757: static PetscErrorCode DSEigenVectorsPseudoOrthog(DS ds, DSMatType mat, PetscScalar *wr, PetscScalar *wi,PetscBool accum)
758: {
760: PetscInt i,j,k,off;
761: PetscBLASInt ld,n1,one=1;
762: PetscScalar PQ[4],xx,yx,xy,yy,*y,h,oneS=1.0,zeroS=0.0,*X,*W,*B;
763: PetscReal *ss,*s,*d,*e,d1,d2,toldeg=PETSC_SQRT_MACHINE_EPSILON*100,vi,vj;
766: ld = PetscBLASIntCast(ds->ld);
767: n1 = PetscBLASIntCast(ds->n - ds->l);
768: DSAllocateWork_Private(ds,ld*ld+2*ld,ld,2*ld);
769: s = ds->rmat[DS_MAT_D];
770: d = ds->rmat[DS_MAT_T];
771: e = d + ld;
772: off = ds->l+ds->l*ld;
773: if (!ds->compact) {
774: B = ds->mat[DS_MAT_B];
775: for (i=ds->l;i<ds->n;i++) {
776: s[i] = PetscRealPart(B[i+i*ld]);
777: }
778: }
780: /* compute real s-orthonormal base */
781: X = ds->mat[mat];
782: ss = ds->rwork;
783: y = ds->work;
785: #if defined(PETSC_USE_COMPLEX)
786: /* with complex scalars we need to operate as in real scalar */
787: for (i=ds->l;i<ds->n;i++) {
788: if (PetscImaginaryPart(wr[i])!=0.0) {
789: for (j=ds->l;j<ds->n;j++) {
790: X[j+(i+1)*ld] = PetscImaginaryPart(X[j+i*ld]);
791: X[j+i*ld] = PetscRealPart(X[j+i*ld]);
792: }
793: i++;
794: }
795: }
796: #endif
798: for (i=ds->l;i<ds->n;i++) {
799: #if defined(PETSC_USE_COMPLEX)
800: vi = PetscImaginaryPart(wr[i]);
801: #else
802: vi = PetscRealPart(wi[i]);
803: #endif
804: if (vi==0.0) { /* real */
805: for (j=ds->l;j<i;j++) {
806: #if defined(PETSC_USE_COMPLEX)
807: vj = PetscImaginaryPart(wr[j]);
808: #else
809: vj = PetscRealPart(wi[j]);
810: #endif
811: /* s-orthogonalization with close eigenvalues */
812: if (vj==0.0) {
813: if ( PetscAbsScalar(wr[j]-wr[i])<toldeg) {
814: IndefOrthog(s+ds->l, X+j*ld+ds->l, ss[j],X+i*ld+ds->l, PETSC_NULL,n1);
815: }
816: }else j++;
817: }
818: IndefNorm(s+ds->l,X+i*ld+ds->l,&d1,n1);
819: ss[i] = (d1<0.0)?-1:1;
820: d[i] = PetscRealPart(wr[i]*ss[i]); e[i] = 0.0;
821: } else { /* complex value */
822: for (j=ds->l;j<i;j++) {
823: /* s-orthogonalization of Xi and Xi+1*/
824: #if defined(PETSC_USE_COMPLEX)
825: vj = PetscImaginaryPart(wr[j]);
826: #else
827: vj = PetscRealPart(wi[j]);
828: #endif
829: if (vj!=0.0) {
830: if (PetscAbsScalar(wr[j]-wr[i])<toldeg && PetscAbsScalar(PetscAbsReal(vj)-PetscAbsReal(vi))<toldeg) {
831: for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+i*ld];
832: xx = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
833: yx = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
834: for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+(i+1)*ld];
835: xy = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
836: yy = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
837: PQ[0] = ss[j]*xx; PQ[1] = ss[j+1]*yx; PQ[2] = ss[j]*xy; PQ[3] = ss[j+1]*yy;
838: for (k=ds->l;k<ds->n;k++) {
839: X[k+i*ld] -= PQ[0]*X[k+j*ld]+PQ[1]*X[k+(j+1)*ld];
840: X[k+(i+1)*ld] -= PQ[2]*X[k+j*ld]+PQ[3]*X[k+(j+1)*ld];
841: }
842: /* Repeat */
843: for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+i*ld];
844: xx = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
845: yx = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
846: for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+(i+1)*ld];
847: xy = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
848: yy = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
849: PQ[0] = ss[j]*xx; PQ[1] = ss[j+1]*yx; PQ[2] = ss[j]*xy; PQ[3] = ss[j+1]*yy;
850: for (k=ds->l;k<ds->n;k++) {
851: X[k+i*ld] -= PQ[0]*X[k+j*ld]+PQ[1]*X[k+(j+1)*ld];
852: X[k+(i+1)*ld] -= PQ[2]*X[k+j*ld]+PQ[3]*X[k+(j+1)*ld];
853: }
854: }
855: j++;
856: }
857: }
858: IndefNorm(s+ds->l,X+i*ld+ds->l,&d1,n1);
859: ss[i] = (d1<0)?-1:1;
860: IndefOrthog(s+ds->l, X+i*ld+ds->l, ss[i],X+(i+1)*ld+ds->l, &h,n1);
861: IndefNorm(s+ds->l,X+(i+1)*ld+ds->l,&d2,n1);
862: ss[i+1] = (d2<0)?-1:1;
863: d[i] = (PetscRealPart(wr[i]-vi*h/d1))*ss[i];
864: d[i+1] = (PetscRealPart(wr[i]+vi*h/d1))*ss[i+1];
865: e[i] = vi*d2/d1*ss[i]; e[i+1] = 0.0;
866: i++;
867: }
868: }
869: for (i=ds->l;i<ds->n;i++) s[i] = ss[i];
870: /* accumulate previous Q */
871: if (accum && mat!=DS_MAT_Q) {
872: DSAllocateMat_Private(ds,DS_MAT_W);
873: W = ds->mat[DS_MAT_W];
874: DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
875: BLASgemm_("N","N",&n1,&n1,&n1,&oneS,W+off,&ld,ds->mat[DS_MAT_X]+off,&ld,&zeroS,ds->mat[DS_MAT_Q]+off,&ld);
876: }
877: if (!ds->compact) {DSSwitchFormat_GHIEP(ds,PETSC_FALSE);}
878: return(0);
879: }
883: /*
884: Get eigenvectors with inverse iteration.
885: The system matrix is in Hessenberg form.
886: */
887: PetscErrorCode DSGHIEPPseudoOrthogInverseIteration(DS ds,PetscScalar *wr,PetscScalar *wi)
888: {
889: #if defined(PETSC_MISSING_LAPACK_HSEIN)
891: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HSEIN - Lapack routine is unavailable");
892: #else
894: PetscInt i,off;
895: PetscBLASInt *select,*infoC,ld,n1,mout,info;
896: PetscScalar *A,*B,*H,*X;
897: PetscReal *s,*d,*e;
900: ld = PetscBLASIntCast(ds->ld);
901: n1 = PetscBLASIntCast(ds->n - ds->l);
902: DSAllocateWork_Private(ds,ld*ld+2*ld,ld,2*ld);
903: DSAllocateMat_Private(ds,DS_MAT_W);
904: A = ds->mat[DS_MAT_A];
905: B = ds->mat[DS_MAT_B];
906: H = ds->mat[DS_MAT_W];
907: s = ds->rmat[DS_MAT_D];
908: d = ds->rmat[DS_MAT_T];
909: e = d + ld;
910: select = ds->iwork;
911: infoC = ds->iwork + ld;
912: off = ds->l+ds->l*ld;
913: if (ds->compact) {
914: H[off] = d[ds->l]*s[ds->l];
915: H[off+ld] = e[ds->l]*s[ds->l];
916: for (i=ds->l+1;i<ds->n-1;i++) {
917: H[i+(i-1)*ld] = e[i-1]*s[i];
918: H[i+i*ld] = d[i]*s[i];
919: H[i+(i+1)*ld] = e[i]*s[i];
920: }
921: H[ds->n-1+(ds->n-2)*ld] = e[ds->n-2]*s[ds->n-1];
922: H[ds->n-1+(ds->n-1)*ld] = d[ds->n-1]*s[ds->n-1];
923: } else {
924: s[ds->l] = PetscRealPart(B[off]);
925: H[off] = A[off]*s[ds->l];
926: H[off+ld] = A[off+ld]*s[ds->l];
927: for (i=ds->l+1;i<ds->n-1;i++) {
928: s[i] = PetscRealPart(B[i+i*ld]);
929: H[i+(i-1)*ld] = A[i+(i-1)*ld]*s[i];
930: H[i+i*ld] = A[i+i*ld]*s[i];
931: H[i+(i+1)*ld] = A[i+(i+1)*ld]*s[i];
932: }
933: s[ds->n-1] = PetscRealPart(B[ds->n-1+(ds->n-1)*ld]);
934: H[ds->n-1+(ds->n-2)*ld] = A[ds->n-1+(ds->n-2)*ld]*s[ds->n-1];
935: H[ds->n-1+(ds->n-1)*ld] = A[ds->n-1+(ds->n-1)*ld]*s[ds->n-1];
936: }
937: DSAllocateMat_Private(ds,DS_MAT_X);
938: X = ds->mat[DS_MAT_X];
939: for (i=0;i<n1;i++)select[i]=1;
940: #if !defined(PETSC_USE_COMPLEX)
941: LAPACKhsein_("R","N","N",select,&n1,H+off,&ld,wr+ds->l,wi+ds->l,PETSC_NULL,&ld,X+off,&ld,&n1,&mout,ds->work,PETSC_NULL,infoC,&info);
942: #else
943: LAPACKhsein_("R","N","N",select,&n1,H+off,&ld,wr+ds->l,PETSC_NULL,&ld,X+off,&ld,&n1,&mout,ds->work,ds->rwork,PETSC_NULL,infoC,&info);
944: #endif
945: if (info<0)SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in hsein routine %d",-i);
946: if (info>0) {
947: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Convergence error in hsein routine %d",i);
948: }
950: DSEigenVectorsPseudoOrthog(ds, DS_MAT_X, wr, wi,PETSC_TRUE);
951: return(0);
952: #endif
953: }
957: /*
958: Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
959: */
960: PetscErrorCode DSIntermediate_GHIEP(DS ds)
961: {
963: PetscInt i,ld,off;
964: PetscScalar *A,*B,*Q;
965: PetscReal *d,*e,*s;
968: ld = ds->ld;
969: A = ds->mat[DS_MAT_A];
970: B = ds->mat[DS_MAT_B];
971: Q = ds->mat[DS_MAT_Q];
972: d = ds->rmat[DS_MAT_T];
973: e = ds->rmat[DS_MAT_T]+ld;
974: s = ds->rmat[DS_MAT_D];
975: off = ds->l+ds->l*ld;
976: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
977: DSAllocateWork_Private(ds,ld*ld,0,0);
979: for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
980: for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
981: if (ds->compact) {
982: if (ds->state < DS_STATE_INTERMEDIATE) {
983: DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
984: TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work,ld*ld);
985: ds->k = ds->l;
986: PetscMemzero(d+2*ld+ds->l,(ds->n-ds->l)*sizeof(PetscReal));
987: }
988: } else {
989: if (ds->state < DS_STATE_INTERMEDIATE) {
990: for (i=0;i<ds->n;i++)
991: s[i] = PetscRealPart(B[i+i*ld]);
992: TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work,ld*ld);
993: PetscMemzero(d+2*ld,(ds->n)*sizeof(PetscReal));
994: ds->k = ds->l;
995: DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
996: } else { DSSwitchFormat_GHIEP(ds,PETSC_TRUE); }
997: }
998: return(0);
999: }
1003: /*
1004: Undo 2x2 blocks that have real eigenvalues.
1005: */
1006: PetscErrorCode DSGHIEPRealBlocks(DS ds)
1007: {
1009: PetscInt i;
1010: PetscReal e,d1,d2,s1,s2,ss1,ss2,t,dd,ss;
1011: PetscReal maxy,ep,scal1,scal2,snorm;
1012: PetscReal *T,*D,b[4],M[4],wr1,wr2,wi;
1013: PetscScalar *A,*B,Y[4],oneS = 1.0,zeroS = 0.0;
1014: PetscBLASInt m,two=2,ld;
1015: PetscBool isreal;
1018: ld = PetscBLASIntCast(ds->ld);
1019: m = PetscBLASIntCast(ds->n-ds->l);
1020: A = ds->mat[DS_MAT_A];
1021: B = ds->mat[DS_MAT_B];
1022: T = ds->rmat[DS_MAT_T];
1023: D = ds->rmat[DS_MAT_D];
1024: DSAllocateWork_Private(ds,2*m,0,0);
1025: for (i=ds->l;i<ds->n-1;i++) {
1026: e = (ds->compact)?T[ld+i]:PetscRealPart(A[(i+1)+ld*i]);
1027: if (e != 0.0) { /* 2x2 block */
1028: if (ds->compact) {
1029: s1 = D[i];
1030: d1 = T[i];
1031: s2 = D[i+1];
1032: d2 = T[i+1];
1033: } else {
1034: s1 = PetscRealPart(B[i*ld+i]);
1035: d1 = PetscRealPart(A[i*ld+i]);
1036: s2 = PetscRealPart(B[(i+1)*ld+i+1]);
1037: d2 = PetscRealPart(A[(i+1)*ld+i+1]);
1038: }
1039: isreal = PETSC_FALSE;
1040: if (s1==s2) { /* apply a Jacobi rotation to compute the eigendecomposition */
1041: dd = d1-d2;
1042: if (2*PetscAbsReal(e) <= dd) {
1043: t = 2*e/dd;
1044: t = t/(1 + PetscSqrtReal(1+t*t));
1045: } else {
1046: t = dd/(2*e);
1047: ss = (t>=0)?1.0:-1.0;
1048: t = ss/(PetscAbsReal(t)+PetscSqrtReal(1+t*t));
1049: }
1050: Y[0] = 1/PetscSqrtReal(1 + t*t); Y[3] = Y[0]; /* c */
1051: Y[1] = Y[0]*t; Y[2] = -Y[1]; /* s */
1052: wr1 = d1+t*e;
1053: wr2 = d2-t*e;
1054: ss1 = s1; ss2 = s2;
1055: isreal = PETSC_TRUE;
1056: } else {
1057: ss1 = 1.0; ss2 = 1.0,
1058: M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
1059: b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
1060: ep = LAPACKlamch_("S");
1061: /* Compute eigenvalues of the block */
1062: LAPACKlag2_(M, &two, b, &two,&ep , &scal1, &scal2, &wr1, &wr2, &wi);
1063: if (wi==0.0) { /* Real eigenvalues */
1064: isreal = PETSC_TRUE;
1065: if (scal1<ep||scal2<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
1066: wr1 /= scal1; wr2 /= scal2;
1067: if ( PetscAbsReal(s1*d1-wr1)<PetscAbsReal(s2*d2-wr1)) { Y[0] = wr1-s2*d2; Y[1] =s2*e;}
1068: else{ Y[0] = s1*e; Y[1] = wr1-s1*d1; }
1069: /* normalize with a signature*/
1070: maxy = PetscMax(PetscAbsScalar(Y[0]),PetscAbsScalar(Y[1]));
1071: scal1 = PetscRealPart(Y[0])/maxy; scal2 = PetscRealPart(Y[1])/maxy;
1072: snorm = scal1*scal1*s1 + scal2*scal2*s2;
1073: if (snorm<0) {ss1 = -1.0; snorm = -snorm;}
1074: snorm = maxy*PetscSqrtReal(snorm); Y[0] = Y[0]/snorm; Y[1] = Y[1]/snorm;
1075: if ( PetscAbsReal(s1*d1-wr2)<PetscAbsReal(s2*d2-wr2)) { Y[2] = wr2-s2*d2; Y[3] =s2*e;}
1076: else{ Y[2] = s1*e; Y[3] = wr2-s1*d1; }
1077: maxy = PetscMax(PetscAbsScalar(Y[2]),PetscAbsScalar(Y[3]));
1078: scal1 = PetscRealPart(Y[2])/maxy; scal2 = PetscRealPart(Y[3])/maxy;
1079: snorm = scal1*scal1*s1 + scal2*scal2*s2;
1080: if (snorm<0) {ss2 = -1.0; snorm = -snorm;}
1081: snorm = maxy*PetscSqrtReal(snorm);Y[2] = Y[2]/snorm; Y[3] = Y[3]/snorm;
1082: }
1083: wr1 *= ss1; wr2 *= ss2;
1084: }
1085: if (isreal) {
1086: if (ds->compact) {
1087: D[i] = ss1;;
1088: T[i] = wr1;
1089: D[i+1] = ss2;
1090: T[i+1] = wr2;
1091: T[ld+i] = 0.0;
1092: }else {
1093: B[i*ld+i] = ss1;
1094: A[i*ld+i] = wr1;
1095: B[(i+1)*ld+i+1] = ss2;
1096: A[(i+1)*ld+i+1] = wr2;
1097: A[(i+1)+ld*i] = 0.0;
1098: A[i+ld*(i+1)] = 0.0;
1099: }
1100: BLASgemm_("N","N",&m,&two,&two,&oneS,ds->mat[DS_MAT_Q]+ds->l+i*ld,&ld,Y,&two,&zeroS,ds->work,&m);
1101: PetscMemcpy(ds->mat[DS_MAT_Q]+ds->l+i*ld,ds->work,m*sizeof(PetscScalar));
1102: PetscMemcpy(ds->mat[DS_MAT_Q]+ds->l+(i+1)*ld,ds->work+m,m*sizeof(PetscScalar));
1103: }
1104: i++;
1105: }
1106: }
1107: return(0);
1108: }
1112: PetscErrorCode DSSolve_GHIEP_QR_II(DS ds,PetscScalar *wr,PetscScalar *wi)
1113: {
1114: #if defined(PETSC_MISSING_LAPACK_HSEQR)
1116: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable");
1117: #else
1119: PetscInt i,off;
1120: PetscBLASInt n1,ld,one,info,lwork;
1121: PetscScalar *H,*A,*B,*Q,*work;
1122: PetscReal *d,*e,*s;
1125: #if !defined(PETSC_USE_COMPLEX)
1127: #endif
1128: one = 1;
1129: n1 = PetscBLASIntCast(ds->n - ds->l);
1130: ld = PetscBLASIntCast(ds->ld);
1131: off = ds->l + ds->l*ld;
1132: A = ds->mat[DS_MAT_A];
1133: B = ds->mat[DS_MAT_B];
1134: Q = ds->mat[DS_MAT_Q];
1135: d = ds->rmat[DS_MAT_T];
1136: e = ds->rmat[DS_MAT_T] + ld;
1137: s = ds->rmat[DS_MAT_D];
1138: DSAllocateWork_Private(ds,ld*ld,2*ld,ld*2);
1139: work = ds->work;
1140: lwork = ld*ld;
1142: /* Quick return if possible */
1143: if (n1 == 1) {
1144: *(Q+off) = 1;
1145: if (!ds->compact) {
1146: d[ds->l] = PetscRealPart(A[off]);
1147: s[ds->l] = PetscRealPart(B[off]);
1148: }
1149: wr[ds->l] = d[ds->l]/s[ds->l];
1150: if (wi) wi[ds->l] = 0.0;
1151: return(0);
1152: }
1153: /* Reduce to pseudotriadiagonal form */
1154: DSIntermediate_GHIEP( ds);
1156: /* Compute Eigenvalues (QR)*/
1157: DSAllocateMat_Private(ds,DS_MAT_W);
1158: H = ds->mat[DS_MAT_W];
1159: if (ds->compact) {
1160: H[off] = d[ds->l]*s[ds->l];
1161: H[off+ld] = e[ds->l]*s[ds->l];
1162: for (i=ds->l+1;i<ds->n-1;i++) {
1163: H[i+(i-1)*ld] = e[i-1]*s[i];
1164: H[i+i*ld] = d[i]*s[i];
1165: H[i+(i+1)*ld] = e[i]*s[i];
1166: }
1167: H[ds->n-1+(ds->n-2)*ld] = e[ds->n-2]*s[ds->n-1];
1168: H[ds->n-1+(ds->n-1)*ld] = d[ds->n-1]*s[ds->n-1];
1169: } else {
1170: s[ds->l] = PetscRealPart(B[off]);
1171: H[off] = A[off]*s[ds->l];
1172: H[off+ld] = A[off+ld]*s[ds->l];
1173: for (i=ds->l+1;i<ds->n-1;i++) {
1174: s[i] = PetscRealPart(B[i+i*ld]);
1175: H[i+(i-1)*ld] = A[i+(i-1)*ld]*s[i];
1176: H[i+i*ld] = A[i+i*ld]*s[i];
1177: H[i+(i+1)*ld] = A[i+(i+1)*ld]*s[i];
1178: }
1179: s[ds->n-1] = PetscRealPart(B[ds->n-1+(ds->n-1)*ld]);
1180: H[ds->n-1+(ds->n-2)*ld] = A[ds->n-1+(ds->n-2)*ld]*s[ds->n-1];
1181: H[ds->n-1+(ds->n-1)*ld] = A[ds->n-1+(ds->n-1)*ld]*s[ds->n-1];
1182: }
1184: #if !defined(PETSC_USE_COMPLEX)
1185: LAPACKhseqr_("E","N",&n1,&one,&n1,H+off,&ld,wr+ds->l,wi+ds->l,PETSC_NULL,&ld,work,&lwork,&info);
1186: #else
1187: LAPACKhseqr_("E","N",&n1,&one,&n1,H+off,&ld,wr+ds->l,PETSC_NULL,&ld,work,&lwork,&info);
1188: #endif
1189: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",&info);
1191: /* Compute Eigenvectors with Inverse Iteration */
1192: DSGHIEPPseudoOrthogInverseIteration(ds,wr,wi);
1194: /* Recover eigenvalues from diagonal */
1195: DSGHIEPComplexEigs(ds, 0, ds->l, wr, wi);
1196: #if defined(PETSC_USE_COMPLEX)
1197: if (wi) {
1198: for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
1199: }
1200: #endif
1201: return(0);
1202: #endif
1203: }
1207: PetscErrorCode DSSolve_GHIEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
1208: {
1209: #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
1211: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
1212: #else
1214: PetscInt i,j,off;
1215: PetscBLASInt lwork,info,n1,one=1,mout,ld;
1216: PetscScalar *A,*B,*H,*Q,*work,*tau;
1217: PetscReal *d,*e,*s;
1220: #if !defined(PETSC_USE_COMPLEX)
1222: #endif
1223: n1 = PetscBLASIntCast(ds->n - ds->l);
1224: ld = PetscBLASIntCast(ds->ld);
1225: off = ds->l + ds->l*ld;
1226: A = ds->mat[DS_MAT_A];
1227: B = ds->mat[DS_MAT_B];
1228: Q = ds->mat[DS_MAT_Q];
1229: d = ds->rmat[DS_MAT_T];
1230: e = ds->rmat[DS_MAT_T] + ld;
1231: s = ds->rmat[DS_MAT_D];
1232: DSAllocateMat_Private(ds,DS_MAT_W);
1233: H = ds->mat[DS_MAT_W];
1234: DSAllocateWork_Private(ds,ld+ld*ld,ld,0);
1235: tau = ds->work;
1236: work = ds->work+ld;
1237: lwork = ld*ld;
1239: /* initialize orthogonal matrix */
1240: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
1241: for (i=0;i< ds->n;i++)
1242: Q[i+i*ld] = 1.0;
1243: /* quick return */
1244: if (n1 == 1) {
1245: if (!ds->compact) {
1246: d[ds->l] = PetscRealPart(A[off]);
1247: s[ds->l] = PetscRealPart(B[off]);
1248: }
1249: wr[ds->l] = d[ds->l]/s[ds->l];
1250: if (wi) wi[ds->l] = 0.0;
1251: return(0);
1252: }
1254: /* form standard problem in H */
1255: if (ds->compact) {
1256: PetscMemzero(H,ld*ld*sizeof(PetscScalar));
1257: for (i=ds->l; i < ds->n-1; i++) {
1258: H[i+i*ld] = d[i]/s[i];
1259: H[(i+1)+i*ld] = e[i]/s[i+1];
1260: H[i+(i+1)*ld] = e[i]/s[i];
1261: }
1262: H[ds->n-1 + (ds->n-1)*ld] = d[ds->n-1]/s[ds->n-1];
1264: for (i=ds->l; i < ds->k; i++) {
1265: H[ds->k+i*ld] = *(ds->rmat[DS_MAT_T]+2*ld+i)/s[ds->k];
1266: H[i+ds->k*ld] = *(ds->rmat[DS_MAT_T]+2*ld+i)/s[i];
1267: }
1268: } else {
1269: for (j=ds->l; j<ds->n; j++) {
1270: for (i=ds->l; i<ds->n; i++) {
1271: H[i+j*ld] = A[i+j*ld]/B[i+i*ld];
1272: }
1273: }
1274: }
1275: /* reduce to upper Hessenberg form */
1276: if (ds->state<DS_STATE_INTERMEDIATE) {
1277: LAPACKgehrd_(&n1,&one,&n1,H+off,&ld,tau,work,&lwork,&info);
1278: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEHRD %d",&info);
1279: for (j=ds->l;j<ds->n-1;j++) {
1280: for (i=j+2;i<ds->n;i++) {
1281: Q[i+j*ld] = H[i+j*ld];
1282: H[i+j*ld] = 0.0;
1283: }
1284: }
1285: LAPACKorghr_(&n1,&one,&n1,Q+off,&ld,tau,work,&lwork,&info);
1286: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGHR %d",&info);
1287: }
1289: /* Compute the real Schur form */
1290: #if !defined(PETSC_USE_COMPLEX)
1291: LAPACKhseqr_("S","V",&n1,&one,&n1,H+off,&ld,wr+ds->l,wi+ds->l,Q+off,&ld,work,&lwork,&info);
1292: #else
1293: LAPACKhseqr_("S","V",&n1,&one,&n1,H+off,&ld,wr+ds->l,Q+off,&ld,work,&lwork,&info);
1294: #endif
1295: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",&info);
1296:
1297: /* Compute eigenvectors */
1298: #if !defined(PETSC_USE_COMPLEX)
1299: LAPACKtrevc_("R","B",PETSC_NULL,&n1,H+off,&ld,PETSC_NULL,&ld,Q+off,&ld,&n1,&mout,ds->work,&info);
1300: #else
1301: LAPACKtrevc_("R","B",PETSC_NULL,&n1,H+off,&ld,PETSC_NULL,&ld,Q+off,&ld,&n1,&mout,work,ds->rwork,&info);
1302: #endif
1303: if (info) SETERRQ1(((PetscObject)ds)->comm,PETSC_ERR_LIB,"Error in Lapack xTREVC %i",&info);
1305: /* Compute real s-orthonormal basis */
1306: DSEigenVectorsPseudoOrthog(ds, DS_MAT_Q, wr, wi,PETSC_FALSE);
1308: /* Undo from diagonal the blocks whith real eigenvalues*/
1309: DSGHIEPRealBlocks(ds);
1311: /* Recover eigenvalues from diagonal */
1312: DSGHIEPComplexEigs(ds, 0, ds->l, wr, wi);
1313: #if defined(PETSC_USE_COMPLEX)
1314: if (wi) {
1315: for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
1316: }
1317: #endif
1318: return(0);
1319: #endif
1320: }
1324: PetscErrorCode DSNormalize_GHIEP(DS ds,DSMatType mat,PetscInt col)
1325: {
1327: PetscInt i,i0,i1;
1328: PetscBLASInt ld,n,one = 1;
1329: PetscScalar *A = ds->mat[DS_MAT_A],norm,*x;
1330: #if !defined(PETSC_USE_COMPLEX)
1331: PetscScalar norm0;
1332: #endif
1335: switch (mat) {
1336: case DS_MAT_X:
1337: case DS_MAT_Y:
1338: case DS_MAT_Q:
1339: /* Supported matrices */
1340: break;
1341: case DS_MAT_U:
1342: case DS_MAT_VT:
1343: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
1344: break;
1345: default:
1346: SETERRQ(((PetscObject)ds)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
1347: }
1349: n = PetscBLASIntCast(ds->n);
1350: ld = PetscBLASIntCast(ds->ld);
1351: DSGetArray(ds,mat,&x);
1352: if (col < 0) {
1353: i0 = 0; i1 = ds->n;
1354: } else if (col>0 && A[ds->ld*(col-1)+col] != 0.0) {
1355: i0 = col-1; i1 = col+1;
1356: } else {
1357: i0 = col; i1 = col+1;
1358: }
1359: for (i=i0; i<i1; i++) {
1360: #if !defined(PETSC_USE_COMPLEX)
1361: if (i<n-1 && A[ds->ld*i+i+1] != 0.0) {
1362: norm = BLASnrm2_(&n,&x[ld*i],&one);
1363: norm0 = BLASnrm2_(&n,&x[ld*(i+1)],&one);
1364: norm = 1.0/SlepcAbsEigenvalue(norm,norm0);
1365: BLASscal_(&n,&norm,&x[ld*i],&one);
1366: BLASscal_(&n,&norm,&x[ld*(i+1)],&one);
1367: i++;
1368: } else
1369: #endif
1370: {
1371: norm = BLASnrm2_(&n,&x[ld*i],&one);
1372: norm = 1.0/norm;
1373: BLASscal_(&n,&norm,&x[ld*i],&one);
1374: }
1375: }
1376: return(0);
1377: }
1379: extern PetscErrorCode DSSolve_GHIEP_HZ(DS,PetscScalar*,PetscScalar*);
1380: extern PetscErrorCode DSSolve_GHIEP_DQDS_II(DS,PetscScalar*,PetscScalar*);
1382: EXTERN_C_BEGIN
1385: PetscErrorCode DSCreate_GHIEP(DS ds)
1386: {
1388: ds->ops->allocate = DSAllocate_GHIEP;
1389: ds->ops->view = DSView_GHIEP;
1390: ds->ops->vectors = DSVectors_GHIEP;
1391: ds->ops->solve[0] = DSSolve_GHIEP_HZ;
1392: ds->ops->solve[1] = DSSolve_GHIEP_QR_II;
1393: ds->ops->solve[2] = DSSolve_GHIEP_QR;
1394: ds->ops->solve[3] = DSSolve_GHIEP_DQDS_II;
1395: ds->ops->sort = DSSort_GHIEP;
1396: ds->ops->normalize = DSNormalize_GHIEP;
1397: return(0);
1398: }
1399: EXTERN_C_END