Actual source code: dsghiep.c
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7:
8: SLEPc is free software: you can redistribute it and/or modify it under the
9: terms of version 3 of the GNU Lesser General Public License as published by
10: the Free Software Foundation.
12: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
13: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
15: more details.
17: You should have received a copy of the GNU Lesser General Public License
18: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20: */
21: #include <slepc-private/dsimpl.h> /*I "slepcds.h" I*/
22: #include <slepcblaslapack.h>
26: PetscErrorCode DSAllocate_GHIEP(DS ds,PetscInt ld)
27: {
31: DSAllocateMat_Private(ds,DS_MAT_A);
32: DSAllocateMat_Private(ds,DS_MAT_B);
33: DSAllocateMat_Private(ds,DS_MAT_Q);
34: DSAllocateMatReal_Private(ds,DS_MAT_T);
35: DSAllocateMatReal_Private(ds,DS_MAT_D);
36: PetscFree(ds->perm);
37: PetscMalloc(ld*sizeof(PetscInt),&ds->perm);
38: PetscLogObjectMemory(ds,ld*sizeof(PetscInt));
39: return(0);
40: }
44: PetscErrorCode DSSwitchFormat_GHIEP(DS ds,PetscBool tocompact)
45: {
47: PetscReal *T,*S;
48: PetscScalar *A,*B;
49: PetscInt i,n,ld;
52: A = ds->mat[DS_MAT_A];
53: B = ds->mat[DS_MAT_B];
54: T = ds->rmat[DS_MAT_T];
55: S = ds->rmat[DS_MAT_D];
56: n = ds->n;
57: ld = ds->ld;
58: if (tocompact) { /* switch from dense (arrow) to compact storage */
59: PetscMemzero(T,3*ld*sizeof(PetscReal));
60: PetscMemzero(S,ld*sizeof(PetscReal));
61: for (i=0;i<n-1;i++) {
62: T[i] = PetscRealPart(A[i+i*ld]);
63: T[ld+i] = PetscRealPart(A[i+1+i*ld]);
64: S[i] = PetscRealPart(B[i+i*ld]);
65: }
66: T[n-1] = PetscRealPart(A[n-1+(n-1)*ld]);
67: S[n-1] = PetscRealPart(B[n-1+(n-1)*ld]);
68: for (i=ds->l;i< ds->k;i++) T[2*ld+i] = PetscRealPart(A[ds->k+i*ld]);
69: }else { /* switch from compact (arrow) to dense storage */
70: PetscMemzero(A,ld*ld*sizeof(PetscScalar));
71: PetscMemzero(B,ld*ld*sizeof(PetscScalar));
72: for (i=0;i<n-1;i++) {
73: A[i+i*ld] = T[i];
74: A[i+1+i*ld] = T[ld+i];
75: A[i+(i+1)*ld] = T[ld+i];
76: B[i+i*ld] = S[i];
77: }
78: A[n-1+(n-1)*ld] = T[n-1];
79: B[n-1+(n-1)*ld] = S[n-1];
80: for (i=ds->l;i<ds->k;i++) {
81: A[ds->k+i*ld] = T[2*ld+i];
82: A[i+ds->k*ld] = T[2*ld+i];
83: }
84: }
85: return(0);
86: }
90: PetscErrorCode DSView_GHIEP(DS ds,PetscViewer viewer)
91: {
92: PetscErrorCode ierr;
93: PetscViewerFormat format;
94: PetscInt i,j;
95: PetscReal value;
96: const char *methodname[] = {
97: "HR method",
98: "QR + Inverse Iteration",
99: "QR",
100: "DQDS + Inverse Iteration "
101: };
104: PetscViewerGetFormat(viewer,&format);
105: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
106: PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
107: return(0);
108: }
109: if (ds->compact) {
110: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
111: if (format == PETSC_VIEWER_ASCII_MATLAB) {
112: PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",ds->n,ds->n);
113: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",3*ds->n);
114: PetscViewerASCIIPrintf(viewer,"zzz = [\n");
115: for (i=0;i<ds->n;i++) {
116: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,i+1,*(ds->rmat[DS_MAT_T]+i));
117: }
118: for (i=0;i<ds->n-1;i++) {
119: if (*(ds->rmat[DS_MAT_T]+ds->ld+i) !=0 && i!=ds->k-1) {
120: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+2,i+1,*(ds->rmat[DS_MAT_T]+ds->ld+i));
121: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,i+2,*(ds->rmat[DS_MAT_T]+ds->ld+i));
122: }
123: }
124: for (i = ds->l;i<ds->k;i++) {
125: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",ds->k+1,i+1,*(ds->rmat[DS_MAT_T]+2*ds->ld+i));
126: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,ds->k+1,*(ds->rmat[DS_MAT_T]+2*ds->ld+i));
127: }
128: PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_A]);
129:
130: PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",ds->n,ds->n);
131: PetscViewerASCIIPrintf(viewer,"omega = zeros(%D,3);\n",3*ds->n);
132: PetscViewerASCIIPrintf(viewer,"omega = [\n");
133: for (i=0;i<ds->n;i++) {
134: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,i+1,*(ds->rmat[DS_MAT_D]+i));
135: }
136: PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(omega);\n",DSMatName[DS_MAT_B]);
138: } else {
139: PetscViewerASCIIPrintf(viewer,"T\n");
140: for (i=0;i<ds->n;i++) {
141: for (j=0;j<ds->n;j++) {
142: if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
143: else if (i==j+1 || j==i+1) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
144: else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+2*ds->ld+PetscMin(i,j));
145: else value = 0.0;
146: PetscViewerASCIIPrintf(viewer," %18.16e ",value);
147: }
148: PetscViewerASCIIPrintf(viewer,"\n");
149: }
150: PetscViewerASCIIPrintf(viewer,"omega\n");
151: for (i=0;i<ds->n;i++) {
152: for (j=0;j<ds->n;j++) {
153: if (i==j) value = *(ds->rmat[DS_MAT_D]+i);
154: else value = 0.0;
155: PetscViewerASCIIPrintf(viewer," %18.16e ",value);
156: }
157: PetscViewerASCIIPrintf(viewer,"\n");
158: }
159: }
160: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
161: PetscViewerFlush(viewer);
162: } else {
163: DSViewMat_Private(ds,viewer,DS_MAT_A);
164: DSViewMat_Private(ds,viewer,DS_MAT_B);
165: }
166: if (ds->state>DS_STATE_INTERMEDIATE) {
167: DSViewMat_Private(ds,viewer,DS_MAT_Q);
168: }
169: return(0);
170: }
174: static PetscErrorCode DSVectors_GHIEP_Eigen_Some(DS ds,PetscInt *idx,PetscReal *rnorm)
175: {
177: PetscReal b[4],M[4],d1,d2,s1,s2,e;
178: PetscReal scal1,scal2,wr1,wr2,wi,ep,norm;
179: PetscScalar *Q,*X,Y[4],alpha,zeroS = 0.0;
180: PetscInt k;
181: PetscBLASInt two = 2,n_,ld,one=1;
182: #if !defined(PETSC_USE_COMPLEX)
183: PetscBLASInt four=4;
184: #endif
185:
187: X = ds->mat[DS_MAT_X];
188: Q = ds->mat[DS_MAT_Q];
189: k = *idx;
190: n_ = PetscBLASIntCast(ds->n);
191: ld = PetscBLASIntCast(ds->ld);
192: if (k < ds->n-1) {
193: e = (ds->compact)?*(ds->rmat[DS_MAT_T]+ld+k):PetscRealPart(*(ds->mat[DS_MAT_A]+(k+1)+ld*k));
194: } else e = 0.0;
195: if (e == 0.0) {/* Real */
196: if (ds->state>=DS_STATE_CONDENSED) {
197: PetscMemcpy(X+k*ld,Q+k*ld,ld*sizeof(PetscScalar));
198: } else {
199: PetscMemzero(X+k*ds->ld,ds->ld*sizeof(PetscScalar));
200: X[k+k*ds->ld] = 1.0;
201: }
202: if (rnorm) {
203: *rnorm = PetscAbsScalar(X[ds->n-1+k*ld]);
204: }
205: } else { /* 2x2 block */
206: if (ds->compact) {
207: s1 = *(ds->rmat[DS_MAT_D]+k);
208: d1 = *(ds->rmat[DS_MAT_T]+k);
209: s2 = *(ds->rmat[DS_MAT_D]+k+1);
210: d2 = *(ds->rmat[DS_MAT_T]+k+1);
211: } else {
212: s1 = PetscRealPart(*(ds->mat[DS_MAT_B]+k*ld+k));
213: d1 = PetscRealPart(*(ds->mat[DS_MAT_A]+k+k*ld));
214: s2 = PetscRealPart(*(ds->mat[DS_MAT_B]+(k+1)*ld+k+1));
215: d2 = PetscRealPart(*(ds->mat[DS_MAT_A]+k+1+(k+1)*ld));
216: }
217: M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
218: b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
219: ep = LAPACKlamch_("S");
220: /* Compute eigenvalues of the block */
221: LAPACKlag2_(M, &two, b, &two, &ep, &scal1, &scal2, &wr1, &wr2, &wi);
222: if (wi==0.0) { /* Real eigenvalues */
223: SETERRQ(PETSC_COMM_SELF,1,"Real block in DSVectors_GHIEP");
224: } else { /* Complex eigenvalues */
225: if (scal1<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
226: wr1 /= scal1; wi /= scal1;
227: #if !defined(PETSC_USE_COMPLEX)
228: if ( SlepcAbs(s1*d1-wr1,wi)<SlepcAbs(s2*d2-wr1,wi)) {
229: Y[0] = wr1-s2*d2; Y[1] = s2*e; Y[2] = wi; Y[3] = 0.0;
230: } else {
231: Y[0] = s1*e; Y[1] = wr1-s1*d1; Y[2] = 0.0; Y[3] = wi;
232: }
233: norm = BLASnrm2_(&four,Y,&one);
234: norm = 1/norm;
235: if (ds->state >= DS_STATE_CONDENSED) {
236: alpha = norm;
237: BLASgemm_("N","N",&n_,&two,&two,&alpha,ds->mat[DS_MAT_Q]+k*ld,&ld,Y,&two,&zeroS,X+k*ld,&ld);
238: if (rnorm) *rnorm = SlepcAbsEigenvalue(X[ds->n-1+k*ld],X[ds->n-1+(k+1)*ld]);
239: } else {
240: PetscMemzero(X+k*ld,2*ld*sizeof(PetscScalar));
241: X[k*ld+k] = Y[0]*norm; X[k*ld+k+1] = Y[1]*norm;
242: X[(k+1)*ld+k] = Y[2]*norm; X[(k+1)*ld+k+1] = Y[3]*norm;
243: }
244: #else
245: if ( SlepcAbs(s1*d1-wr1,wi)<SlepcAbs(s2*d2-wr1,wi)) {
246: Y[0] = wr1-s2*d2+PETSC_i*wi; Y[1] = s2*e;
247: } else {
248: Y[0] = s1*e; Y[1] = wr1-s1*d1+PETSC_i*wi;
249: }
250: norm = BLASnrm2_(&two,Y,&one);
251: norm = 1/norm;
252: if (ds->state >= DS_STATE_CONDENSED) {
253: alpha = norm;
254: BLASgemv_("N",&n_,&two,&alpha,ds->mat[DS_MAT_Q]+k*ld,&ld,Y,&one,&zeroS,X+k*ld,&one);
255: if (rnorm) *rnorm = PetscAbsScalar(X[ds->n-1+k*ld]);
256: } else {
257: PetscMemzero(X+k*ld,2*ld*sizeof(PetscScalar));
258: X[k*ld+k] = Y[0]*norm; X[k*ld+k+1] = Y[1]*norm;
259: }
260: X[(k+1)*ld+k] = PetscConj(X[k*ld+k]); X[(k+1)*ld+k+1] = PetscConj(X[k*ld+k+1]);
261: #endif
262: (*idx)++;
263: }
264: }
265: return(0);
266: }
270: PetscErrorCode DSVectors_GHIEP(DS ds,DSMatType mat,PetscInt *k,PetscReal *rnorm)
271: {
272: PetscInt i;
273: PetscReal e;
277: switch (mat) {
278: case DS_MAT_X:
279: if (k) {
280: DSVectors_GHIEP_Eigen_Some(ds,k,rnorm);
281: } else {
282: for (i=0; i<ds->n; i++) {
283: e = (ds->compact)?*(ds->rmat[DS_MAT_T]+ds->ld+i):PetscRealPart(*(ds->mat[DS_MAT_A]+(i+1)+ds->ld*i));
284: if (e == 0.0) {/* real */
285: if (ds->state >= DS_STATE_CONDENSED) {
286: PetscMemcpy(ds->mat[mat]+i*ds->ld,ds->mat[DS_MAT_Q]+i*ds->ld,ds->ld*sizeof(PetscScalar));
287: } else {
288: PetscMemzero(ds->mat[mat]+i*ds->ld,ds->ld*sizeof(PetscScalar));
289: *(ds->mat[mat]+i+i*ds->ld) = 1.0;
290: }
291: } else {
292: DSVectors_GHIEP_Eigen_Some(ds,&i,rnorm);
293: }
294: }
295: }
296: break;
297: case DS_MAT_Y:
298: case DS_MAT_U:
299: case DS_MAT_VT:
300: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
301: break;
302: default:
303: SETERRQ(((PetscObject)ds)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
304: }
305: return(0);
306: }
310: /*
311: Extract the eigenvalues contained in the block-diagonal of the indefinite problem.
312: Only the index range n0..n1 is processed.
313: */
314: PetscErrorCode DSGHIEPComplexEigs(DS ds, PetscInt n0, PetscInt n1, PetscScalar *wr, PetscScalar *wi)
315: {
316: PetscInt k,ld;
317: PetscBLASInt two=2;
318: PetscScalar *A,*B;
319: PetscReal *D,*T;
320: PetscReal b[4],M[4],d1,d2,s1,s2,e;
321: PetscReal scal1,scal2,ep,wr1,wr2,wi1;
324: ld = ds->ld;
325: A = ds->mat[DS_MAT_A];
326: B = ds->mat[DS_MAT_B];
327: D = ds->rmat[DS_MAT_D];
328: T = ds->rmat[DS_MAT_T];
329: for (k=n0;k<n1;k++) {
330: if (k < n1-1) {
331: e = (ds->compact)?T[ld+k]:PetscRealPart(A[(k+1)+ld*k]);
332: }else e = 0.0;
333: if (e==0.0) {
334: /* real eigenvalue */
335: wr[k] = (ds->compact)?T[k]/D[k]:A[k+k*ld]/B[k+k*ld];
336: wi[k] = 0.0 ;
337: } else {
338: /* diagonal block */
339: if (ds->compact) {
340: s1 = D[k];
341: d1 = T[k];
342: s2 = D[k+1];
343: d2 = T[k+1];
344: } else {
345: s1 = PetscRealPart(B[k*ld+k]);
346: d1 = PetscRealPart(A[k+k*ld]);
347: s2 = PetscRealPart(B[(k+1)*ld+k+1]);
348: d2 = PetscRealPart(A[k+1+(k+1)*ld]);
349: }
350: M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
351: b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
352: ep = LAPACKlamch_("S");
353: /* Compute eigenvalues of the block */
354: LAPACKlag2_(M, &two, b, &two, &ep, &scal1, &scal2, &wr1, &wr2, &wi1);
355: if (scal1<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
356: wr[k] = wr1/scal1;
357: if (wi1==0.0) { /* Real eigenvalues */
358: if (scal2<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
359: wr[k+1] = wr2/scal2;
360: wi[k] = 0.0;
361: wi[k+1] = 0.0;
362: } else { /* Complex eigenvalues */
363: #if !defined(PETSC_USE_COMPLEX)
364: wr[k+1] = wr[k];
365: wi[k] = wi1/scal1;
366: wi[k+1] = -wi[k];
367: #else
368: wr[k] += PETSC_i*wi1/scal1;
369: wr[k+1] = PetscConj(wr[k]);
370: wi[k] = 0.0;
371: wi[k+1] = 0.0;
372: #endif
373: }
374: k++;
375: }
376: }
377: return(0);
378: }
382: PetscErrorCode DSSort_GHIEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
383: {
385: PetscInt n,i,*perm;
386: PetscReal *d,*e,*s;
389: n = ds->n;
390: d = ds->rmat[DS_MAT_T];
391: e = d + ds->ld;
392: s = ds->rmat[DS_MAT_D];
393: DSAllocateWork_Private(ds,ds->ld,ds->ld,0);
394: perm = ds->perm;
395: if (!rr) {
396: rr = wr;
397: ri = wi;
398: }
399: DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_TRUE);
400: if (!ds->compact) {DSSwitchFormat_GHIEP(ds,PETSC_TRUE);}
401: PetscMemcpy(ds->work,wr,n*sizeof(PetscScalar));
402: for (i=ds->l;i<n;i++) {
403: wr[i] = *(ds->work + perm[i]);
404: }
405: PetscMemcpy(ds->work,wi,n*sizeof(PetscScalar));
406: for (i=ds->l;i<n;i++) {
407: wi[i] = *(ds->work + perm[i]);
408: }
409: PetscMemcpy(ds->rwork,s,n*sizeof(PetscReal));
410: for (i=ds->l;i<n;i++) {
411: s[i] = *(ds->rwork+perm[i]);
412: }
413: PetscMemcpy(ds->rwork,d,n*sizeof(PetscReal));
414: for (i=ds->l;i<n;i++) {
415: d[i] = *(ds->rwork + perm[i]);
416: }
417: PetscMemcpy(ds->rwork,e,(n-1)*sizeof(PetscReal));
418: PetscMemzero(e+ds->l,(n-1-ds->l)*sizeof(PetscScalar));
419: for (i=ds->l;i<n-1;i++) {
420: if (perm[i]<n-1) e[i] = *(ds->rwork + perm[i]);
421: }
422: if (!ds->compact) { DSSwitchFormat_GHIEP(ds,PETSC_FALSE);}
423: DSPermuteColumns_Private(ds,ds->l,n,DS_MAT_Q,perm);
424: return(0);
425: }
429: /*
430: Generates a hyperbolic rotation
431: if x1*x1 - x2*x2 != 0
432: r = sqrt( |x1*x1 - x2*x2| )
433: c = x1/r s = x2/r
434:
435: | c -s||x1| |d*r|
436: |-s c||x2| = | 0 |
437: where d = 1 for type==1 and -1 for type==2
438: Returns the condition number of the reduction
439: */
440: static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
441: {
442: PetscReal t,n2,xa,xb;
443: PetscInt type_;
446: if (x2==0) {
447: *r = PetscAbsReal(x1);
448: *c = (x1>=0)?1.0:-1.0;
449: *s = 0.0;
450: if (type) *type = 1;
451: return(0);
452: }
453: if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
454: /* hyperbolic rotation doesn't exist */
455: *c = 0;
456: *s = 0;
457: *r = 0;
458: if (type) *type = 0;
459: *cond = PETSC_MAX_REAL;
460: return(0);
461: }
462:
463: if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
464: xa = x1; xb = x2; type_ = 1;
465: } else {
466: xa = x2; xb = x1; type_ = 2;
467: }
468: t = xb/xa;
469: n2 = PetscAbsReal(1 - t*t);
470: *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
471: *c = x1/(*r);
472: *s = x2/(*r);
473: if (type_ == 2) *r *= -1;
474: if (type) *type = type_;
475: if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
476: return(0);
477: }
481: /*
482: |c s|
483: Applies an hyperbolic rotator |s c|
484: |c s|
485: [x1 x2]|s c|
486: */
487: PetscErrorCode HRApply(PetscInt n, PetscScalar *x1,PetscInt inc1, PetscScalar *x2, PetscInt inc2,PetscReal c, PetscReal s)
488: {
489: PetscInt i;
490: PetscReal t;
491: PetscScalar tmp;
492:
494: if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
495: t = s/c;
496: for (i=0;i<n;i++) {
497: x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
498: x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
499: }
500: } else { /* Type II */
501: t = c/s;
502: for (i=0;i<n;i++) {
503: tmp = x1[i*inc1];
504: x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
505: x2[i*inc2] = t*x1[i*inc1] + tmp/s;
506: }
507: }
508: return(0);
509: }
513: /*
514: Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).
516: Input:
517: A symmetric (only lower triangular part is refered)
518: s vector +1 and -1 (signature matrix)
519: Output:
520: d,e
521: s
522: Q s-orthogonal matrix whith Q^T*A*Q = T (symmetric tridiagonal matrix)
523: */
524: static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *w,PetscInt lw)
525: {
526: #if defined(PETSC_MISSING_LAPACK_LARFG) || defined(PETSC_MISSING_LAPACK_LARF)
528: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
529: #else
531: PetscInt i,j,k,*ii,*jj,i0=0,ik=0,tmp,type,*perm,nwall,nwu;
532: PetscReal *ss,cond=1.0,cs,sn,r;
533: PetscScalar *work,tau,t,*AA;
534: PetscBLASInt n0,n1,ni,inc=1,m,n_,lda_,ldq_;
535: PetscBool breakdown = PETSC_TRUE;
536:
538: if (n<3) {
539: if (n==1)Q[0]=1;
540: if (n==2) {Q[0] = Q[1+ldq] = 1; Q[1] = Q[ldq] = 0;}
541: return(0);
542: }
543: lda_ = PetscBLASIntCast(lda);
544: n_ = PetscBLASIntCast(n);
545: ldq_ = PetscBLASIntCast(ldq);
546: nwall = n*n+n;
547: nwu = 0;
548: if (!w || lw < nwall) {
549: PetscMalloc(nwall*sizeof(PetscScalar),&work);
550: }else work = w;
551: PetscMalloc(n*sizeof(PetscReal),&ss);
552: PetscMalloc(n*sizeof(PetscInt),&perm);
553: AA = work;
554: for (i=0;i<n;i++) {
555: PetscMemcpy(AA+i*n,A+i*lda,n*sizeof(PetscScalar));
556: }
557: nwu += n*n;
558: k=0;
559: while (breakdown && k<n) {
560: breakdown = PETSC_FALSE;
561: /* Classify (and flip) A and s according to sign */
562: if (flip) {
563: for (i=0;i<n;i++) {
564: perm[i] = n-1-perm_[i];
565: if (perm[i]==0) i0 = i;
566: if (perm[i]==k) ik = i;
567: }
568: } else {
569: for (i=0;i<n;i++) {
570: perm[i] = perm_[i];
571: if (perm[i]==0) i0 = i;
572: if (perm[i]==k) ik = i;
573: }
574: }
575: perm[ik] = 0;
576: perm[i0] = k;
577: i=1;
578: while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
579: if (s[perm[i]]!=s[perm[0]]) {
580: j=i+1;
581: while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
582: tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
583: }
584: i++;
585: }
586: for (i=0;i<n;i++) {
587: ss[i] = s[perm[i]];
588: }
589: if (flip) { ii = &j; jj = &i;} else { ii = &i; jj = &j;}
590: for (i=0;i<n;i++)
591: for (j=0;j<n;j++)
592: A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
593: /* Initialize Q */
594: for (i=0;i<n;i++) {
595: PetscMemzero(Q+i*ldq,n*sizeof(PetscScalar));
596: Q[perm[i]+i*ldq] = 1.0;
597: }
598: for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
599: n0 = ni-1; n1 = PetscBLASIntCast(n)-ni;
600: for (j=0;j<n-2;j++) {
601: m = PetscBLASIntCast(n-j-1);
602: /* Forming and applying reflectors */
603: if ( n0 > 1 ) {
604: LAPACKlarfg_(&n0, A+ni-n0+j*lda, A+ni-n0+j*lda+1,&inc,&tau);
605: /* Apply reflector */
606: if ( PetscAbsScalar(tau) != 0.0 ) {
607: t=*( A+ni-n0+j*lda); *(A+ni-n0+j*lda)=1.0;
608: LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu);
609: LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu);
610: /* Update Q */
611: LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu);
612: *(A+ni-n0+j*lda) = t;
613: for (i=1;i<n0;i++) {
614: *(A+ni-n0+j*lda+i) = 0.0; *(A+j+(ni-n0+i)*lda) = 0.0;
615: }
616: *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
617: }
618: }
619: if ( n1 > 1 ) {
620: LAPACKlarfg_(&n1, A+n-n1+j*lda, A+n-n1+j*lda+1,&inc,&tau);
621: /* Apply reflector */
622: if ( PetscAbsScalar(tau) != 0.0 ) {
623: t=*( A+n-n1+j*lda); *(A+n-n1+j*lda)=1.0;
624: LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu);
625: LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu);
626: /* Update Q */
627: LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu);
628: *(A+n-n1+j*lda) = t;
629: for (i=1;i<n1;i++) {
630: *(A+n-n1+i+j*lda) = 0.0; *(A+j+(n-n1+i)*lda) = 0.0;
631: }
632: *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
633: }
634: }
635: /* Hyperbolic rotation */
636: if ( n0 > 0 && n1 > 0) {
637: HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond);
638: /* Check condition number */
639: if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
640: breakdown = PETSC_TRUE;
641: k++;
642: if (k==n || flip)
643: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Breakdown in construction of hyperbolic transformation");
644: break;
645: }
646: A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
647: A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
648: /* Apply to A */
649: HRApply(m, A+j+1+(ni-n0)*lda,1, A+j+1+(n-n1)*lda,1, cs, -sn);
650: HRApply(m, A+ni-n0+(j+1)*lda,lda, A+n-n1+(j+1)*lda,lda, cs, -sn);
651:
652: /* Update Q */
653: HRApply(n, Q+(ni-n0)*ldq,1, Q+(n-n1)*ldq,1, cs, -sn);
654: if (type==2) {
655: ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
656: n0++;ni++;n1--;
657: }
658: }
659: if (n0>0) n0--;else n1--;
660: }
661: }
663: /* flip matrices */
664: if (flip) {
665: for (i=0;i<n-1;i++) {
666: d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
667: e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
668: s[i] = ss[n-i-1];
669: }
670: s[n-1] = ss[0];
671: d[n-1] = PetscRealPart(A[0]);
672: for (i=0;i<n;i++) {
673: ierr=PetscMemcpy(work+i*n,Q+i*ldq,n*sizeof(PetscScalar));
674: }
675: for (i=0;i<n;i++)
676: for (j=0;j<n;j++)
677: Q[i+j*ldq] = work[i+(n-j-1)*n];
678: } else {
679: for (i=0;i<n-1;i++) {
680: d[i] = PetscRealPart(A[i+i*lda]);
681: e[i] = PetscRealPart(A[i+1+i*lda]);
682: s[i] = ss[i];
683: }
684: s[n-1] = ss[n-1];
685: d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
686: }
688: PetscFree(ss);
689: PetscFree(perm);
690: return(0);
691: #endif
692: }
696: /*
697: compute x = x - y*ss^{-1}*y^T*s*x where ss=y^T*s*y
698: s diagonal (signature matrix)
699: */
700: static PetscErrorCode IndefOrthog(PetscReal *s, PetscScalar *y, PetscReal ss, PetscScalar *x, PetscScalar *h,PetscInt n)
701: {
702: PetscInt i;
703: PetscScalar h_,r;
706: if (y) {
707: h_ = 0.0; /* h_=(y^Tdiag(s)*y)^{-1}*y^T*diag(s)*x*/
708: for (i=0;i<n;i++) { h_+=y[i]*s[i]*x[i];}
709: h_ /= ss;
710: for (i=0;i<n;i++) {x[i] -= h_*y[i];} /* x = x-h_*y */
711: /* repeat */
712: r = 0.0;
713: for (i=0;i<n;i++) { r+=y[i]*s[i]*x[i];}
714: r /= ss;
715: for (i=0;i<n;i++) {x[i] -= r*y[i];}
716: h_ += r;
717: }else h_ = 0.0;
718: if (h) *h = h_;
719: return(0);
720: }
724: /*
725: normalization with a indefinite norm
726: */
727: static PetscErrorCode IndefNorm(PetscReal *s,PetscScalar *x, PetscReal *norm,PetscInt n)
728: {
729: PetscInt i;
730: PetscReal norm_;
733: /* s-normalization */
734: norm_ = 0.0;
735: for (i=0;i<n;i++) {norm_ += PetscRealPart(x[i]*s[i]*x[i]);}
736: if (norm_<0) {norm_ = -PetscSqrtReal(-norm_);}
737: else {norm_ = PetscSqrtReal(norm_);}
738: for (i=0;i<n;i++)x[i] /= norm_;
739: if (norm) *norm = norm_;
740: return(0);
741: }
745: static PetscErrorCode DSEigenVectorsPseudoOrthog(DS ds, DSMatType mat, PetscScalar *wr, PetscScalar *wi,PetscBool accum)
746: {
748: PetscInt i,j,k,off;
749: PetscBLASInt ld,n1,one=1;
750: PetscScalar PQ[4],xx,yx,xy,yy,*y,h,oneS=1.0,zeroS=0.0,*X,*W,*B;
751: PetscReal *ss,*s,*d,*e,d1,d2,toldeg=PETSC_SQRT_MACHINE_EPSILON*100;
754: ld = PetscBLASIntCast(ds->ld);
755: n1 = PetscBLASIntCast(ds->n - ds->l);
756: DSAllocateWork_Private(ds,ld*ld+2*ld,ld,2*ld);
757: s = ds->rmat[DS_MAT_D];
758: d = ds->rmat[DS_MAT_T];
759: e = d + ld;
760: off = ds->l+ds->l*ld;
761: if (!ds->compact) {
762: B = ds->mat[DS_MAT_B];
763: for (i=ds->l;i<ds->n;i++) {
764: s[i] = PetscRealPart(B[i+i*ld]);
765: }
766: }
768: /* compute real s-orthonormal base */
769: X = ds->mat[mat];
770: ss = ds->rwork;
771: y = ds->work;
773: #if defined(PETSC_USE_COMPLEX)
774: /* with complex scalars we need to operate as in real scalar */
775: for (i=ds->l;i<ds->n;i++) {
776: if (PetscImaginaryPart(wr[i])!=0.0) {
777: for (j=ds->l;j<ds->n;j++) {
778: X[j+(i+1)*ld] = PetscImaginaryPart(X[j+i*ld]);
779: X[j+i*ld] = PetscRealPart(X[j+i*ld]);
780: }
781: i++;
782: }
783: }
784: #endif
786: for (i=ds->l;i<ds->n;i++) {
787: #if defined(PETSC_USE_COMPLEX)
788: if (PetscImaginaryPart(wr[i])==0.0) { /* real */
789: #else
790: if (wi[i]==0.0) { /* real */
791: #endif
792: for (j=ds->l;j<i;j++) {
793: /* s-orthogonalization with close eigenvalues */
794: if (wi[j]==0.0) {
795: if ( PetscAbsScalar(wr[j]-wr[i])<toldeg) {
796: IndefOrthog(s+ds->l, X+j*ld+ds->l, ss[j],X+i*ld+ds->l, PETSC_NULL,n1);
797: }
798: }else j++;
799: }
800: IndefNorm(s+ds->l,X+i*ld+ds->l,&d1,n1);
801: ss[i] = (d1<0.0)?-1:1;
802: d[i] = PetscRealPart(wr[i]*ss[i]); e[i] = 0.0;
803: } else {
804: for (j=ds->l;j<i;j++) {
805: /* s-orthogonalization of Xi and Xi+1*/
806: #if defined(PETSC_USE_COMPLEX)
807: if (PetscImaginaryPart(wr[j])!=0.0) {
808: #else
809: if (wi[j]!=0.0) {
810: #endif
811: if (PetscAbsScalar(wr[j]-wr[i])<toldeg && PetscAbsScalar(PetscAbsScalar(wi[j])-PetscAbsScalar(wi[i]))<toldeg) {
812: for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+i*ld];
813: xx = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
814: yx = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
815: for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+(i+1)*ld];
816: xy = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
817: yy = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
818: PQ[0] = ss[j]*xx; PQ[1] = ss[j+1]*yx; PQ[2] = ss[j]*xy; PQ[3] = ss[j+1]*yy;
819: for (k=ds->l;k<ds->n;k++) {
820: X[k+i*ld] -= PQ[0]*X[k+j*ld]+PQ[1]*X[k+(j+1)*ld];
821: X[k+(i+1)*ld] -= PQ[2]*X[k+j*ld]+PQ[3]*X[k+(j+1)*ld];
822: }
823: /* Repeat */
824: for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+i*ld];
825: xx = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
826: yx = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
827: for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+(i+1)*ld];
828: xy = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
829: yy = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
830: PQ[0] = ss[j]*xx; PQ[1] = ss[j+1]*yx; PQ[2] = ss[j]*xy; PQ[3] = ss[j+1]*yy;
831: for (k=ds->l;k<ds->n;k++) {
832: X[k+i*ld] -= PQ[0]*X[k+j*ld]+PQ[1]*X[k+(j+1)*ld];
833: X[k+(i+1)*ld] -= PQ[2]*X[k+j*ld]+PQ[3]*X[k+(j+1)*ld];
834: }
835: }
836: j++;
837: }
838: }
839: IndefNorm(s+ds->l,X+i*ld+ds->l,&d1,n1);
840: ss[i] = (d1<0)?-1:1;
841: IndefOrthog(s+ds->l, X+i*ld+ds->l, ss[i],X+(i+1)*ld+ds->l, &h,n1);
842: IndefNorm(s+ds->l,X+(i+1)*ld+ds->l,&d2,n1);
843: ss[i+1] = (d2<0)?-1:1;
844: d[i] = PetscRealPart((wr[i]-wi[i]*h/d1)*ss[i]);
845: d[i+1] = PetscRealPart((wr[i]+wi[i]*h/d1)*ss[i+1]);
846: e[i] = PetscRealPart(wi[i]*d2/d1*ss[i]); e[i+1] = 0.0;
847: i++;
848: }
849: }
850: for (i=ds->l;i<ds->n;i++) s[i] = ss[i];
851: /* accumulate previous Q */
852: if (accum && mat!=DS_MAT_Q) {
853: DSAllocateMat_Private(ds,DS_MAT_W);
854: W = ds->mat[DS_MAT_W];
855: DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
856: BLASgemm_("N","N",&n1,&n1,&n1,&oneS,W+off,&ld,ds->mat[DS_MAT_X]+off,&ld,&zeroS,ds->mat[DS_MAT_Q]+off,&ld);
857: }
858: if (!ds->compact) {DSSwitchFormat_GHIEP(ds,PETSC_FALSE);}
859: return(0);
860: }
864: /*
865: Get eigenvectors with inverse iteration.
866: The system matrix is in Hessenberg form.
867: */
868: PetscErrorCode DSGHIEPPseudoOrthogInverseIteration(DS ds,PetscScalar *wr,PetscScalar *wi)
869: {
870: #if defined(PETSC_MISSING_LAPACK_HSEIN)
872: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HSEIN - Lapack routine is unavailable");
873: #else
875: PetscInt i,off;
876: PetscBLASInt *select,*infoC,ld,n1,mout,info;
877: PetscScalar *A,*B,*H,*X;
878: PetscReal *s,*d,*e;
881: ld = PetscBLASIntCast(ds->ld);
882: n1 = PetscBLASIntCast(ds->n - ds->l);
883: DSAllocateWork_Private(ds,ld*ld+2*ld,ld,2*ld);
884: DSAllocateMat_Private(ds,DS_MAT_W);
885: A = ds->mat[DS_MAT_A];
886: B = ds->mat[DS_MAT_B];
887: H = ds->mat[DS_MAT_W];
888: s = ds->rmat[DS_MAT_D];
889: d = ds->rmat[DS_MAT_T];
890: e = d + ld;
891: select = ds->iwork;
892: infoC = ds->iwork + ld;
893: off = ds->l+ds->l*ld;
894: if (ds->compact) {
895: H[off] = d[ds->l]*s[ds->l];
896: H[off+ld] = e[ds->l]*s[ds->l];
897: for (i=ds->l+1;i<ds->n-1;i++) {
898: H[i+(i-1)*ld] = e[i-1]*s[i];
899: H[i+i*ld] = d[i]*s[i];
900: H[i+(i+1)*ld] = e[i]*s[i];
901: }
902: H[ds->n-1+(ds->n-2)*ld] = e[ds->n-2]*s[ds->n-1];
903: H[ds->n-1+(ds->n-1)*ld] = d[ds->n-1]*s[ds->n-1];
904: } else {
905: s[ds->l] = PetscRealPart(B[off]);
906: H[off] = A[off]*s[ds->l];
907: H[off+ld] = A[off+ld]*s[ds->l];
908: for (i=ds->l+1;i<ds->n-1;i++) {
909: s[i] = PetscRealPart(B[i+i*ld]);
910: H[i+(i-1)*ld] = A[i+(i-1)*ld]*s[i];
911: H[i+i*ld] = A[i+i*ld]*s[i];
912: H[i+(i+1)*ld] = A[i+(i+1)*ld]*s[i];
913: }
914: s[ds->n-1] = PetscRealPart(B[ds->n-1+(ds->n-1)*ld]);
915: H[ds->n-1+(ds->n-2)*ld] = A[ds->n-1+(ds->n-2)*ld]*s[ds->n-1];
916: H[ds->n-1+(ds->n-1)*ld] = A[ds->n-1+(ds->n-1)*ld]*s[ds->n-1];
917: }
918: DSAllocateMat_Private(ds,DS_MAT_X);
919: X = ds->mat[DS_MAT_X];
920: for (i=0;i<n1;i++)select[i]=1;
921: #if !defined(PETSC_USE_COMPLEX)
922: LAPACKhsein_("R","N","N",select,&n1,H+off,&ld,wr+ds->l,wi+ds->l,PETSC_NULL,&ld,X+off,&ld,&n1,&mout,ds->work,PETSC_NULL,infoC,&info);
923: #else
924: LAPACKhsein_("R","N","N",select,&n1,H+off,&ld,wr+ds->l,PETSC_NULL,&ld,X+off,&ld,&n1,&mout,ds->work,ds->rwork,PETSC_NULL,infoC,&info);
925: #endif
926: if (info<0)SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in hsein routine %d",-i);
927: if (info>0) {
928: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Convergence error in hsein routine %d",i);
929: }
931: DSEigenVectorsPseudoOrthog(ds, DS_MAT_X, wr, wi,PETSC_TRUE);
932: return(0);
933: #endif
934: }
938: /*
939: Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
940: */
941: PetscErrorCode DSIntermediate_GHIEP(DS ds)
942: {
944: PetscInt i,ld,off;
945: PetscScalar *A,*B,*Q;
946: PetscReal *d,*e,*s;
949: ld = ds->ld;
950: A = ds->mat[DS_MAT_A];
951: B = ds->mat[DS_MAT_B];
952: Q = ds->mat[DS_MAT_Q];
953: d = ds->rmat[DS_MAT_T];
954: e = ds->rmat[DS_MAT_T]+ld;
955: s = ds->rmat[DS_MAT_D];
956: off = ds->l+ds->l*ld;
957: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
958: DSAllocateWork_Private(ds,ld*ld,0,0);
960: for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
961: for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
962: if (ds->compact) {
963: if (ds->state < DS_STATE_INTERMEDIATE) {
964: DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
965: TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work,ld*ld);
966: ds->k = ds->l;
967: PetscMemzero(d+2*ld+ds->l,(ds->n-ds->l)*sizeof(PetscReal));
968: }
969: } else {
970: if (ds->state < DS_STATE_INTERMEDIATE) {
971: for (i=0;i<ds->n;i++)
972: s[i] = PetscRealPart(B[i+i*ld]);
973: TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work,ld*ld);
974: PetscMemzero(d+2*ld,(ds->n)*sizeof(PetscReal));
975: ds->k = ds->l;
976: DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
977: } else { DSSwitchFormat_GHIEP(ds,PETSC_TRUE); }
978: }
979: return(0);
980: }
984: /*
985: Undo 2x2 blocks that have real eigenvalues.
986: */
987: PetscErrorCode DSGHIEPRealBlocks(DS ds)
988: {
990: PetscInt i;
991: PetscReal e,d1,d2,s1,s2,ss1,ss2,t,dd,ss;
992: PetscReal maxy,ep,scal1,scal2,snorm;
993: PetscReal *T,*D,b[4],M[4],wr1,wr2,wi;
994: PetscScalar *A,*B,Y[4],oneS = 1.0,zeroS = 0.0;
995: PetscBLASInt m,two=2,ld;
996: PetscBool isreal;
999: ld = PetscBLASIntCast(ds->ld);
1000: m = PetscBLASIntCast(ds->n-ds->l);
1001: A = ds->mat[DS_MAT_A];
1002: B = ds->mat[DS_MAT_B];
1003: T = ds->rmat[DS_MAT_T];
1004: D = ds->rmat[DS_MAT_D];
1005: DSAllocateWork_Private(ds,2*m,0,0);
1006: for (i=ds->l;i<ds->n-1;i++) {
1007: e = (ds->compact)?T[ld+i]:PetscRealPart(A[(i+1)+ld*i]);
1008: if (e != 0.0) { /* 2x2 block */
1009: if (ds->compact) {
1010: s1 = D[i];
1011: d1 = T[i];
1012: s2 = D[i+1];
1013: d2 = T[i+1];
1014: } else {
1015: s1 = PetscRealPart(B[i*ld+i]);
1016: d1 = PetscRealPart(A[i*ld+i]);
1017: s2 = PetscRealPart(B[(i+1)*ld+i+1]);
1018: d2 = PetscRealPart(A[(i+1)*ld+i+1]);
1019: }
1020: isreal = PETSC_FALSE;
1021: if (s1==s2) { /* apply a Jacobi rotation to compute the eigendecomposition */
1022: dd = d1-d2;
1023: if (2*PetscAbsReal(e) <= dd) {
1024: t = 2*e/dd;
1025: t = t/(1 + PetscSqrtReal(1+t*t));
1026: } else {
1027: t = dd/(2*e);
1028: ss = (t>=0)?1.0:-1.0;
1029: t = ss/(PetscAbsReal(t)+PetscSqrtReal(1+t*t));
1030: }
1031: Y[0] = 1/PetscSqrtReal(1 + t*t); Y[3] = Y[0]; /* c */
1032: Y[1] = Y[0]*t; Y[2] = -Y[1]; /* s */
1033: wr1 = d1+t*e;
1034: wr2 = d2-t*e;
1035: ss1 = s1; ss2 = s2;
1036: isreal = PETSC_TRUE;
1037: } else {
1038: ss1 = 1.0; ss2 = 1.0,
1039: M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
1040: b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
1041: ep = LAPACKlamch_("S");
1042: /* Compute eigenvalues of the block */
1043: LAPACKlag2_(M, &two, b, &two,&ep , &scal1, &scal2, &wr1, &wr2, &wi);
1044: if (wi==0.0) { /* Real eigenvalues */
1045: isreal = PETSC_TRUE;
1046: if (scal1<ep||scal2<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
1047: wr1 /= scal1; wr2 /= scal2;
1048: if ( PetscAbsReal(s1*d1-wr1)<PetscAbsReal(s2*d2-wr1)) { Y[0] = wr1-s2*d2; Y[1] =s2*e;}
1049: else{ Y[0] = s1*e; Y[1] = wr1-s1*d1; }
1050: /* normalize with a signature*/
1051: maxy = PetscMax(PetscAbsScalar(Y[0]),PetscAbsScalar(Y[1]));
1052: scal1 = PetscRealPart(Y[0])/maxy; scal2 = PetscRealPart(Y[1])/maxy;
1053: snorm = scal1*scal1*s1 + scal2*scal2*s2;
1054: if (snorm<0) {ss1 = -1.0; snorm = -snorm;}
1055: snorm = maxy*PetscSqrtReal(snorm); Y[0] = Y[0]/snorm; Y[1] = Y[1]/snorm;
1056: if ( PetscAbsReal(s1*d1-wr2)<PetscAbsReal(s2*d2-wr2)) { Y[2] = wr2-s2*d2; Y[3] =s2*e;}
1057: else{ Y[2] = s1*e; Y[3] = wr2-s1*d1; }
1058: maxy = PetscMax(PetscAbsScalar(Y[2]),PetscAbsScalar(Y[3]));
1059: scal1 = PetscRealPart(Y[2])/maxy; scal2 = PetscRealPart(Y[3])/maxy;
1060: snorm = scal1*scal1*s1 + scal2*scal2*s2;
1061: if (snorm<0) {ss2 = -1.0; snorm = -snorm;}
1062: snorm = maxy*PetscSqrtReal(snorm);Y[2] = Y[2]/snorm; Y[3] = Y[3]/snorm;
1063: }
1064: wr1 *= ss1; wr2 *= ss2;
1065: }
1066: if (isreal) {
1067: if (ds->compact) {
1068: D[i] = ss1;;
1069: T[i] = wr1;
1070: D[i+1] = ss2;
1071: T[i+1] = wr2;
1072: T[ld+i] = 0.0;
1073: }else {
1074: B[i*ld+i] = ss1;
1075: A[i*ld+i] = wr1;
1076: B[(i+1)*ld+i+1] = ss2;
1077: A[(i+1)*ld+i+1] = wr2;
1078: A[(i+1)+ld*i] = 0.0;
1079: A[i+ld*(i+1)] = 0.0;
1080: }
1081: BLASgemm_("N","N",&m,&two,&two,&oneS,ds->mat[DS_MAT_Q]+ds->l+i*ld,&ld,Y,&two,&zeroS,ds->work,&m);
1082: PetscMemcpy(ds->mat[DS_MAT_Q]+ds->l+i*ld,ds->work,m*sizeof(PetscScalar));
1083: PetscMemcpy(ds->mat[DS_MAT_Q]+ds->l+(i+1)*ld,ds->work+m,m*sizeof(PetscScalar));
1084: }
1085: i++;
1086: }
1087: }
1088: return(0);
1089: }
1093: PetscErrorCode DSSolve_GHIEP_QR_II(DS ds,PetscScalar *wr,PetscScalar *wi)
1094: {
1095: #if defined(PETSC_MISSING_LAPACK_HSEQR)
1097: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable");
1098: #else
1100: PetscInt i,off;
1101: PetscBLASInt n1,ld,one,info,lwork;
1102: PetscScalar *H,*A,*B,*Q,*work;
1103: PetscReal *d,*e,*s;
1106: one = 1;
1107: n1 = PetscBLASIntCast(ds->n - ds->l);
1108: ld = PetscBLASIntCast(ds->ld);
1109: off = ds->l + ds->l*ld;
1110: A = ds->mat[DS_MAT_A];
1111: B = ds->mat[DS_MAT_B];
1112: Q = ds->mat[DS_MAT_Q];
1113: d = ds->rmat[DS_MAT_T];
1114: e = ds->rmat[DS_MAT_T] + ld;
1115: s = ds->rmat[DS_MAT_D];
1116: DSAllocateWork_Private(ds,ld*ld,2*ld,ld*2);
1117: work = ds->work;
1118: lwork = ld*ld;
1120: /* Quick return if possible */
1121: if (n1 == 1) {
1122: *(Q+off) = 1;
1123: if (ds->compact) {
1124: wr[ds->l] = d[ds->l]/s[ds->l];
1125: wi[ds->l] = 0.0;
1126: } else {
1127: d[ds->l] = PetscRealPart(A[off]);
1128: s[ds->l] = PetscRealPart(B[off]);
1129: wr[ds->l] = d[ds->l]/s[ds->l];
1130: wi[ds->l] = 0.0;
1131: }
1132: return(0);
1133: }
1134: /* Reduce to pseudotriadiagonal form */
1135: DSIntermediate_GHIEP( ds);
1137: /* Compute Eigenvalues (QR)*/
1138: DSAllocateMat_Private(ds,DS_MAT_W);
1139: H = ds->mat[DS_MAT_W];
1140: if (ds->compact) {
1141: H[off] = d[ds->l]*s[ds->l];
1142: H[off+ld] = e[ds->l]*s[ds->l];
1143: for (i=ds->l+1;i<ds->n-1;i++) {
1144: H[i+(i-1)*ld] = e[i-1]*s[i];
1145: H[i+i*ld] = d[i]*s[i];
1146: H[i+(i+1)*ld] = e[i]*s[i];
1147: }
1148: H[ds->n-1+(ds->n-2)*ld] = e[ds->n-2]*s[ds->n-1];
1149: H[ds->n-1+(ds->n-1)*ld] = d[ds->n-1]*s[ds->n-1];
1150: } else {
1151: s[ds->l] = PetscRealPart(B[off]);
1152: H[off] = A[off]*s[ds->l];
1153: H[off+ld] = A[off+ld]*s[ds->l];
1154: for (i=ds->l+1;i<ds->n-1;i++) {
1155: s[i] = PetscRealPart(B[i+i*ld]);
1156: H[i+(i-1)*ld] = A[i+(i-1)*ld]*s[i];
1157: H[i+i*ld] = A[i+i*ld]*s[i];
1158: H[i+(i+1)*ld] = A[i+(i+1)*ld]*s[i];
1159: }
1160: s[ds->n-1] = PetscRealPart(B[ds->n-1+(ds->n-1)*ld]);
1161: H[ds->n-1+(ds->n-2)*ld] = A[ds->n-1+(ds->n-2)*ld]*s[ds->n-1];
1162: H[ds->n-1+(ds->n-1)*ld] = A[ds->n-1+(ds->n-1)*ld]*s[ds->n-1];
1163: }
1165: #if !defined(PETSC_USE_COMPLEX)
1166: LAPACKhseqr_("E","N",&n1,&one,&n1,H+off,&ld,wr+ds->l,wi+ds->l,PETSC_NULL,&ld,work,&lwork,&info);
1167: #else
1168: LAPACKhseqr_("E","N",&n1,&one,&n1,H+off,&ld,wr,PETSC_NULL,&ld,work,&lwork,&info);
1169: #endif
1170: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",&info);
1172: /* Compute Eigenvectors with Inverse Iteration */
1173: DSGHIEPPseudoOrthogInverseIteration(ds,wr,wi);
1175: /* Recover eigenvalues from diagonal */
1176: DSGHIEPComplexEigs(ds, 0, ds->l, wr, wi);
1177: return(0);
1178: #endif
1179: }
1183: PetscErrorCode DSSolve_GHIEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
1184: {
1185: #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
1187: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
1188: #else
1190: PetscInt i,j,off;
1191: PetscBLASInt lwork,info,n1,one=1,mout,ld;
1192: PetscScalar *A,*B,*H,*Q,*work,*tau;
1193: PetscReal *d,*e,*s;
1196: n1 = PetscBLASIntCast(ds->n - ds->l);
1197: ld = PetscBLASIntCast(ds->ld);
1198: off = ds->l + ds->l*ld;
1199: A = ds->mat[DS_MAT_A];
1200: B = ds->mat[DS_MAT_B];
1201: Q = ds->mat[DS_MAT_Q];
1202: d = ds->rmat[DS_MAT_T];
1203: e = ds->rmat[DS_MAT_T] + ld;
1204: s = ds->rmat[DS_MAT_D];
1205: DSAllocateMat_Private(ds,DS_MAT_W);
1206: H = ds->mat[DS_MAT_W];
1207: DSAllocateWork_Private(ds,ld+ld*ld,ld,0);
1208: tau = ds->work;
1209: work = ds->work+ld;
1210: lwork = ld*ld;
1212: /* initialize orthogonal matrix */
1213: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
1214: for (i=0;i< ds->n;i++)
1215: Q[i+i*ld] = 1.0;
1216: /* quick return */
1217: if (n1 == 1) {
1218: if (ds->compact) {
1219: wr[ds->l] = d[ds->l]/s[ds->l];
1220: wi[ds->l] = 0.0;
1221: } else {
1222: d[ds->l] = PetscRealPart(A[off]);
1223: s[ds->l] = PetscRealPart(B[off]);
1224: wr[ds->l] = d[ds->l]/s[ds->l];
1225: wi[ds->l] = 0.0;
1226: }
1227: return(0);
1228: }
1230: /* form standard problem in H */
1231: if (ds->compact) {
1232: PetscMemzero(H,ld*ld*sizeof(PetscScalar));
1233: for (i=ds->l; i < ds->n-1; i++) {
1234: H[i+i*ld] = d[i]/s[i];
1235: H[(i+1)+i*ld] = e[i]/s[i+1];
1236: H[i+(i+1)*ld] = e[i]/s[i];
1237: }
1238: H[ds->n-1 + (ds->n-1)*ld] = d[ds->n-1]/s[ds->n-1];
1240: for (i=ds->l; i < ds->k; i++) {
1241: H[ds->k+i*ld] = *(ds->rmat[DS_MAT_T]+2*ld+i)/s[ds->k];
1242: H[i+ds->k*ld] = *(ds->rmat[DS_MAT_T]+2*ld+i)/s[i];
1243: }
1244: } else {
1245: for (j=ds->l; j<ds->n; j++) {
1246: for (i=ds->l; i<ds->n; i++) {
1247: H[i+j*ld] = A[i+j*ld]/B[i+i*ld];
1248: }
1249: }
1250: }
1251: /* reduce to upper Hessenberg form */
1252: if (ds->state<DS_STATE_INTERMEDIATE) {
1253: LAPACKgehrd_(&n1,&one,&n1,H+off,&ld,tau,work,&lwork,&info);
1254: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEHRD %d",&info);
1255: for (j=ds->l;j<ds->n-1;j++) {
1256: for (i=j+2;i<ds->n;i++) {
1257: Q[i+j*ld] = H[i+j*ld];
1258: H[i+j*ld] = 0.0;
1259: }
1260: }
1261: LAPACKorghr_(&n1,&one,&n1,Q+off,&ld,tau,work,&lwork,&info);
1262: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGHR %d",&info);
1263: }
1265: /* Compute the real Schur form */
1266: #if !defined(PETSC_USE_COMPLEX)
1267: LAPACKhseqr_("S","V",&n1,&one,&n1,H+off,&ld,wr+ds->l,wi+ds->l,Q+off,&ld,work,&lwork,&info);
1268: #else
1269: LAPACKhseqr_("S","V",&n1,&one,&n1,H+off,&ld,wr,Q+off,&ld,work,&lwork,&info);
1270: #endif
1271: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",&info);
1272:
1273: /* Compute eigenvectors */
1274: #if !defined(PETSC_USE_COMPLEX)
1275: LAPACKtrevc_("R","B",PETSC_NULL,&n1,H+off,&ld,PETSC_NULL,&ld,Q+off,&ld,&n1,&mout,ds->work,&info);
1276: #else
1277: LAPACKtrevc_("R","B",PETSC_NULL,&n1,H+off,&ld,PETSC_NULL,&ld,Q+off,&ld,&n1,&mout,work,ds->rwork,&info);
1278: #endif
1279: if (info) SETERRQ1(((PetscObject)ds)->comm,PETSC_ERR_LIB,"Error in Lapack xTREVC %i",&info);
1281: /* Compute real s-orthonormal basis */
1282: DSEigenVectorsPseudoOrthog(ds, DS_MAT_Q, wr, wi,PETSC_FALSE);
1284: /* Undo from diagonal the blocks whith real eigenvalues*/
1285: DSGHIEPRealBlocks(ds);
1287: /* Recover eigenvalues from diagonal */
1288: DSGHIEPComplexEigs(ds, 0, ds->l, wr, wi);
1289: return(0);
1290: #endif
1291: }
1295: PetscErrorCode DSNormalize_GHIEP(DS ds,DSMatType mat,PetscInt col)
1296: {
1298: PetscInt i,i0,i1;
1299: PetscBLASInt ld,n,one = 1;
1300: PetscScalar *A = ds->mat[DS_MAT_A],norm,*x;
1301: #if !defined(PETSC_USE_COMPLEX)
1302: PetscScalar norm0;
1303: #endif
1306: switch (mat) {
1307: case DS_MAT_X:
1308: case DS_MAT_Y:
1309: case DS_MAT_Q:
1310: /* Supported matrices */
1311: break;
1312: case DS_MAT_U:
1313: case DS_MAT_VT:
1314: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
1315: break;
1316: default:
1317: SETERRQ(((PetscObject)ds)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
1318: }
1320: n = PetscBLASIntCast(ds->n);
1321: ld = PetscBLASIntCast(ds->ld);
1322: DSGetArray(ds,mat,&x);
1323: if (col < 0) {
1324: i0 = 0; i1 = ds->n;
1325: } else if (col>0 && A[ds->ld*(col-1)+col] != 0.0) {
1326: i0 = col-1; i1 = col+1;
1327: } else {
1328: i0 = col; i1 = col+1;
1329: }
1330: for (i=i0; i<i1; i++) {
1331: #if !defined(PETSC_USE_COMPLEX)
1332: if (i<n-1 && A[ds->ld*i+i+1] != 0.0) {
1333: norm = BLASnrm2_(&n,&x[ld*i],&one);
1334: norm0 = BLASnrm2_(&n,&x[ld*(i+1)],&one);
1335: norm = 1.0/SlepcAbsEigenvalue(norm,norm0);
1336: BLASscal_(&n,&norm,&x[ld*i],&one);
1337: BLASscal_(&n,&norm,&x[ld*(i+1)],&one);
1338: i++;
1339: } else
1340: #endif
1341: {
1342: norm = BLASnrm2_(&n,&x[ld*i],&one);
1343: norm = 1.0/norm;
1344: BLASscal_(&n,&norm,&x[ld*i],&one);
1345: }
1346: }
1347: return(0);
1348: }
1350: extern PetscErrorCode DSSolve_GHIEP_HZ(DS,PetscScalar*,PetscScalar*);
1351: extern PetscErrorCode DSSolve_GHIEP_DQDS_II(DS,PetscScalar*,PetscScalar*);
1353: EXTERN_C_BEGIN
1356: PetscErrorCode DSCreate_GHIEP(DS ds)
1357: {
1359: ds->ops->allocate = DSAllocate_GHIEP;
1360: ds->ops->view = DSView_GHIEP;
1361: ds->ops->vectors = DSVectors_GHIEP;
1362: ds->ops->solve[0] = DSSolve_GHIEP_HZ;
1363: ds->ops->solve[1] = DSSolve_GHIEP_QR_II;
1364: ds->ops->solve[2] = DSSolve_GHIEP_QR;
1365: ds->ops->solve[3] = DSSolve_GHIEP_DQDS_II;
1366: ds->ops->sort = DSSort_GHIEP;
1367: ds->ops->normalize = DSNormalize_GHIEP;
1368: return(0);
1369: }
1370: EXTERN_C_END