Actual source code: dsghiep.c

  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:       
  8:    SLEPc is free software: you can redistribute it and/or modify it under  the
  9:    terms of version 3 of the GNU Lesser General Public License as published by
 10:    the Free Software Foundation.

 12:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 13:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 14:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 15:    more details.

 17:    You  should have received a copy of the GNU Lesser General  Public  License
 18:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 19:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 20: */
 21: #include <slepc-private/dsimpl.h>      /*I "slepcds.h" I*/
 22: #include <slepcblaslapack.h>

 26: PetscErrorCode DSAllocate_GHIEP(DS ds,PetscInt ld)
 27: {

 31:   DSAllocateMat_Private(ds,DS_MAT_A);
 32:   DSAllocateMat_Private(ds,DS_MAT_B);
 33:   DSAllocateMat_Private(ds,DS_MAT_Q);
 34:   DSAllocateMatReal_Private(ds,DS_MAT_T);
 35:   DSAllocateMatReal_Private(ds,DS_MAT_D);
 36:   PetscFree(ds->perm);
 37:   PetscMalloc(ld*sizeof(PetscInt),&ds->perm);
 38:   PetscLogObjectMemory(ds,ld*sizeof(PetscInt));
 39:   return(0);
 40: }

 44: PetscErrorCode DSSwitchFormat_GHIEP(DS ds,PetscBool tocompact)
 45: {
 47:   PetscReal      *T,*S;
 48:   PetscScalar    *A,*B;
 49:   PetscInt       i,n,ld;

 52:   A = ds->mat[DS_MAT_A];
 53:   B = ds->mat[DS_MAT_B];
 54:   T = ds->rmat[DS_MAT_T];
 55:   S = ds->rmat[DS_MAT_D];
 56:   n = ds->n;
 57:   ld = ds->ld;
 58:   if (tocompact) { /* switch from dense (arrow) to compact storage */
 59:     PetscMemzero(T,3*ld*sizeof(PetscReal));
 60:     PetscMemzero(S,ld*sizeof(PetscReal));
 61:     for (i=0;i<n-1;i++) {
 62:       T[i] = PetscRealPart(A[i+i*ld]);
 63:       T[ld+i] = PetscRealPart(A[i+1+i*ld]);
 64:       S[i] = PetscRealPart(B[i+i*ld]);
 65:     }
 66:     T[n-1] = PetscRealPart(A[n-1+(n-1)*ld]);
 67:     S[n-1] = PetscRealPart(B[n-1+(n-1)*ld]);
 68:     for (i=ds->l;i< ds->k;i++) T[2*ld+i] = PetscRealPart(A[ds->k+i*ld]);
 69:   }else { /* switch from compact (arrow) to dense storage */
 70:     PetscMemzero(A,ld*ld*sizeof(PetscScalar));
 71:     PetscMemzero(B,ld*ld*sizeof(PetscScalar));
 72:     for (i=0;i<n-1;i++) {
 73:       A[i+i*ld] = T[i];
 74:       A[i+1+i*ld] = T[ld+i];
 75:       A[i+(i+1)*ld] = T[ld+i];
 76:       B[i+i*ld] = S[i];
 77:     }
 78:     A[n-1+(n-1)*ld] = T[n-1];
 79:     B[n-1+(n-1)*ld] = S[n-1];
 80:     for (i=ds->l;i<ds->k;i++) {
 81:       A[ds->k+i*ld] = T[2*ld+i];
 82:       A[i+ds->k*ld] = T[2*ld+i];
 83:     }
 84:   }
 85:   return(0);
 86: }

 90: PetscErrorCode DSView_GHIEP(DS ds,PetscViewer viewer)
 91: {
 92:   PetscErrorCode    ierr;
 93:   PetscViewerFormat format;
 94:   PetscInt          i,j;
 95:   PetscReal         value;
 96:   const char *methodname[] = {
 97:                      "HR method",
 98:                      "QR + Inverse Iteration",
 99:                      "QR",
100:                      "DQDS + Inverse Iteration "
101:   };

104:   PetscViewerGetFormat(viewer,&format);
105:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
106:     PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
107:     return(0);
108:   }
109:   if (ds->compact) {
110:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
111:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
112:       PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",ds->n,ds->n);
113:       PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",3*ds->n);
114:       PetscViewerASCIIPrintf(viewer,"zzz = [\n");
115:       for (i=0;i<ds->n;i++) {
116:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,i+1,*(ds->rmat[DS_MAT_T]+i));
117:       }
118:       for (i=0;i<ds->n-1;i++) {
119:         if (*(ds->rmat[DS_MAT_T]+ds->ld+i) !=0 && i!=ds->k-1) {
120:           PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+2,i+1,*(ds->rmat[DS_MAT_T]+ds->ld+i));
121:           PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,i+2,*(ds->rmat[DS_MAT_T]+ds->ld+i));
122:         }
123:       }
124:       for (i = ds->l;i<ds->k;i++) {
125:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",ds->k+1,i+1,*(ds->rmat[DS_MAT_T]+2*ds->ld+i));
126:           PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,ds->k+1,*(ds->rmat[DS_MAT_T]+2*ds->ld+i));
127:       }
128:       PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_A]);
129: 
130:       PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",ds->n,ds->n);
131:       PetscViewerASCIIPrintf(viewer,"omega = zeros(%D,3);\n",3*ds->n);
132:       PetscViewerASCIIPrintf(viewer,"omega = [\n");
133:       for (i=0;i<ds->n;i++) {
134:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,i+1,*(ds->rmat[DS_MAT_D]+i));
135:       }
136:       PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(omega);\n",DSMatName[DS_MAT_B]);

138:     } else {
139:       PetscViewerASCIIPrintf(viewer,"T\n");
140:       for (i=0;i<ds->n;i++) {
141:         for (j=0;j<ds->n;j++) {
142:           if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
143:           else if (i==j+1 || j==i+1) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
144:           else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+2*ds->ld+PetscMin(i,j));
145:           else value = 0.0;
146:           PetscViewerASCIIPrintf(viewer," %18.16e ",value);
147:         }
148:         PetscViewerASCIIPrintf(viewer,"\n");
149:       }
150:       PetscViewerASCIIPrintf(viewer,"omega\n");
151:       for (i=0;i<ds->n;i++) {
152:         for (j=0;j<ds->n;j++) {
153:           if (i==j) value = *(ds->rmat[DS_MAT_D]+i);
154:           else value = 0.0;
155:           PetscViewerASCIIPrintf(viewer," %18.16e ",value);
156:         }
157:         PetscViewerASCIIPrintf(viewer,"\n");
158:       }
159:     }
160:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
161:     PetscViewerFlush(viewer);
162:   } else {
163:     DSViewMat_Private(ds,viewer,DS_MAT_A);
164:     DSViewMat_Private(ds,viewer,DS_MAT_B);
165:   }
166:   if (ds->state>DS_STATE_INTERMEDIATE) {
167:     DSViewMat_Private(ds,viewer,DS_MAT_Q);
168:   }
169:   return(0);
170: }

174: static PetscErrorCode DSVectors_GHIEP_Eigen_Some(DS ds,PetscInt *idx,PetscReal *rnorm)
175: {
177:   PetscReal      b[4],M[4],d1,d2,s1,s2,e;
178:   PetscReal      scal1,scal2,wr1,wr2,wi,ep,norm;
179:   PetscScalar    *Q,*X,Y[4],alpha,zeroS = 0.0;
180:   PetscInt       k;
181:   PetscBLASInt   two = 2,n_,ld,one=1;
182: #if !defined(PETSC_USE_COMPLEX)
183:   PetscBLASInt   four=4;
184: #endif
185: 
187:   X = ds->mat[DS_MAT_X];
188:   Q = ds->mat[DS_MAT_Q];
189:   k = *idx;
190:   n_ = PetscBLASIntCast(ds->n);
191:   ld = PetscBLASIntCast(ds->ld);
192:   if (k < ds->n-1) {
193:    e = (ds->compact)?*(ds->rmat[DS_MAT_T]+ld+k):PetscRealPart(*(ds->mat[DS_MAT_A]+(k+1)+ld*k));
194:   } else e = 0.0;
195:   if (e == 0.0) {/* Real */
196:      if (ds->state>=DS_STATE_CONDENSED) {
197:        PetscMemcpy(X+k*ld,Q+k*ld,ld*sizeof(PetscScalar));
198:      } else {
199:        PetscMemzero(X+k*ds->ld,ds->ld*sizeof(PetscScalar));
200:        X[k+k*ds->ld] = 1.0;
201:      }
202:      if (rnorm) {
203:        *rnorm = PetscAbsScalar(X[ds->n-1+k*ld]);
204:      }
205:   } else { /* 2x2 block */
206:     if (ds->compact) {
207:       s1 = *(ds->rmat[DS_MAT_D]+k);
208:       d1 = *(ds->rmat[DS_MAT_T]+k);
209:       s2 = *(ds->rmat[DS_MAT_D]+k+1);
210:       d2 = *(ds->rmat[DS_MAT_T]+k+1);
211:     } else {
212:       s1 = PetscRealPart(*(ds->mat[DS_MAT_B]+k*ld+k));
213:       d1 = PetscRealPart(*(ds->mat[DS_MAT_A]+k+k*ld));
214:       s2 = PetscRealPart(*(ds->mat[DS_MAT_B]+(k+1)*ld+k+1));
215:       d2 = PetscRealPart(*(ds->mat[DS_MAT_A]+k+1+(k+1)*ld));
216:     }
217:     M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
218:     b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
219:     ep = LAPACKlamch_("S");
220:     /* Compute eigenvalues of the block */
221:     LAPACKlag2_(M, &two, b, &two, &ep, &scal1, &scal2, &wr1, &wr2, &wi);
222:     if (wi==0.0) { /* Real eigenvalues */
223:       SETERRQ(PETSC_COMM_SELF,1,"Real block in DSVectors_GHIEP");
224:     } else { /* Complex eigenvalues */
225:       if (scal1<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
226:       wr1 /= scal1; wi /= scal1;
227: #if !defined(PETSC_USE_COMPLEX)
228:       if ( SlepcAbs(s1*d1-wr1,wi)<SlepcAbs(s2*d2-wr1,wi)) {
229:         Y[0] = wr1-s2*d2; Y[1] = s2*e; Y[2] = wi; Y[3] = 0.0;
230:       } else {
231:         Y[0] = s1*e; Y[1] = wr1-s1*d1; Y[2] = 0.0; Y[3] = wi;
232:       }
233:       norm = BLASnrm2_(&four,Y,&one);
234:       norm = 1/norm;
235:       if (ds->state >= DS_STATE_CONDENSED) {
236:         alpha = norm;
237:         BLASgemm_("N","N",&n_,&two,&two,&alpha,ds->mat[DS_MAT_Q]+k*ld,&ld,Y,&two,&zeroS,X+k*ld,&ld);
238:         if (rnorm) *rnorm = SlepcAbsEigenvalue(X[ds->n-1+k*ld],X[ds->n-1+(k+1)*ld]);
239:       } else {
240:         PetscMemzero(X+k*ld,2*ld*sizeof(PetscScalar));
241:         X[k*ld+k] = Y[0]*norm; X[k*ld+k+1] = Y[1]*norm;
242:         X[(k+1)*ld+k] = Y[2]*norm; X[(k+1)*ld+k+1] = Y[3]*norm;
243:       }
244: #else
245:       if ( SlepcAbs(s1*d1-wr1,wi)<SlepcAbs(s2*d2-wr1,wi)) {
246:         Y[0] = wr1-s2*d2+PETSC_i*wi; Y[1] = s2*e;
247:       } else {
248:         Y[0] = s1*e; Y[1] = wr1-s1*d1+PETSC_i*wi;
249:       }
250:       norm = BLASnrm2_(&two,Y,&one);
251:       norm = 1/norm;
252:       if (ds->state >= DS_STATE_CONDENSED) {
253:         alpha = norm;
254:         BLASgemv_("N",&n_,&two,&alpha,ds->mat[DS_MAT_Q]+k*ld,&ld,Y,&one,&zeroS,X+k*ld,&one);
255:         if (rnorm) *rnorm = PetscAbsScalar(X[ds->n-1+k*ld]);
256:       } else {
257:         PetscMemzero(X+k*ld,2*ld*sizeof(PetscScalar));
258:         X[k*ld+k] = Y[0]*norm; X[k*ld+k+1] = Y[1]*norm;
259:       }
260:       X[(k+1)*ld+k] = PetscConj(X[k*ld+k]); X[(k+1)*ld+k+1] = PetscConj(X[k*ld+k+1]);
261: #endif
262:       (*idx)++;
263:     }
264:   }
265:   return(0);
266: }

270: PetscErrorCode DSVectors_GHIEP(DS ds,DSMatType mat,PetscInt *k,PetscReal *rnorm)
271: {
272:   PetscInt       i;
273:   PetscReal      e;

277:   switch (mat) {
278:     case DS_MAT_X:
279:       if (k) {
280:         DSVectors_GHIEP_Eigen_Some(ds,k,rnorm);
281:       } else {
282:         for (i=0; i<ds->n; i++) {
283:           e = (ds->compact)?*(ds->rmat[DS_MAT_T]+ds->ld+i):PetscRealPart(*(ds->mat[DS_MAT_A]+(i+1)+ds->ld*i));
284:           if (e == 0.0) {/* real */
285:             if (ds->state >= DS_STATE_CONDENSED) {
286:               PetscMemcpy(ds->mat[mat]+i*ds->ld,ds->mat[DS_MAT_Q]+i*ds->ld,ds->ld*sizeof(PetscScalar));
287:             } else {
288:               PetscMemzero(ds->mat[mat]+i*ds->ld,ds->ld*sizeof(PetscScalar));
289:               *(ds->mat[mat]+i+i*ds->ld) = 1.0;
290:             }
291:           } else {
292:             DSVectors_GHIEP_Eigen_Some(ds,&i,rnorm);
293:           }
294:         }
295:       }
296:       break;
297:     case DS_MAT_Y:
298:     case DS_MAT_U:
299:     case DS_MAT_VT:
300:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
301:       break;
302:     default:
303:       SETERRQ(((PetscObject)ds)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
304:   }
305:   return(0);
306: }

310: /*
311:   Extract the eigenvalues contained in the block-diagonal of the indefinite problem.
312:   Only the index range n0..n1 is processed.
313: */
314: PetscErrorCode DSGHIEPComplexEigs(DS ds, PetscInt n0, PetscInt n1, PetscScalar *wr, PetscScalar *wi)
315: {
316:   PetscInt     k,ld;
317:   PetscBLASInt two=2;
318:   PetscScalar  *A,*B;
319:   PetscReal    *D,*T;
320:   PetscReal    b[4],M[4],d1,d2,s1,s2,e;
321:   PetscReal    scal1,scal2,ep,wr1,wr2,wi1;

324:   ld = ds->ld;
325:   A = ds->mat[DS_MAT_A];
326:   B = ds->mat[DS_MAT_B];
327:   D = ds->rmat[DS_MAT_D];
328:   T = ds->rmat[DS_MAT_T];
329:   for (k=n0;k<n1;k++) {
330:     if (k < n1-1) {
331:       e = (ds->compact)?T[ld+k]:PetscRealPart(A[(k+1)+ld*k]);
332:     }else e = 0.0;
333:     if (e==0.0) {
334:       /* real eigenvalue */
335:       wr[k] = (ds->compact)?T[k]/D[k]:A[k+k*ld]/B[k+k*ld];
336: #if !defined(PETSC_USE_COMPLEX)
337:       wi[k] = 0.0 ;
338: #endif
339:     } else {
340:       /* diagonal block */
341:       if (ds->compact) {
342:         s1 = D[k];
343:         d1 = T[k];
344:         s2 = D[k+1];
345:         d2 = T[k+1];
346:       } else {
347:         s1 = PetscRealPart(B[k*ld+k]);
348:         d1 = PetscRealPart(A[k+k*ld]);
349:         s2 = PetscRealPart(B[(k+1)*ld+k+1]);
350:         d2 = PetscRealPart(A[k+1+(k+1)*ld]);
351:       }
352:       M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
353:       b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
354:       ep = LAPACKlamch_("S");
355:       /* Compute eigenvalues of the block */
356:       LAPACKlag2_(M, &two, b, &two, &ep, &scal1, &scal2, &wr1, &wr2, &wi1);
357:       if (scal1<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
358:       wr[k] = wr1/scal1;
359:       if (wi1==0.0) { /* Real eigenvalues */
360:         if (scal2<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
361:         wr[k+1] = wr2/scal2;
362: #if !defined(PETSC_USE_COMPLEX)
363:         wi[k] = 0.0;
364:         wi[k+1] = 0.0;
365: #endif
366:       } else { /* Complex eigenvalues */
367: #if !defined(PETSC_USE_COMPLEX)
368:         wr[k+1] = wr[k];
369:         wi[k] = wi1/scal1;
370:         wi[k+1] = -wi[k];
371: #else
372:         wr[k] += PETSC_i*wi1/scal1;
373:         wr[k+1] = PetscConj(wr[k]);
374: #endif
375:       }
376:       k++;
377:     }
378:   }
379: #if defined(PETSC_USE_COMPLEX)
380:   if (wi) {
381:     for (k=n0;k<n1;k++) wi[k] = 0.0;
382:   }
383: #endif
384:   return(0);
385: }

389: PetscErrorCode DSSort_GHIEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
390: {
392:   PetscInt       n,i,*perm;
393:   PetscReal      *d,*e,*s;

396: #if !defined(PETSC_USE_COMPLEX)
398: #endif
399:   n = ds->n;
400:   d = ds->rmat[DS_MAT_T];
401:   e = d + ds->ld;
402:   s = ds->rmat[DS_MAT_D];
403:   DSAllocateWork_Private(ds,ds->ld,ds->ld,0);
404:   perm = ds->perm;
405:   if (!rr) {
406:     rr = wr;
407:     ri = wi;
408:   }
409:   DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_TRUE);
410:   if (!ds->compact) {DSSwitchFormat_GHIEP(ds,PETSC_TRUE);}
411:   PetscMemcpy(ds->work,wr,n*sizeof(PetscScalar));
412:   for (i=ds->l;i<n;i++) {
413:     wr[i] = *(ds->work + perm[i]);
414:   }
415: #if !defined(PETSC_USE_COMPLEX)
416:   PetscMemcpy(ds->work,wi,n*sizeof(PetscScalar));
417:   for (i=ds->l;i<n;i++) {
418:     wi[i] = *(ds->work + perm[i]);
419:   }
420: #endif
421:   PetscMemcpy(ds->rwork,s,n*sizeof(PetscReal));
422:   for (i=ds->l;i<n;i++) {
423:     s[i] = *(ds->rwork+perm[i]);
424:   }
425:   PetscMemcpy(ds->rwork,d,n*sizeof(PetscReal));
426:   for (i=ds->l;i<n;i++) {
427:     d[i] = *(ds->rwork  + perm[i]);
428:   }
429:   PetscMemcpy(ds->rwork,e,(n-1)*sizeof(PetscReal));
430:   PetscMemzero(e+ds->l,(n-1-ds->l)*sizeof(PetscScalar));
431:   for (i=ds->l;i<n-1;i++) {
432:     if (perm[i]<n-1) e[i] = *(ds->rwork + perm[i]);
433:   }
434:   if (!ds->compact) { DSSwitchFormat_GHIEP(ds,PETSC_FALSE);}
435:   DSPermuteColumns_Private(ds,ds->l,n,DS_MAT_Q,perm);
436:   return(0);
437: }

441: /*
442:   Generates a hyperbolic rotation
443:     if x1*x1 - x2*x2 != 0 
444:       r = sqrt( |x1*x1 - x2*x2| )
445:       c = x1/r  s = x2/r
446:      
447:       | c -s||x1|   |d*r|
448:       |-s  c||x2| = | 0 | 
449:       where d = 1 for type==1 and -1 for type==2
450:   Returns the condition number of the reduction
451: */
452: static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
453: {
454:   PetscReal t,n2,xa,xb;
455:   PetscInt  type_;

458:   if (x2==0) {
459:     *r = PetscAbsReal(x1);
460:     *c = (x1>=0)?1.0:-1.0;
461:     *s = 0.0;
462:     if (type) *type = 1;
463:     return(0);
464:   }
465:   if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
466:     /* hyperbolic rotation doesn't exist */
467:     *c = 0;
468:     *s = 0;
469:     *r = 0;
470:     if (type) *type = 0;
471:     *cond = PETSC_MAX_REAL;
472:     return(0);
473:   }
474: 
475:   if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
476:     xa = x1; xb = x2; type_ = 1;
477:   } else {
478:     xa = x2; xb = x1; type_ = 2;
479:   }
480:   t = xb/xa;
481:   n2 = PetscAbsReal(1 - t*t);
482:   *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
483:   *c = x1/(*r);
484:   *s = x2/(*r);
485:   if (type_ == 2) *r *= -1;
486:   if (type) *type = type_;
487:   if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
488:   return(0);
489: }

493: /*
494:                                 |c  s|
495:   Applies an hyperbolic rotator |s  c|
496:            |c  s|
497:     [x1 x2]|s  c| 
498: */
499: PetscErrorCode HRApply(PetscInt n, PetscScalar *x1,PetscInt inc1, PetscScalar *x2, PetscInt inc2,PetscReal c, PetscReal s)
500: {
501:   PetscInt    i;
502:   PetscReal   t;
503:   PetscScalar tmp;
504: 
506:   if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
507:     t = s/c;
508:     for (i=0;i<n;i++) {
509:       x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
510:       x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
511:     }
512:   } else { /* Type II */
513:     t = c/s;
514:     for (i=0;i<n;i++) {
515:       tmp = x1[i*inc1];
516:       x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
517:       x2[i*inc2] = t*x1[i*inc1] + tmp/s;
518:     }
519:   }
520:   return(0);
521: }

525: /*
526:   Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).

528:   Input:
529:     A symmetric (only lower triangular part is refered)
530:     s vector +1 and -1 (signature matrix)
531:   Output:
532:     d,e
533:     s
534:     Q s-orthogonal matrix whith Q^T*A*Q = T (symmetric tridiagonal matrix)
535: */
536: static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *w,PetscInt lw)
537: {
538: #if defined(PETSC_MISSING_LAPACK_LARFG) || defined(PETSC_MISSING_LAPACK_LARF)
540:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
541: #else
543:   PetscInt       i,j,k,*ii,*jj,i0=0,ik=0,tmp,type,*perm,nwall,nwu;
544:   PetscReal      *ss,cond=1.0,cs,sn,r;
545:   PetscScalar    *work,tau,t,*AA;
546:   PetscBLASInt   n0,n1,ni,inc=1,m,n_,lda_,ldq_;
547:   PetscBool      breakdown = PETSC_TRUE;
548: 
550:   if (n<3) {
551:     if (n==1)Q[0]=1;
552:     if (n==2) {Q[0] = Q[1+ldq] = 1; Q[1] = Q[ldq] = 0;}
553:     return(0);
554:   }
555:   lda_ = PetscBLASIntCast(lda);
556:   n_   = PetscBLASIntCast(n);
557:   ldq_ = PetscBLASIntCast(ldq);
558:   nwall = n*n+n;
559:   nwu = 0;
560:   if (!w || lw < nwall) {
561:     PetscMalloc(nwall*sizeof(PetscScalar),&work);
562:   }else work = w;
563:   PetscMalloc(n*sizeof(PetscReal),&ss);
564:   PetscMalloc(n*sizeof(PetscInt),&perm);
565:   AA = work;
566:   for (i=0;i<n;i++) {
567:     PetscMemcpy(AA+i*n,A+i*lda,n*sizeof(PetscScalar));
568:   }
569:   nwu += n*n;
570:   k=0;
571:   while (breakdown && k<n) {
572:     breakdown = PETSC_FALSE;
573:     /* Classify (and flip) A and s according to sign */
574:     if (flip) {
575:       for (i=0;i<n;i++) {
576:         perm[i] = n-1-perm_[i];
577:         if (perm[i]==0) i0 = i;
578:         if (perm[i]==k) ik = i;
579:       }
580:     } else {
581:       for (i=0;i<n;i++) {
582:         perm[i] = perm_[i];
583:         if (perm[i]==0) i0 = i;
584:         if (perm[i]==k) ik = i;
585:       }
586:     }
587:     perm[ik] = 0;
588:     perm[i0] = k;
589:     i=1;
590:     while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
591:       if (s[perm[i]]!=s[perm[0]]) {
592:         j=i+1;
593:         while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
594:         tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
595:       }
596:       i++;
597:     }
598:     for (i=0;i<n;i++) {
599:       ss[i] = s[perm[i]];
600:     }
601:     if (flip) { ii = &j; jj = &i;} else { ii = &i; jj = &j;}
602:     for (i=0;i<n;i++)
603:       for (j=0;j<n;j++)
604:         A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
605:     /* Initialize Q */
606:     for (i=0;i<n;i++) {
607:       PetscMemzero(Q+i*ldq,n*sizeof(PetscScalar));
608:       Q[perm[i]+i*ldq] = 1.0;
609:     }
610:     for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
611:     n0 = ni-1; n1 = PetscBLASIntCast(n)-ni;
612:     for (j=0;j<n-2;j++) {
613:       m = PetscBLASIntCast(n-j-1);
614:       /* Forming and applying reflectors */
615:       if ( n0 > 1 ) {
616:         LAPACKlarfg_(&n0, A+ni-n0+j*lda, A+ni-n0+j*lda+1,&inc,&tau);
617:         /* Apply reflector */
618:         if ( PetscAbsScalar(tau) != 0.0 ) {
619:           t=*( A+ni-n0+j*lda);  *(A+ni-n0+j*lda)=1.0;
620:           LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu);
621:           LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu);
622:           /* Update Q */
623:           LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu);
624:           *(A+ni-n0+j*lda) = t;
625:           for (i=1;i<n0;i++) {
626:             *(A+ni-n0+j*lda+i) = 0.0;  *(A+j+(ni-n0+i)*lda) = 0.0;
627:           }
628:           *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
629:         }
630:       }
631:       if ( n1 > 1 ) {
632:         LAPACKlarfg_(&n1, A+n-n1+j*lda, A+n-n1+j*lda+1,&inc,&tau);
633:         /* Apply reflector */
634:         if ( PetscAbsScalar(tau) != 0.0 ) {
635:           t=*( A+n-n1+j*lda);  *(A+n-n1+j*lda)=1.0;
636:           LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu);
637:           LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu);
638:           /* Update Q */
639:           LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu);
640:           *(A+n-n1+j*lda) = t;
641:           for (i=1;i<n1;i++) {
642:             *(A+n-n1+i+j*lda) = 0.0;  *(A+j+(n-n1+i)*lda) = 0.0;
643:           }
644:           *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
645:         }
646:       }
647:       /* Hyperbolic rotation */
648:       if ( n0 > 0 && n1 > 0) {
649:         HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond);
650:         /* Check condition number */
651:         if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
652:           breakdown = PETSC_TRUE;
653:           k++;
654:           if (k==n || flip)
655:             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Breakdown in construction of hyperbolic transformation");
656:           break;
657:         }
658:         A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
659:         A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
660:         /* Apply to A */
661:         HRApply(m, A+j+1+(ni-n0)*lda,1, A+j+1+(n-n1)*lda,1, cs, -sn);
662:         HRApply(m, A+ni-n0+(j+1)*lda,lda, A+n-n1+(j+1)*lda,lda, cs, -sn);
663: 
664:         /* Update Q */
665:         HRApply(n, Q+(ni-n0)*ldq,1, Q+(n-n1)*ldq,1, cs, -sn);
666:         if (type==2) {
667:           ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
668:           n0++;ni++;n1--;
669:         }
670:       }
671:       if (n0>0) n0--;else n1--;
672:     }
673:   }

675: /* flip matrices */
676:     if (flip) {
677:       for (i=0;i<n-1;i++) {
678:         d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
679:         e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
680:         s[i] = ss[n-i-1];
681:       }
682:       s[n-1] = ss[0];
683:       d[n-1] = PetscRealPart(A[0]);
684:       for (i=0;i<n;i++) {
685:         ierr=PetscMemcpy(work+i*n,Q+i*ldq,n*sizeof(PetscScalar));
686:       }
687:       for (i=0;i<n;i++)
688:         for (j=0;j<n;j++)
689:           Q[i+j*ldq] = work[i+(n-j-1)*n];
690:     } else {
691:       for (i=0;i<n-1;i++) {
692:         d[i] = PetscRealPart(A[i+i*lda]);
693:         e[i] = PetscRealPart(A[i+1+i*lda]);
694:         s[i] = ss[i];
695:       }
696:       s[n-1] = ss[n-1];
697:       d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
698:     }

700:   PetscFree(ss);
701:   PetscFree(perm);
702:   return(0);
703: #endif
704: }

708: /*
709:   compute x = x - y*ss^{-1}*y^T*s*x where ss=y^T*s*y
710:   s diagonal (signature matrix)
711: */
712: static PetscErrorCode IndefOrthog(PetscReal *s, PetscScalar *y, PetscReal ss, PetscScalar *x, PetscScalar *h,PetscInt n)
713: {
714:   PetscInt    i;
715:   PetscScalar h_,r;

718:   if (y) {
719:     h_ = 0.0; /* h_=(y^Tdiag(s)*y)^{-1}*y^T*diag(s)*x*/
720:     for (i=0;i<n;i++) { h_+=y[i]*s[i]*x[i];}
721:     h_ /= ss;
722:     for (i=0;i<n;i++) {x[i] -= h_*y[i];} /* x = x-h_*y */
723:     /* repeat */
724:     r = 0.0;
725:     for (i=0;i<n;i++) { r+=y[i]*s[i]*x[i];}
726:     r /= ss;
727:     for (i=0;i<n;i++) {x[i] -= r*y[i];}
728:     h_ += r;
729:   }else h_ = 0.0;
730:   if (h) *h = h_;
731:   return(0);
732: }

736: /* 
737:    normalization with a indefinite norm
738: */
739: static PetscErrorCode IndefNorm(PetscReal *s,PetscScalar *x, PetscReal *norm,PetscInt n)
740: {
741:   PetscInt  i;
742:   PetscReal norm_;

745:   /* s-normalization */
746:   norm_ = 0.0;
747:   for (i=0;i<n;i++) {norm_ += PetscRealPart(x[i]*s[i]*x[i]);}
748:   if (norm_<0) {norm_ = -PetscSqrtReal(-norm_);}
749:   else {norm_ = PetscSqrtReal(norm_);}
750:   for (i=0;i<n;i++)x[i] /= norm_;
751:   if (norm) *norm = norm_;
752:   return(0);
753: }

757: static PetscErrorCode DSEigenVectorsPseudoOrthog(DS ds, DSMatType mat, PetscScalar *wr, PetscScalar *wi,PetscBool accum)
758: {
760:   PetscInt       i,j,k,off;
761:   PetscBLASInt   ld,n1,one=1;
762:   PetscScalar    PQ[4],xx,yx,xy,yy,*y,h,oneS=1.0,zeroS=0.0,*X,*W,*B;
763:   PetscReal      *ss,*s,*d,*e,d1,d2,toldeg=PETSC_SQRT_MACHINE_EPSILON*100,vi,vj;

766:   ld = PetscBLASIntCast(ds->ld);
767:   n1 = PetscBLASIntCast(ds->n - ds->l);
768:   DSAllocateWork_Private(ds,ld*ld+2*ld,ld,2*ld);
769:   s = ds->rmat[DS_MAT_D];
770:   d = ds->rmat[DS_MAT_T];
771:   e = d + ld;
772:   off = ds->l+ds->l*ld;
773:   if (!ds->compact) {
774:     B = ds->mat[DS_MAT_B];
775:     for (i=ds->l;i<ds->n;i++) {
776:       s[i] = PetscRealPart(B[i+i*ld]);
777:     }
778:   }

780:   /* compute real s-orthonormal base */
781:   X = ds->mat[mat];
782:   ss = ds->rwork;
783:   y = ds->work;

785: #if defined(PETSC_USE_COMPLEX)
786:   /* with complex scalars we need to operate as in real scalar */
787:   for (i=ds->l;i<ds->n;i++) {
788:     if (PetscImaginaryPart(wr[i])!=0.0) {
789:       for (j=ds->l;j<ds->n;j++) {
790:         X[j+(i+1)*ld] = PetscImaginaryPart(X[j+i*ld]);
791:         X[j+i*ld] = PetscRealPart(X[j+i*ld]);
792:       }
793:       i++;
794:     }
795:   }
796: #endif

798:   for (i=ds->l;i<ds->n;i++) {
799: #if defined(PETSC_USE_COMPLEX)
800:     vi = PetscImaginaryPart(wr[i]);
801: #else
802:     vi = PetscRealPart(wi[i]);
803: #endif
804:     if (vi==0.0) { /* real */
805:       for (j=ds->l;j<i;j++) {
806: #if defined(PETSC_USE_COMPLEX)
807:         vj = PetscImaginaryPart(wr[j]);
808: #else
809:         vj = PetscRealPart(wi[j]);
810: #endif
811:          /* s-orthogonalization with close eigenvalues */
812:         if (vj==0.0) {
813:           if ( PetscAbsScalar(wr[j]-wr[i])<toldeg) {
814:             IndefOrthog(s+ds->l, X+j*ld+ds->l, ss[j],X+i*ld+ds->l, PETSC_NULL,n1);
815:           }
816:         }else j++;
817:       }
818:       IndefNorm(s+ds->l,X+i*ld+ds->l,&d1,n1);
819:       ss[i] = (d1<0.0)?-1:1;
820:       d[i] = PetscRealPart(wr[i]*ss[i]); e[i] = 0.0;
821:     } else { /* complex value */
822:       for (j=ds->l;j<i;j++) {
823:         /* s-orthogonalization of Xi and Xi+1*/
824: #if defined(PETSC_USE_COMPLEX)
825:         vj = PetscImaginaryPart(wr[j]);
826: #else
827:         vj = PetscRealPart(wi[j]);
828: #endif
829:         if (vj!=0.0) {
830:           if (PetscAbsScalar(wr[j]-wr[i])<toldeg && PetscAbsScalar(PetscAbsReal(vj)-PetscAbsReal(vi))<toldeg) {
831:             for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+i*ld];
832:             xx = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
833:             yx = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
834:             for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+(i+1)*ld];
835:             xy = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
836:             yy = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
837:             PQ[0] = ss[j]*xx; PQ[1] = ss[j+1]*yx; PQ[2] = ss[j]*xy; PQ[3] = ss[j+1]*yy;
838:             for (k=ds->l;k<ds->n;k++) {
839:               X[k+i*ld] -= PQ[0]*X[k+j*ld]+PQ[1]*X[k+(j+1)*ld];
840:               X[k+(i+1)*ld] -= PQ[2]*X[k+j*ld]+PQ[3]*X[k+(j+1)*ld];
841:             }
842:             /* Repeat */
843:             for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+i*ld];
844:             xx = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
845:             yx = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
846:             for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+(i+1)*ld];
847:             xy = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
848:             yy = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
849:             PQ[0] = ss[j]*xx; PQ[1] = ss[j+1]*yx; PQ[2] = ss[j]*xy; PQ[3] = ss[j+1]*yy;
850:             for (k=ds->l;k<ds->n;k++) {
851:               X[k+i*ld] -= PQ[0]*X[k+j*ld]+PQ[1]*X[k+(j+1)*ld];
852:               X[k+(i+1)*ld] -= PQ[2]*X[k+j*ld]+PQ[3]*X[k+(j+1)*ld];
853:             }
854:           }
855:           j++;
856:         }
857:       }
858:       IndefNorm(s+ds->l,X+i*ld+ds->l,&d1,n1);
859:       ss[i] = (d1<0)?-1:1;
860:       IndefOrthog(s+ds->l, X+i*ld+ds->l, ss[i],X+(i+1)*ld+ds->l, &h,n1);
861:       IndefNorm(s+ds->l,X+(i+1)*ld+ds->l,&d2,n1);
862:       ss[i+1] = (d2<0)?-1:1;
863:       d[i] = (PetscRealPart(wr[i]-vi*h/d1))*ss[i];
864:       d[i+1] = (PetscRealPart(wr[i]+vi*h/d1))*ss[i+1];
865:       e[i] = vi*d2/d1*ss[i]; e[i+1] = 0.0;
866:       i++;
867:     }
868:   }
869:   for (i=ds->l;i<ds->n;i++) s[i] = ss[i];
870:   /* accumulate previous Q */
871:   if (accum && mat!=DS_MAT_Q) {
872:     DSAllocateMat_Private(ds,DS_MAT_W);
873:     W = ds->mat[DS_MAT_W];
874:     DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
875:     BLASgemm_("N","N",&n1,&n1,&n1,&oneS,W+off,&ld,ds->mat[DS_MAT_X]+off,&ld,&zeroS,ds->mat[DS_MAT_Q]+off,&ld);
876:   }
877:   if (!ds->compact) {DSSwitchFormat_GHIEP(ds,PETSC_FALSE);}
878:   return(0);
879: }

883: /*
884:   Get eigenvectors with inverse iteration.
885:   The system matrix is in Hessenberg form.
886: */
887: PetscErrorCode DSGHIEPPseudoOrthogInverseIteration(DS ds,PetscScalar *wr,PetscScalar *wi)
888: {
889: #if defined(PETSC_MISSING_LAPACK_HSEIN)
891:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HSEIN - Lapack routine is unavailable");
892: #else
894:   PetscInt       i,off;
895:   PetscBLASInt   *select,*infoC,ld,n1,mout,info;
896:   PetscScalar    *A,*B,*H,*X;
897:   PetscReal      *s,*d,*e;

900:   ld = PetscBLASIntCast(ds->ld);
901:   n1 = PetscBLASIntCast(ds->n - ds->l);
902:   DSAllocateWork_Private(ds,ld*ld+2*ld,ld,2*ld);
903:   DSAllocateMat_Private(ds,DS_MAT_W);
904:   A = ds->mat[DS_MAT_A];
905:   B = ds->mat[DS_MAT_B];
906:   H = ds->mat[DS_MAT_W];
907:   s = ds->rmat[DS_MAT_D];
908:   d = ds->rmat[DS_MAT_T];
909:   e = d + ld;
910:   select = ds->iwork;
911:   infoC = ds->iwork + ld;
912:   off = ds->l+ds->l*ld;
913:   if (ds->compact) {
914:     H[off] = d[ds->l]*s[ds->l];
915:     H[off+ld] = e[ds->l]*s[ds->l];
916:     for (i=ds->l+1;i<ds->n-1;i++) {
917:       H[i+(i-1)*ld] = e[i-1]*s[i];
918:       H[i+i*ld] = d[i]*s[i];
919:       H[i+(i+1)*ld] = e[i]*s[i];
920:     }
921:     H[ds->n-1+(ds->n-2)*ld] = e[ds->n-2]*s[ds->n-1];
922:     H[ds->n-1+(ds->n-1)*ld] = d[ds->n-1]*s[ds->n-1];
923:   } else {
924:     s[ds->l] = PetscRealPart(B[off]);
925:     H[off] = A[off]*s[ds->l];
926:     H[off+ld] = A[off+ld]*s[ds->l];
927:     for (i=ds->l+1;i<ds->n-1;i++) {
928:       s[i] = PetscRealPart(B[i+i*ld]);
929:       H[i+(i-1)*ld] = A[i+(i-1)*ld]*s[i];
930:       H[i+i*ld]     = A[i+i*ld]*s[i];
931:       H[i+(i+1)*ld] = A[i+(i+1)*ld]*s[i];
932:     }
933:     s[ds->n-1] = PetscRealPart(B[ds->n-1+(ds->n-1)*ld]);
934:     H[ds->n-1+(ds->n-2)*ld] = A[ds->n-1+(ds->n-2)*ld]*s[ds->n-1];
935:     H[ds->n-1+(ds->n-1)*ld] = A[ds->n-1+(ds->n-1)*ld]*s[ds->n-1];
936:   }
937:   DSAllocateMat_Private(ds,DS_MAT_X);
938:   X = ds->mat[DS_MAT_X];
939:   for (i=0;i<n1;i++)select[i]=1;
940: #if !defined(PETSC_USE_COMPLEX)
941:   LAPACKhsein_("R","N","N",select,&n1,H+off,&ld,wr+ds->l,wi+ds->l,PETSC_NULL,&ld,X+off,&ld,&n1,&mout,ds->work,PETSC_NULL,infoC,&info);
942: #else
943:   LAPACKhsein_("R","N","N",select,&n1,H+off,&ld,wr+ds->l,PETSC_NULL,&ld,X+off,&ld,&n1,&mout,ds->work,ds->rwork,PETSC_NULL,infoC,&info);
944: #endif
945:   if (info<0)SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in hsein routine %d",-i);
946:   if (info>0) {
947:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Convergence error in hsein routine %d",i);
948:   }

950:   DSEigenVectorsPseudoOrthog(ds, DS_MAT_X, wr, wi,PETSC_TRUE);
951:   return(0);
952: #endif
953: }

957: /*
958:    Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
959: */
960: PetscErrorCode DSIntermediate_GHIEP(DS ds)
961: {
963:   PetscInt       i,ld,off;
964:   PetscScalar    *A,*B,*Q;
965:   PetscReal      *d,*e,*s;

968:   ld = ds->ld;
969:   A = ds->mat[DS_MAT_A];
970:   B = ds->mat[DS_MAT_B];
971:   Q = ds->mat[DS_MAT_Q];
972:   d = ds->rmat[DS_MAT_T];
973:   e = ds->rmat[DS_MAT_T]+ld;
974:   s = ds->rmat[DS_MAT_D];
975:   off = ds->l+ds->l*ld;
976:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
977:   DSAllocateWork_Private(ds,ld*ld,0,0);

979:   for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
980:   for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
981:   if (ds->compact) {
982:     if (ds->state < DS_STATE_INTERMEDIATE) {
983:       DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
984:       TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work,ld*ld);
985:       ds->k = ds->l;
986:       PetscMemzero(d+2*ld+ds->l,(ds->n-ds->l)*sizeof(PetscReal));
987:     }
988:   } else {
989:     if (ds->state < DS_STATE_INTERMEDIATE) {
990:       for (i=0;i<ds->n;i++)
991:         s[i] = PetscRealPart(B[i+i*ld]);
992:       TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work,ld*ld);
993:       PetscMemzero(d+2*ld,(ds->n)*sizeof(PetscReal));
994:       ds->k = ds->l;
995:       DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
996:     } else { DSSwitchFormat_GHIEP(ds,PETSC_TRUE); }
997:   }
998:   return(0);
999: }

1003: /*
1004:    Undo 2x2 blocks that have real eigenvalues.
1005: */
1006: PetscErrorCode DSGHIEPRealBlocks(DS ds)
1007: {
1009:   PetscInt       i;
1010:   PetscReal      e,d1,d2,s1,s2,ss1,ss2,t,dd,ss;
1011:   PetscReal      maxy,ep,scal1,scal2,snorm;
1012:   PetscReal      *T,*D,b[4],M[4],wr1,wr2,wi;
1013:   PetscScalar    *A,*B,Y[4],oneS = 1.0,zeroS = 0.0;
1014:   PetscBLASInt   m,two=2,ld;
1015:   PetscBool      isreal;

1018:   ld = PetscBLASIntCast(ds->ld);
1019:   m = PetscBLASIntCast(ds->n-ds->l);
1020:   A = ds->mat[DS_MAT_A];
1021:   B = ds->mat[DS_MAT_B];
1022:   T = ds->rmat[DS_MAT_T];
1023:   D = ds->rmat[DS_MAT_D];
1024:   DSAllocateWork_Private(ds,2*m,0,0);
1025:   for (i=ds->l;i<ds->n-1;i++) {
1026:     e = (ds->compact)?T[ld+i]:PetscRealPart(A[(i+1)+ld*i]);
1027:     if (e != 0.0) { /* 2x2 block */
1028:       if (ds->compact) {
1029:         s1 = D[i];
1030:         d1 = T[i];
1031:         s2 = D[i+1];
1032:         d2 = T[i+1];
1033:       } else {
1034:         s1 = PetscRealPart(B[i*ld+i]);
1035:         d1 = PetscRealPart(A[i*ld+i]);
1036:         s2 = PetscRealPart(B[(i+1)*ld+i+1]);
1037:         d2 = PetscRealPart(A[(i+1)*ld+i+1]);
1038:       }
1039:       isreal = PETSC_FALSE;
1040:       if (s1==s2) { /* apply a Jacobi rotation to compute the eigendecomposition */
1041:         dd = d1-d2;
1042:         if (2*PetscAbsReal(e) <= dd) {
1043:           t = 2*e/dd;
1044:           t = t/(1 + PetscSqrtReal(1+t*t));
1045:         } else {
1046:           t = dd/(2*e);
1047:           ss = (t>=0)?1.0:-1.0;
1048:           t = ss/(PetscAbsReal(t)+PetscSqrtReal(1+t*t));
1049:         }
1050:         Y[0] = 1/PetscSqrtReal(1 + t*t); Y[3] = Y[0]; /* c */
1051:         Y[1] = Y[0]*t; Y[2] = -Y[1]; /* s */
1052:         wr1 = d1+t*e;
1053:         wr2 = d2-t*e;
1054:         ss1 = s1; ss2 = s2;
1055:         isreal = PETSC_TRUE;
1056:       } else {
1057:         ss1 = 1.0; ss2 = 1.0,
1058:         M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
1059:         b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
1060:         ep = LAPACKlamch_("S");
1061:         /* Compute eigenvalues of the block */
1062:         LAPACKlag2_(M, &two, b, &two,&ep , &scal1, &scal2, &wr1, &wr2, &wi);
1063:         if (wi==0.0) { /* Real eigenvalues */
1064:           isreal = PETSC_TRUE;
1065:           if (scal1<ep||scal2<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
1066:           wr1 /= scal1; wr2 /= scal2;
1067:           if ( PetscAbsReal(s1*d1-wr1)<PetscAbsReal(s2*d2-wr1)) { Y[0] = wr1-s2*d2; Y[1] =s2*e;}
1068:           else{ Y[0] = s1*e; Y[1] = wr1-s1*d1; }
1069:           /* normalize with a signature*/
1070:           maxy = PetscMax(PetscAbsScalar(Y[0]),PetscAbsScalar(Y[1]));
1071:           scal1 = PetscRealPart(Y[0])/maxy; scal2 = PetscRealPart(Y[1])/maxy;
1072:           snorm = scal1*scal1*s1 + scal2*scal2*s2;
1073:           if (snorm<0) {ss1 = -1.0; snorm = -snorm;}
1074:           snorm = maxy*PetscSqrtReal(snorm); Y[0] = Y[0]/snorm; Y[1] = Y[1]/snorm;
1075:           if ( PetscAbsReal(s1*d1-wr2)<PetscAbsReal(s2*d2-wr2)) { Y[2] = wr2-s2*d2; Y[3] =s2*e;}
1076:           else{ Y[2] = s1*e; Y[3] = wr2-s1*d1; }
1077:           maxy = PetscMax(PetscAbsScalar(Y[2]),PetscAbsScalar(Y[3]));
1078:           scal1 = PetscRealPart(Y[2])/maxy; scal2 = PetscRealPart(Y[3])/maxy;
1079:           snorm = scal1*scal1*s1 + scal2*scal2*s2;
1080:           if (snorm<0) {ss2 = -1.0; snorm = -snorm;}
1081:           snorm = maxy*PetscSqrtReal(snorm);Y[2] = Y[2]/snorm; Y[3] = Y[3]/snorm;
1082:         }
1083:         wr1 *= ss1; wr2 *= ss2;
1084:       }
1085:       if (isreal) {
1086:         if (ds->compact) {
1087:           D[i] = ss1;;
1088:           T[i] = wr1;
1089:           D[i+1] = ss2;
1090:           T[i+1] = wr2;
1091:           T[ld+i] = 0.0;
1092:         }else {
1093:           B[i*ld+i] = ss1;
1094:           A[i*ld+i] = wr1;
1095:           B[(i+1)*ld+i+1] = ss2;
1096:           A[(i+1)*ld+i+1] = wr2;
1097:           A[(i+1)+ld*i] = 0.0;
1098:           A[i+ld*(i+1)] = 0.0;
1099:         }
1100:         BLASgemm_("N","N",&m,&two,&two,&oneS,ds->mat[DS_MAT_Q]+ds->l+i*ld,&ld,Y,&two,&zeroS,ds->work,&m);
1101:         PetscMemcpy(ds->mat[DS_MAT_Q]+ds->l+i*ld,ds->work,m*sizeof(PetscScalar));
1102:         PetscMemcpy(ds->mat[DS_MAT_Q]+ds->l+(i+1)*ld,ds->work+m,m*sizeof(PetscScalar));
1103:       }
1104:       i++;
1105:     }
1106:   }
1107:   return(0);
1108: }

1112: PetscErrorCode DSSolve_GHIEP_QR_II(DS ds,PetscScalar *wr,PetscScalar *wi)
1113: {
1114: #if defined(PETSC_MISSING_LAPACK_HSEQR)
1116:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable");
1117: #else
1119:   PetscInt       i,off;
1120:   PetscBLASInt   n1,ld,one,info,lwork;
1121:   PetscScalar    *H,*A,*B,*Q,*work;
1122:   PetscReal      *d,*e,*s;

1125: #if !defined(PETSC_USE_COMPLEX)
1127: #endif
1128:   one = 1;
1129:   n1 = PetscBLASIntCast(ds->n - ds->l);
1130:   ld = PetscBLASIntCast(ds->ld);
1131:   off = ds->l + ds->l*ld;
1132:   A = ds->mat[DS_MAT_A];
1133:   B = ds->mat[DS_MAT_B];
1134:   Q = ds->mat[DS_MAT_Q];
1135:   d = ds->rmat[DS_MAT_T];
1136:   e = ds->rmat[DS_MAT_T] + ld;
1137:   s = ds->rmat[DS_MAT_D];
1138:   DSAllocateWork_Private(ds,ld*ld,2*ld,ld*2);
1139:   work = ds->work;
1140:   lwork = ld*ld;

1142:   /* Quick return if possible */
1143:   if (n1 == 1) {
1144:     *(Q+off) = 1;
1145:     if (!ds->compact) {
1146:       d[ds->l] = PetscRealPart(A[off]);
1147:       s[ds->l] = PetscRealPart(B[off]);
1148:     }
1149:     wr[ds->l] = d[ds->l]/s[ds->l];
1150:     if (wi) wi[ds->l] = 0.0;
1151:     return(0);
1152:   }
1153:   /* Reduce to pseudotriadiagonal form */
1154:   DSIntermediate_GHIEP( ds);

1156:   /* Compute Eigenvalues (QR)*/
1157:   DSAllocateMat_Private(ds,DS_MAT_W);
1158:   H = ds->mat[DS_MAT_W];
1159:   if (ds->compact) {
1160:     H[off] = d[ds->l]*s[ds->l];
1161:     H[off+ld] = e[ds->l]*s[ds->l];
1162:     for (i=ds->l+1;i<ds->n-1;i++) {
1163:       H[i+(i-1)*ld] = e[i-1]*s[i];
1164:       H[i+i*ld]     = d[i]*s[i];
1165:       H[i+(i+1)*ld] = e[i]*s[i];
1166:     }
1167:     H[ds->n-1+(ds->n-2)*ld] = e[ds->n-2]*s[ds->n-1];
1168:     H[ds->n-1+(ds->n-1)*ld] = d[ds->n-1]*s[ds->n-1];
1169:   } else {
1170:     s[ds->l] = PetscRealPart(B[off]);
1171:     H[off] = A[off]*s[ds->l];
1172:     H[off+ld] = A[off+ld]*s[ds->l];
1173:     for (i=ds->l+1;i<ds->n-1;i++) {
1174:       s[i] = PetscRealPart(B[i+i*ld]);
1175:       H[i+(i-1)*ld] = A[i+(i-1)*ld]*s[i];
1176:       H[i+i*ld]     = A[i+i*ld]*s[i];
1177:       H[i+(i+1)*ld] = A[i+(i+1)*ld]*s[i];
1178:     }
1179:     s[ds->n-1] = PetscRealPart(B[ds->n-1+(ds->n-1)*ld]);
1180:     H[ds->n-1+(ds->n-2)*ld] = A[ds->n-1+(ds->n-2)*ld]*s[ds->n-1];
1181:     H[ds->n-1+(ds->n-1)*ld] = A[ds->n-1+(ds->n-1)*ld]*s[ds->n-1];
1182:   }

1184: #if !defined(PETSC_USE_COMPLEX)
1185:   LAPACKhseqr_("E","N",&n1,&one,&n1,H+off,&ld,wr+ds->l,wi+ds->l,PETSC_NULL,&ld,work,&lwork,&info);
1186: #else
1187:   LAPACKhseqr_("E","N",&n1,&one,&n1,H+off,&ld,wr+ds->l,PETSC_NULL,&ld,work,&lwork,&info);
1188: #endif
1189:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",&info);

1191:   /* Compute Eigenvectors with Inverse Iteration */
1192:   DSGHIEPPseudoOrthogInverseIteration(ds,wr,wi);

1194:   /* Recover eigenvalues from diagonal */
1195:   DSGHIEPComplexEigs(ds, 0, ds->l, wr, wi);
1196: #if defined(PETSC_USE_COMPLEX)
1197:   if (wi) {
1198:     for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
1199:   }
1200: #endif
1201:   return(0);
1202: #endif
1203: }

1207: PetscErrorCode DSSolve_GHIEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
1208: {
1209: #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
1211:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
1212: #else
1214:   PetscInt       i,j,off;
1215:   PetscBLASInt   lwork,info,n1,one=1,mout,ld;
1216:   PetscScalar    *A,*B,*H,*Q,*work,*tau;
1217:   PetscReal      *d,*e,*s;

1220: #if !defined(PETSC_USE_COMPLEX)
1222: #endif
1223:   n1 = PetscBLASIntCast(ds->n - ds->l);
1224:   ld = PetscBLASIntCast(ds->ld);
1225:   off = ds->l + ds->l*ld;
1226:   A = ds->mat[DS_MAT_A];
1227:   B = ds->mat[DS_MAT_B];
1228:   Q = ds->mat[DS_MAT_Q];
1229:   d = ds->rmat[DS_MAT_T];
1230:   e = ds->rmat[DS_MAT_T] + ld;
1231:   s = ds->rmat[DS_MAT_D];
1232:   DSAllocateMat_Private(ds,DS_MAT_W);
1233:   H = ds->mat[DS_MAT_W];
1234:   DSAllocateWork_Private(ds,ld+ld*ld,ld,0);
1235:   tau  = ds->work;
1236:   work = ds->work+ld;
1237:   lwork = ld*ld;

1239:    /* initialize orthogonal matrix */
1240:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
1241:   for (i=0;i< ds->n;i++)
1242:     Q[i+i*ld] = 1.0;
1243:   /* quick return */
1244:   if (n1 == 1) {
1245:     if (!ds->compact) {
1246:       d[ds->l] = PetscRealPart(A[off]);
1247:       s[ds->l] = PetscRealPart(B[off]);
1248:     }
1249:     wr[ds->l] = d[ds->l]/s[ds->l];
1250:     if (wi) wi[ds->l] = 0.0;
1251:     return(0);
1252:   }

1254:   /* form standard problem in H */
1255:   if (ds->compact) {
1256:     PetscMemzero(H,ld*ld*sizeof(PetscScalar));
1257:     for (i=ds->l; i < ds->n-1; i++) {
1258:       H[i+i*ld] = d[i]/s[i];
1259:       H[(i+1)+i*ld] = e[i]/s[i+1];
1260:       H[i+(i+1)*ld] = e[i]/s[i];
1261:     }
1262:     H[ds->n-1 + (ds->n-1)*ld] = d[ds->n-1]/s[ds->n-1];

1264:     for (i=ds->l; i < ds->k; i++) {
1265:       H[ds->k+i*ld] = *(ds->rmat[DS_MAT_T]+2*ld+i)/s[ds->k];
1266:       H[i+ds->k*ld] = *(ds->rmat[DS_MAT_T]+2*ld+i)/s[i];
1267:     }
1268:   } else {
1269:     for (j=ds->l; j<ds->n; j++) {
1270:       for (i=ds->l; i<ds->n; i++) {
1271:         H[i+j*ld] = A[i+j*ld]/B[i+i*ld];
1272:       }
1273:     }
1274:   }
1275:   /* reduce to upper Hessenberg form */
1276:   if (ds->state<DS_STATE_INTERMEDIATE) {
1277:     LAPACKgehrd_(&n1,&one,&n1,H+off,&ld,tau,work,&lwork,&info);
1278:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEHRD %d",&info);
1279:     for (j=ds->l;j<ds->n-1;j++) {
1280:       for (i=j+2;i<ds->n;i++) {
1281:         Q[i+j*ld] = H[i+j*ld];
1282:         H[i+j*ld] = 0.0;
1283:       }
1284:     }
1285:     LAPACKorghr_(&n1,&one,&n1,Q+off,&ld,tau,work,&lwork,&info);
1286:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGHR %d",&info);
1287:   }

1289:   /* Compute the real Schur form */
1290: #if !defined(PETSC_USE_COMPLEX)
1291:   LAPACKhseqr_("S","V",&n1,&one,&n1,H+off,&ld,wr+ds->l,wi+ds->l,Q+off,&ld,work,&lwork,&info);
1292: #else
1293:   LAPACKhseqr_("S","V",&n1,&one,&n1,H+off,&ld,wr+ds->l,Q+off,&ld,work,&lwork,&info);
1294: #endif
1295:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",&info);
1296: 
1297:   /* Compute eigenvectors */
1298: #if !defined(PETSC_USE_COMPLEX)
1299:   LAPACKtrevc_("R","B",PETSC_NULL,&n1,H+off,&ld,PETSC_NULL,&ld,Q+off,&ld,&n1,&mout,ds->work,&info);
1300: #else
1301:   LAPACKtrevc_("R","B",PETSC_NULL,&n1,H+off,&ld,PETSC_NULL,&ld,Q+off,&ld,&n1,&mout,work,ds->rwork,&info);
1302: #endif
1303:   if (info) SETERRQ1(((PetscObject)ds)->comm,PETSC_ERR_LIB,"Error in Lapack xTREVC %i",&info);

1305:   /* Compute real s-orthonormal basis */
1306:   DSEigenVectorsPseudoOrthog(ds, DS_MAT_Q, wr, wi,PETSC_FALSE);

1308:   /* Undo from diagonal the blocks whith real eigenvalues*/
1309:   DSGHIEPRealBlocks(ds);

1311:   /* Recover eigenvalues from diagonal */
1312:   DSGHIEPComplexEigs(ds, 0, ds->l, wr, wi);
1313: #if defined(PETSC_USE_COMPLEX)
1314:   if (wi) {
1315:     for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
1316:   }
1317: #endif
1318:   return(0);
1319: #endif
1320: }

1324: PetscErrorCode DSNormalize_GHIEP(DS ds,DSMatType mat,PetscInt col)
1325: {
1327:   PetscInt       i,i0,i1;
1328:   PetscBLASInt   ld,n,one = 1;
1329:   PetscScalar    *A = ds->mat[DS_MAT_A],norm,*x;
1330: #if !defined(PETSC_USE_COMPLEX)
1331:   PetscScalar    norm0;
1332: #endif

1335:   switch (mat) {
1336:     case DS_MAT_X:
1337:     case DS_MAT_Y:
1338:     case DS_MAT_Q:
1339:       /* Supported matrices */
1340:       break;
1341:     case DS_MAT_U:
1342:     case DS_MAT_VT:
1343:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
1344:       break;
1345:     default:
1346:       SETERRQ(((PetscObject)ds)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
1347:   }

1349:   n  = PetscBLASIntCast(ds->n);
1350:   ld = PetscBLASIntCast(ds->ld);
1351:   DSGetArray(ds,mat,&x);
1352:   if (col < 0) {
1353:     i0 = 0; i1 = ds->n;
1354:   } else if (col>0 && A[ds->ld*(col-1)+col] != 0.0) {
1355:     i0 = col-1; i1 = col+1;
1356:   } else {
1357:     i0 = col; i1 = col+1;
1358:   }
1359:   for (i=i0; i<i1; i++) {
1360: #if !defined(PETSC_USE_COMPLEX)
1361:     if (i<n-1 && A[ds->ld*i+i+1] != 0.0) {
1362:       norm = BLASnrm2_(&n,&x[ld*i],&one);
1363:       norm0 = BLASnrm2_(&n,&x[ld*(i+1)],&one);
1364:       norm = 1.0/SlepcAbsEigenvalue(norm,norm0);
1365:       BLASscal_(&n,&norm,&x[ld*i],&one);
1366:       BLASscal_(&n,&norm,&x[ld*(i+1)],&one);
1367:       i++;
1368:     } else
1369: #endif
1370:     {
1371:       norm = BLASnrm2_(&n,&x[ld*i],&one);
1372:       norm = 1.0/norm;
1373:       BLASscal_(&n,&norm,&x[ld*i],&one);
1374:      }
1375:   }
1376:   return(0);
1377: }

1379: extern PetscErrorCode DSSolve_GHIEP_HZ(DS,PetscScalar*,PetscScalar*);
1380: extern PetscErrorCode DSSolve_GHIEP_DQDS_II(DS,PetscScalar*,PetscScalar*);

1382: EXTERN_C_BEGIN
1385: PetscErrorCode DSCreate_GHIEP(DS ds)
1386: {
1388:   ds->ops->allocate      = DSAllocate_GHIEP;
1389:   ds->ops->view          = DSView_GHIEP;
1390:   ds->ops->vectors       = DSVectors_GHIEP;
1391:   ds->ops->solve[0]      = DSSolve_GHIEP_HZ;
1392:   ds->ops->solve[1]      = DSSolve_GHIEP_QR_II;
1393:   ds->ops->solve[2]      = DSSolve_GHIEP_QR;
1394:   ds->ops->solve[3]      = DSSolve_GHIEP_DQDS_II;
1395:   ds->ops->sort          = DSSort_GHIEP;
1396:   ds->ops->normalize     = DSNormalize_GHIEP;
1397:   return(0);
1398: }
1399: EXTERN_C_END