Actual source code: dsghiep.c

  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:       
  8:    SLEPc is free software: you can redistribute it and/or modify it under  the
  9:    terms of version 3 of the GNU Lesser General Public License as published by
 10:    the Free Software Foundation.

 12:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 13:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 14:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 15:    more details.

 17:    You  should have received a copy of the GNU Lesser General  Public  License
 18:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 19:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 20: */
 21: #include <slepc-private/dsimpl.h>      /*I "slepcds.h" I*/
 22: #include <slepcblaslapack.h>

 26: PetscErrorCode DSAllocate_GHIEP(DS ds,PetscInt ld)
 27: {

 31:   DSAllocateMat_Private(ds,DS_MAT_A);
 32:   DSAllocateMat_Private(ds,DS_MAT_B);
 33:   DSAllocateMat_Private(ds,DS_MAT_Q);
 34:   DSAllocateMatReal_Private(ds,DS_MAT_T);
 35:   DSAllocateMatReal_Private(ds,DS_MAT_D);
 36:   PetscFree(ds->perm);
 37:   PetscMalloc(ld*sizeof(PetscInt),&ds->perm);
 38:   PetscLogObjectMemory(ds,ld*sizeof(PetscInt));
 39:   return(0);
 40: }

 44: PetscErrorCode DSSwitchFormat_GHIEP(DS ds,PetscBool tocompact)
 45: {
 47:   PetscReal      *T,*S;
 48:   PetscScalar    *A,*B;
 49:   PetscInt       i,n,ld;

 52:   A = ds->mat[DS_MAT_A];
 53:   B = ds->mat[DS_MAT_B];
 54:   T = ds->rmat[DS_MAT_T];
 55:   S = ds->rmat[DS_MAT_D];
 56:   n = ds->n;
 57:   ld = ds->ld;
 58:   if (tocompact) { /* switch from dense (arrow) to compact storage */
 59:     PetscMemzero(T,3*ld*sizeof(PetscReal));
 60:     PetscMemzero(S,ld*sizeof(PetscReal));
 61:     for (i=0;i<n-1;i++) {
 62:       T[i] = PetscRealPart(A[i+i*ld]);
 63:       T[ld+i] = PetscRealPart(A[i+1+i*ld]);
 64:       S[i] = PetscRealPart(B[i+i*ld]);
 65:     }
 66:     T[n-1] = PetscRealPart(A[n-1+(n-1)*ld]);
 67:     S[n-1] = PetscRealPart(B[n-1+(n-1)*ld]);
 68:     for (i=ds->l;i< ds->k;i++) T[2*ld+i] = PetscRealPart(A[ds->k+i*ld]);
 69:   }else { /* switch from compact (arrow) to dense storage */
 70:     PetscMemzero(A,ld*ld*sizeof(PetscScalar));
 71:     PetscMemzero(B,ld*ld*sizeof(PetscScalar));
 72:     for (i=0;i<n-1;i++) {
 73:       A[i+i*ld] = T[i];
 74:       A[i+1+i*ld] = T[ld+i];
 75:       A[i+(i+1)*ld] = T[ld+i];
 76:       B[i+i*ld] = S[i];
 77:     }
 78:     A[n-1+(n-1)*ld] = T[n-1];
 79:     B[n-1+(n-1)*ld] = S[n-1];
 80:     for (i=ds->l;i<ds->k;i++) {
 81:       A[ds->k+i*ld] = T[2*ld+i];
 82:       A[i+ds->k*ld] = T[2*ld+i];
 83:     }
 84:   }
 85:   return(0);
 86: }

 90: PetscErrorCode DSView_GHIEP(DS ds,PetscViewer viewer)
 91: {
 92:   PetscErrorCode    ierr;
 93:   PetscViewerFormat format;
 94:   PetscInt          i,j;
 95:   PetscReal         value;
 96:   const char *methodname[] = {
 97:                      "HR method",
 98:                      "QR + Inverse Iteration",
 99:                      "QR",
100:                      "DQDS + Inverse Iteration "
101:   };

104:   PetscViewerGetFormat(viewer,&format);
105:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
106:     PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
107:     return(0);
108:   }
109:   if (ds->compact) {
110:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
111:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
112:       PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",ds->n,ds->n);
113:       PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",3*ds->n);
114:       PetscViewerASCIIPrintf(viewer,"zzz = [\n");
115:       for (i=0;i<ds->n;i++) {
116:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,i+1,*(ds->rmat[DS_MAT_T]+i));
117:       }
118:       for (i=0;i<ds->n-1;i++) {
119:         if (*(ds->rmat[DS_MAT_T]+ds->ld+i) !=0 && i!=ds->k-1) {
120:           PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+2,i+1,*(ds->rmat[DS_MAT_T]+ds->ld+i));
121:           PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,i+2,*(ds->rmat[DS_MAT_T]+ds->ld+i));
122:         }
123:       }
124:       for (i = ds->l;i<ds->k;i++) {
125:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",ds->k+1,i+1,*(ds->rmat[DS_MAT_T]+2*ds->ld+i));
126:           PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,ds->k+1,*(ds->rmat[DS_MAT_T]+2*ds->ld+i));
127:       }
128:       PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_A]);
129: 
130:       PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",ds->n,ds->n);
131:       PetscViewerASCIIPrintf(viewer,"omega = zeros(%D,3);\n",3*ds->n);
132:       PetscViewerASCIIPrintf(viewer,"omega = [\n");
133:       for (i=0;i<ds->n;i++) {
134:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,i+1,*(ds->rmat[DS_MAT_D]+i));
135:       }
136:       PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(omega);\n",DSMatName[DS_MAT_B]);

138:     } else {
139:       PetscViewerASCIIPrintf(viewer,"T\n");
140:       for (i=0;i<ds->n;i++) {
141:         for (j=0;j<ds->n;j++) {
142:           if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
143:           else if (i==j+1 || j==i+1) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
144:           else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+2*ds->ld+PetscMin(i,j));
145:           else value = 0.0;
146:           PetscViewerASCIIPrintf(viewer," %18.16e ",value);
147:         }
148:         PetscViewerASCIIPrintf(viewer,"\n");
149:       }
150:       PetscViewerASCIIPrintf(viewer,"omega\n");
151:       for (i=0;i<ds->n;i++) {
152:         for (j=0;j<ds->n;j++) {
153:           if (i==j) value = *(ds->rmat[DS_MAT_D]+i);
154:           else value = 0.0;
155:           PetscViewerASCIIPrintf(viewer," %18.16e ",value);
156:         }
157:         PetscViewerASCIIPrintf(viewer,"\n");
158:       }
159:     }
160:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
161:     PetscViewerFlush(viewer);
162:   } else {
163:     DSViewMat_Private(ds,viewer,DS_MAT_A);
164:     DSViewMat_Private(ds,viewer,DS_MAT_B);
165:   }
166:   if (ds->state>DS_STATE_INTERMEDIATE) {
167:     DSViewMat_Private(ds,viewer,DS_MAT_Q);
168:   }
169:   return(0);
170: }

174: static PetscErrorCode DSVectors_GHIEP_Eigen_Some(DS ds,PetscInt *idx,PetscReal *rnorm)
175: {
177:   PetscReal      b[4],M[4],d1,d2,s1,s2,e;
178:   PetscReal      scal1,scal2,wr1,wr2,wi,ep,norm;
179:   PetscScalar    *Q,*X,Y[4],alpha,zeroS = 0.0;
180:   PetscInt       k;
181:   PetscBLASInt   two = 2,n_,ld,one=1;
182: #if !defined(PETSC_USE_COMPLEX)
183:   PetscBLASInt   four=4;
184: #endif
185: 
187:   X = ds->mat[DS_MAT_X];
188:   Q = ds->mat[DS_MAT_Q];
189:   k = *idx;
190:   n_ = PetscBLASIntCast(ds->n);
191:   ld = PetscBLASIntCast(ds->ld);
192:   if (k < ds->n-1) {
193:    e = (ds->compact)?*(ds->rmat[DS_MAT_T]+ld+k):PetscRealPart(*(ds->mat[DS_MAT_A]+(k+1)+ld*k));
194:   } else e = 0.0;
195:   if (e == 0.0) {/* Real */
196:      if (ds->state>=DS_STATE_CONDENSED) {
197:        PetscMemcpy(X+k*ld,Q+k*ld,ld*sizeof(PetscScalar));
198:      } else {
199:        PetscMemzero(X+k*ds->ld,ds->ld*sizeof(PetscScalar));
200:        X[k+k*ds->ld] = 1.0;
201:      }
202:      if (rnorm) {
203:        *rnorm = PetscAbsScalar(X[ds->n-1+k*ld]);
204:      }
205:   } else { /* 2x2 block */
206:     if (ds->compact) {
207:       s1 = *(ds->rmat[DS_MAT_D]+k);
208:       d1 = *(ds->rmat[DS_MAT_T]+k);
209:       s2 = *(ds->rmat[DS_MAT_D]+k+1);
210:       d2 = *(ds->rmat[DS_MAT_T]+k+1);
211:     } else {
212:       s1 = PetscRealPart(*(ds->mat[DS_MAT_B]+k*ld+k));
213:       d1 = PetscRealPart(*(ds->mat[DS_MAT_A]+k+k*ld));
214:       s2 = PetscRealPart(*(ds->mat[DS_MAT_B]+(k+1)*ld+k+1));
215:       d2 = PetscRealPart(*(ds->mat[DS_MAT_A]+k+1+(k+1)*ld));
216:     }
217:     M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
218:     b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
219:     ep = LAPACKlamch_("S");
220:     /* Compute eigenvalues of the block */
221:     LAPACKlag2_(M, &two, b, &two, &ep, &scal1, &scal2, &wr1, &wr2, &wi);
222:     if (wi==0.0) { /* Real eigenvalues */
223:       SETERRQ(PETSC_COMM_SELF,1,"Real block in DSVectors_GHIEP");
224:     } else { /* Complex eigenvalues */
225:       if (scal1<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
226:       wr1 /= scal1; wi /= scal1;
227: #if !defined(PETSC_USE_COMPLEX)
228:       if ( SlepcAbs(s1*d1-wr1,wi)<SlepcAbs(s2*d2-wr1,wi)) {
229:         Y[0] = wr1-s2*d2; Y[1] = s2*e; Y[2] = wi; Y[3] = 0.0;
230:       } else {
231:         Y[0] = s1*e; Y[1] = wr1-s1*d1; Y[2] = 0.0; Y[3] = wi;
232:       }
233:       norm = BLASnrm2_(&four,Y,&one);
234:       norm = 1/norm;
235:       if (ds->state >= DS_STATE_CONDENSED) {
236:         alpha = norm;
237:         BLASgemm_("N","N",&n_,&two,&two,&alpha,ds->mat[DS_MAT_Q]+k*ld,&ld,Y,&two,&zeroS,X+k*ld,&ld);
238:         if (rnorm) *rnorm = SlepcAbsEigenvalue(X[ds->n-1+k*ld],X[ds->n-1+(k+1)*ld]);
239:       } else {
240:         PetscMemzero(X+k*ld,2*ld*sizeof(PetscScalar));
241:         X[k*ld+k] = Y[0]*norm; X[k*ld+k+1] = Y[1]*norm;
242:         X[(k+1)*ld+k] = Y[2]*norm; X[(k+1)*ld+k+1] = Y[3]*norm;
243:       }
244: #else
245:       if ( SlepcAbs(s1*d1-wr1,wi)<SlepcAbs(s2*d2-wr1,wi)) {
246:         Y[0] = wr1-s2*d2+PETSC_i*wi; Y[1] = s2*e;
247:       } else {
248:         Y[0] = s1*e; Y[1] = wr1-s1*d1+PETSC_i*wi;
249:       }
250:       norm = BLASnrm2_(&two,Y,&one);
251:       norm = 1/norm;
252:       if (ds->state >= DS_STATE_CONDENSED) {
253:         alpha = norm;
254:         BLASgemv_("N",&n_,&two,&alpha,ds->mat[DS_MAT_Q]+k*ld,&ld,Y,&one,&zeroS,X+k*ld,&one);
255:         if (rnorm) *rnorm = PetscAbsScalar(X[ds->n-1+k*ld]);
256:       } else {
257:         PetscMemzero(X+k*ld,2*ld*sizeof(PetscScalar));
258:         X[k*ld+k] = Y[0]*norm; X[k*ld+k+1] = Y[1]*norm;
259:       }
260:       X[(k+1)*ld+k] = PetscConj(X[k*ld+k]); X[(k+1)*ld+k+1] = PetscConj(X[k*ld+k+1]);
261: #endif
262:       (*idx)++;
263:     }
264:   }
265:   return(0);
266: }

270: PetscErrorCode DSVectors_GHIEP(DS ds,DSMatType mat,PetscInt *k,PetscReal *rnorm)
271: {
272:   PetscInt       i;
273:   PetscReal      e;

277:   switch (mat) {
278:     case DS_MAT_X:
279:       if (k) {
280:         DSVectors_GHIEP_Eigen_Some(ds,k,rnorm);
281:       } else {
282:         for (i=0; i<ds->n; i++) {
283:           e = (ds->compact)?*(ds->rmat[DS_MAT_T]+ds->ld+i):PetscRealPart(*(ds->mat[DS_MAT_A]+(i+1)+ds->ld*i));
284:           if (e == 0.0) {/* real */
285:             if (ds->state >= DS_STATE_CONDENSED) {
286:               PetscMemcpy(ds->mat[mat]+i*ds->ld,ds->mat[DS_MAT_Q]+i*ds->ld,ds->ld*sizeof(PetscScalar));
287:             } else {
288:               PetscMemzero(ds->mat[mat]+i*ds->ld,ds->ld*sizeof(PetscScalar));
289:               *(ds->mat[mat]+i+i*ds->ld) = 1.0;
290:             }
291:           } else {
292:             DSVectors_GHIEP_Eigen_Some(ds,&i,rnorm);
293:           }
294:         }
295:       }
296:       break;
297:     case DS_MAT_Y:
298:     case DS_MAT_U:
299:     case DS_MAT_VT:
300:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
301:       break;
302:     default:
303:       SETERRQ(((PetscObject)ds)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
304:   }
305:   return(0);
306: }

310: /*
311:   Extract the eigenvalues contained in the block-diagonal of the indefinite problem.
312:   Only the index range n0..n1 is processed.
313: */
314: PetscErrorCode DSGHIEPComplexEigs(DS ds, PetscInt n0, PetscInt n1, PetscScalar *wr, PetscScalar *wi)
315: {
316:   PetscInt     k,ld;
317:   PetscBLASInt two=2;
318:   PetscScalar  *A,*B;
319:   PetscReal    *D,*T;
320:   PetscReal    b[4],M[4],d1,d2,s1,s2,e;
321:   PetscReal    scal1,scal2,ep,wr1,wr2,wi1;

324:   ld = ds->ld;
325:   A = ds->mat[DS_MAT_A];
326:   B = ds->mat[DS_MAT_B];
327:   D = ds->rmat[DS_MAT_D];
328:   T = ds->rmat[DS_MAT_T];
329:   for (k=n0;k<n1;k++) {
330:     if (k < n1-1) {
331:       e = (ds->compact)?T[ld+k]:PetscRealPart(A[(k+1)+ld*k]);
332:     }else e = 0.0;
333:     if (e==0.0) {
334:       /* real eigenvalue */
335:       wr[k] = (ds->compact)?T[k]/D[k]:A[k+k*ld]/B[k+k*ld];
336:       wi[k] = 0.0 ;
337:     } else {
338:       /* diagonal block */
339:       if (ds->compact) {
340:         s1 = D[k];
341:         d1 = T[k];
342:         s2 = D[k+1];
343:         d2 = T[k+1];
344:       } else {
345:         s1 = PetscRealPart(B[k*ld+k]);
346:         d1 = PetscRealPart(A[k+k*ld]);
347:         s2 = PetscRealPart(B[(k+1)*ld+k+1]);
348:         d2 = PetscRealPart(A[k+1+(k+1)*ld]);
349:       }
350:       M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
351:       b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
352:       ep = LAPACKlamch_("S");
353:       /* Compute eigenvalues of the block */
354:       LAPACKlag2_(M, &two, b, &two, &ep, &scal1, &scal2, &wr1, &wr2, &wi1);
355:       if (scal1<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
356:       wr[k] = wr1/scal1;
357:       if (wi1==0.0) { /* Real eigenvalues */
358:         if (scal2<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
359:         wr[k+1] = wr2/scal2;
360:         wi[k] = 0.0;
361:         wi[k+1] = 0.0;
362:       } else { /* Complex eigenvalues */
363: #if !defined(PETSC_USE_COMPLEX)
364:         wr[k+1] = wr[k];
365:         wi[k] = wi1/scal1;
366:         wi[k+1] = -wi[k];
367: #else
368:         wr[k] += PETSC_i*wi1/scal1;
369:         wr[k+1] = PetscConj(wr[k]);
370:         wi[k] = 0.0;
371:         wi[k+1] = 0.0;
372: #endif
373:       }
374:       k++;
375:     }
376:   }
377:   return(0);
378: }

382: PetscErrorCode DSSort_GHIEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
383: {
385:   PetscInt       n,i,*perm;
386:   PetscReal      *d,*e,*s;

389:   n = ds->n;
390:   d = ds->rmat[DS_MAT_T];
391:   e = d + ds->ld;
392:   s = ds->rmat[DS_MAT_D];
393:   DSAllocateWork_Private(ds,ds->ld,ds->ld,0);
394:   perm = ds->perm;
395:   if (!rr) {
396:     rr = wr;
397:     ri = wi;
398:   }
399:   DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_TRUE);
400:   if (!ds->compact) {DSSwitchFormat_GHIEP(ds,PETSC_TRUE);}
401:   PetscMemcpy(ds->work,wr,n*sizeof(PetscScalar));
402:   for (i=ds->l;i<n;i++) {
403:     wr[i] = *(ds->work + perm[i]);
404:   }
405:   PetscMemcpy(ds->work,wi,n*sizeof(PetscScalar));
406:   for (i=ds->l;i<n;i++) {
407:     wi[i] = *(ds->work + perm[i]);
408:   }
409:   PetscMemcpy(ds->rwork,s,n*sizeof(PetscReal));
410:   for (i=ds->l;i<n;i++) {
411:     s[i] = *(ds->rwork+perm[i]);
412:   }
413:   PetscMemcpy(ds->rwork,d,n*sizeof(PetscReal));
414:   for (i=ds->l;i<n;i++) {
415:     d[i] = *(ds->rwork  + perm[i]);
416:   }
417:   PetscMemcpy(ds->rwork,e,(n-1)*sizeof(PetscReal));
418:   PetscMemzero(e+ds->l,(n-1-ds->l)*sizeof(PetscScalar));
419:   for (i=ds->l;i<n-1;i++) {
420:     if (perm[i]<n-1) e[i] = *(ds->rwork + perm[i]);
421:   }
422:   if (!ds->compact) { DSSwitchFormat_GHIEP(ds,PETSC_FALSE);}
423:   DSPermuteColumns_Private(ds,ds->l,n,DS_MAT_Q,perm);
424:   return(0);
425: }

429: /*
430:   Generates a hyperbolic rotation
431:     if x1*x1 - x2*x2 != 0 
432:       r = sqrt( |x1*x1 - x2*x2| )
433:       c = x1/r  s = x2/r
434:      
435:       | c -s||x1|   |d*r|
436:       |-s  c||x2| = | 0 | 
437:       where d = 1 for type==1 and -1 for type==2
438:   Returns the condition number of the reduction
439: */
440: static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
441: {
442:   PetscReal t,n2,xa,xb;
443:   PetscInt  type_;

446:   if (x2==0) {
447:     *r = PetscAbsReal(x1);
448:     *c = (x1>=0)?1.0:-1.0;
449:     *s = 0.0;
450:     if (type) *type = 1;
451:     return(0);
452:   }
453:   if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
454:     /* hyperbolic rotation doesn't exist */
455:     *c = 0;
456:     *s = 0;
457:     *r = 0;
458:     if (type) *type = 0;
459:     *cond = PETSC_MAX_REAL;
460:     return(0);
461:   }
462: 
463:   if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
464:     xa = x1; xb = x2; type_ = 1;
465:   } else {
466:     xa = x2; xb = x1; type_ = 2;
467:   }
468:   t = xb/xa;
469:   n2 = PetscAbsReal(1 - t*t);
470:   *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
471:   *c = x1/(*r);
472:   *s = x2/(*r);
473:   if (type_ == 2) *r *= -1;
474:   if (type) *type = type_;
475:   if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
476:   return(0);
477: }

481: /*
482:                                 |c  s|
483:   Applies an hyperbolic rotator |s  c|
484:            |c  s|
485:     [x1 x2]|s  c| 
486: */
487: PetscErrorCode HRApply(PetscInt n, PetscScalar *x1,PetscInt inc1, PetscScalar *x2, PetscInt inc2,PetscReal c, PetscReal s)
488: {
489:   PetscInt    i;
490:   PetscReal   t;
491:   PetscScalar tmp;
492: 
494:   if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
495:     t = s/c;
496:     for (i=0;i<n;i++) {
497:       x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
498:       x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
499:     }
500:   } else { /* Type II */
501:     t = c/s;
502:     for (i=0;i<n;i++) {
503:       tmp = x1[i*inc1];
504:       x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
505:       x2[i*inc2] = t*x1[i*inc1] + tmp/s;
506:     }
507:   }
508:   return(0);
509: }

513: /*
514:   Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).

516:   Input:
517:     A symmetric (only lower triangular part is refered)
518:     s vector +1 and -1 (signature matrix)
519:   Output:
520:     d,e
521:     s
522:     Q s-orthogonal matrix whith Q^T*A*Q = T (symmetric tridiagonal matrix)
523: */
524: static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *w,PetscInt lw)
525: {
526: #if defined(PETSC_MISSING_LAPACK_LARFG) || defined(PETSC_MISSING_LAPACK_LARF)
528:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
529: #else
531:   PetscInt       i,j,k,*ii,*jj,i0,ik,tmp,type,*perm,nwall,nwu;
532:   PetscReal      *ss,cond=1.0,cs,sn,r;
533:   PetscScalar    *work,tau,t,*AA;
534:   PetscBLASInt   n0,n1,ni,inc=1,m,n_,lda_,ldq_;
535:   PetscBool      breakdown = PETSC_TRUE;
536: 
538:   if (n<3) {
539:     if (n==1)Q[0]=1;
540:     if (n==2) {Q[0] = Q[1+ldq] = 1; Q[1] = Q[ldq] = 0;}
541:     return(0);
542:   }
543:   lda_ = PetscBLASIntCast(lda);
544:   n_   = PetscBLASIntCast(n);
545:   ldq_ = PetscBLASIntCast(ldq);
546:   nwall = n*n+n;
547:   nwu = 0;
548:   if (!w || lw < nwall) {
549:     PetscMalloc(nwall*sizeof(PetscScalar),&work);
550:   }else work = w;
551:   PetscMalloc(n*sizeof(PetscReal),&ss);
552:   PetscMalloc(n*sizeof(PetscInt),&perm);
553:   AA = work;
554:   for (i=0;i<n;i++) {
555:     PetscMemcpy(AA+i*n,A+i*lda,n*sizeof(PetscScalar));
556:   }
557:   nwu += n*n;
558:   k=0;
559:   while (breakdown && k<n) {
560:     breakdown = PETSC_FALSE;
561:     /* Classify (and flip) A and s according to sign */
562:     if (flip) {
563:       for (i=0;i<n;i++) {
564:         perm[i] = n-1-perm_[i];
565:         if (perm[i]==0) i0 = i;
566:         if (perm[i]==k) ik = i;
567:       }
568:     } else {
569:       for (i=0;i<n;i++) {
570:         perm[i] = perm_[i];
571:         if (perm[i]==0) i0 = i;
572:         if (perm[i]==k) ik = i;
573:       }
574:     }
575:     perm[ik] = 0;
576:     perm[i0] = k;
577:     i=1;
578:     while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
579:       if (s[perm[i]]!=s[perm[0]]) {
580:         j=i+1;
581:         while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
582:         tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
583:       }
584:       i++;
585:     }
586:     for (i=0;i<n;i++) {
587:       ss[i] = s[perm[i]];
588:     }
589:     if (flip) { ii = &j; jj = &i;} else { ii = &i; jj = &j;}
590:     for (i=0;i<n;i++)
591:       for (j=0;j<n;j++)
592:         A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
593:     /* Initialize Q */
594:     for (i=0;i<n;i++) {
595:       PetscMemzero(Q+i*ldq,n*sizeof(PetscScalar));
596:       Q[perm[i]+i*ldq] = 1.0;
597:     }
598:     for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
599:     n0 = ni-1; n1 = PetscBLASIntCast(n)-ni;
600:     for (j=0;j<n-2;j++) {
601:       m = PetscBLASIntCast(n-j-1);
602:       /* Forming and applying reflectors */
603:       if ( n0 > 1 ) {
604:         LAPACKlarfg_(&n0, A+ni-n0+j*lda, A+ni-n0+j*lda+1,&inc,&tau);
605:         /* Apply reflector */
606:         if ( PetscAbsScalar(tau) != 0.0 ) {
607:           t=*( A+ni-n0+j*lda);  *(A+ni-n0+j*lda)=1.0;
608:           LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu);
609:           LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu);
610:           /* Update Q */
611:           LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu);
612:           *(A+ni-n0+j*lda) = t;
613:           for (i=1;i<n0;i++) {
614:             *(A+ni-n0+j*lda+i) = 0.0;  *(A+j+(ni-n0+i)*lda) = 0.0;
615:           }
616:           *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
617:         }
618:       }
619:       if ( n1 > 1 ) {
620:         LAPACKlarfg_(&n1, A+n-n1+j*lda, A+n-n1+j*lda+1,&inc,&tau);
621:         /* Apply reflector */
622:         if ( PetscAbsScalar(tau) != 0.0 ) {
623:           t=*( A+n-n1+j*lda);  *(A+n-n1+j*lda)=1.0;
624:           LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu);
625:           LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu);
626:           /* Update Q */
627:           LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu);
628:           *(A+n-n1+j*lda) = t;
629:           for (i=1;i<n1;i++) {
630:             *(A+n-n1+i+j*lda) = 0.0;  *(A+j+(n-n1+i)*lda) = 0.0;
631:           }
632:           *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
633:         }
634:       }
635:       /* Hyperbolic rotation */
636:       if ( n0 > 0 && n1 > 0) {
637:         HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond);
638:         /* Check condition number */
639:         if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
640:           breakdown = PETSC_TRUE;
641:           k++;
642:           if (k==n || flip)
643:             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Breakdown in construction of hyperbolic transformation");
644:           break;
645:         }
646:         A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
647:         A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
648:         /* Apply to A */
649:         HRApply(m, A+j+1+(ni-n0)*lda,1, A+j+1+(n-n1)*lda,1, cs, -sn);
650:         HRApply(m, A+ni-n0+(j+1)*lda,lda, A+n-n1+(j+1)*lda,lda, cs, -sn);
651: 
652:         /* Update Q */
653:         HRApply(n, Q+(ni-n0)*ldq,1, Q+(n-n1)*ldq,1, cs, -sn);
654:         if (type==2) {
655:           ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
656:           n0++;ni++;n1--;
657:         }
658:       }
659:       if (n0>0) n0--;else n1--;
660:     }
661:   }

663: /* flip matrices */
664:     if (flip) {
665:       for (i=0;i<n-1;i++) {
666:         d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
667:         e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
668:         s[i] = ss[n-i-1];
669:       }
670:       s[n-1] = ss[0];
671:       d[n-1] = PetscRealPart(A[0]);
672:       for (i=0;i<n;i++) {
673:         ierr=PetscMemcpy(work+i*n,Q+i*ldq,n*sizeof(PetscScalar));
674:       }
675:       for (i=0;i<n;i++)
676:         for (j=0;j<n;j++)
677:           Q[i+j*ldq] = work[i+(n-j-1)*n];
678:     } else {
679:       for (i=0;i<n-1;i++) {
680:         d[i] = PetscRealPart(A[i+i*lda]);
681:         e[i] = PetscRealPart(A[i+1+i*lda]);
682:         s[i] = ss[i];
683:       }
684:       s[n-1] = ss[n-1];
685:       d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
686:     }

688:   PetscFree(ss);
689:   PetscFree(perm);
690:   return(0);
691: #endif
692: }

696: /*
697:   compute x = x - y*ss^{-1}*y^T*s*x where ss=y^T*s*y
698:   s diagonal (signature matrix)
699: */
700: static PetscErrorCode IndefOrthog(PetscReal *s, PetscScalar *y, PetscReal ss, PetscScalar *x, PetscScalar *h,PetscInt n)
701: {
702:   PetscInt    i;
703:   PetscScalar h_,r;

706:   if (y) {
707:     h_ = 0.0; /* h_=(y^Tdiag(s)*y)^{-1}*y^T*diag(s)*x*/
708:     for (i=0;i<n;i++) { h_+=y[i]*s[i]*x[i];}
709:     h_ /= ss;
710:     for (i=0;i<n;i++) {x[i] -= h_*y[i];} /* x = x-h_*y */
711:     /* repeat */
712:     r = 0.0;
713:     for (i=0;i<n;i++) { r+=y[i]*s[i]*x[i];}
714:     r /= ss;
715:     for (i=0;i<n;i++) {x[i] -= r*y[i];}
716:     h_ += r;
717:   }else h_ = 0.0;
718:   if (h) *h = h_;
719:   return(0);
720: }

724: /* 
725:    normalization with a indefinite norm
726: */
727: static PetscErrorCode IndefNorm(PetscReal *s,PetscScalar *x, PetscReal *norm,PetscInt n)
728: {
729:   PetscInt  i;
730:   PetscReal norm_;

733:   /* s-normalization */
734:   norm_ = 0.0;
735:   for (i=0;i<n;i++) {norm_ += PetscRealPart(x[i]*s[i]*x[i]);}
736:   if (norm_<0) {norm_ = -PetscSqrtReal(-norm_);}
737:   else {norm_ = PetscSqrtReal(norm_);}
738:   for (i=0;i<n;i++)x[i] /= norm_;
739:   if (norm) *norm = norm_;
740:   return(0);
741: }

745: static PetscErrorCode DSEigenVectorsPseudoOrthog(DS ds, DSMatType mat, PetscScalar *wr, PetscScalar *wi,PetscBool accum)
746: {
748:   PetscInt       i,j,k,off;
749:   PetscBLASInt   ld,n1,one=1;
750:   PetscScalar    PQ[4],xx,yx,xy,yy,*y,h,oneS=1.0,zeroS=0.0,*X,*W,*B;
751:   PetscReal      *ss,*s,*d,*e,d1,d2,toldeg=PETSC_SQRT_MACHINE_EPSILON*100;

754:   ld = PetscBLASIntCast(ds->ld);
755:   n1 = PetscBLASIntCast(ds->n - ds->l);
756:   DSAllocateWork_Private(ds,ld*ld+2*ld,ld,2*ld);
757:   s = ds->rmat[DS_MAT_D];
758:   d = ds->rmat[DS_MAT_T];
759:   e = d + ld;
760:   off = ds->l+ds->l*ld;
761:   if (!ds->compact) {
762:     B = ds->mat[DS_MAT_B];
763:     for (i=ds->l;i<ds->n;i++) {
764:       s[i] = PetscRealPart(B[i+i*ld]);
765:     }
766:   }

768:   /* compute real s-orthonormal base */
769:   X = ds->mat[mat];
770:   ss = ds->rwork;
771:   y = ds->work;

773: #if defined(PETSC_USE_COMPLEX)
774:   /* with complex scalars we need to operate as in real scalar */
775:   for (i=ds->l;i<ds->n;i++) {
776:     if (PetscImaginaryPart(wr[i])!=0.0) {
777:       for (j=ds->l;j<ds->n;j++) {
778:         X[j+(i+1)*ld] = PetscImaginaryPart(X[j+i*ld]);
779:         X[j+i*ld] = PetscRealPart(X[j+i*ld]);
780:       }
781:       i++;
782:     }
783:   }
784: #endif

786:   for (i=ds->l;i<ds->n;i++) {
787: #if defined(PETSC_USE_COMPLEX)
788:     if (PetscImaginaryPart(wr[i])==0.0) { /* real */
789: #else
790:     if (wi[i]==0.0) { /* real */
791: #endif
792:       for (j=ds->l;j<i;j++) {
793:          /* s-orthogonalization with close eigenvalues */
794:         if (wi[j]==0.0) {
795:           if ( PetscAbsScalar(wr[j]-wr[i])<toldeg) {
796:             IndefOrthog(s+ds->l, X+j*ld+ds->l, ss[j],X+i*ld+ds->l, PETSC_NULL,n1);
797:           }
798:         }else j++;
799:       }
800:       IndefNorm(s+ds->l,X+i*ld+ds->l,&d1,n1);
801:       ss[i] = (d1<0.0)?-1:1;
802:       d[i] = PetscRealPart(wr[i]*ss[i]); e[i] = 0.0;
803:     } else {
804:       for (j=ds->l;j<i;j++) {
805:         /* s-orthogonalization of Xi and Xi+1*/
806: #if defined(PETSC_USE_COMPLEX)
807:         if (PetscImaginaryPart(wr[j])!=0.0) {
808: #else
809:         if (wi[j]!=0.0) {
810: #endif
811:           if (PetscAbsScalar(wr[j]-wr[i])<toldeg && PetscAbsScalar(PetscAbsScalar(wi[j])-PetscAbsScalar(wi[i]))<toldeg) {
812:             for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+i*ld];
813:             xx = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
814:             yx = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
815:             for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+(i+1)*ld];
816:             xy = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
817:             yy = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
818:             PQ[0] = ss[j]*xx; PQ[1] = ss[j+1]*yx; PQ[2] = ss[j]*xy; PQ[3] = ss[j+1]*yy;
819:             for (k=ds->l;k<ds->n;k++) {
820:               X[k+i*ld] -= PQ[0]*X[k+j*ld]+PQ[1]*X[k+(j+1)*ld];
821:               X[k+(i+1)*ld] -= PQ[2]*X[k+j*ld]+PQ[3]*X[k+(j+1)*ld];
822:             }
823:             /* Repeat */
824:             for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+i*ld];
825:             xx = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
826:             yx = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
827:             for (k=ds->l;k<ds->n;k++) y[k] = s[k]*X[k+(i+1)*ld];
828:             xy = BLASdot_(&n1,X+ds->l+j*ld,&one,y+ds->l,&one);
829:             yy = BLASdot_(&n1,X+ds->l+(j+1)*ld,&one,y+ds->l,&one);
830:             PQ[0] = ss[j]*xx; PQ[1] = ss[j+1]*yx; PQ[2] = ss[j]*xy; PQ[3] = ss[j+1]*yy;
831:             for (k=ds->l;k<ds->n;k++) {
832:               X[k+i*ld] -= PQ[0]*X[k+j*ld]+PQ[1]*X[k+(j+1)*ld];
833:               X[k+(i+1)*ld] -= PQ[2]*X[k+j*ld]+PQ[3]*X[k+(j+1)*ld];
834:             }
835:           }
836:           j++;
837:         }
838:       }
839:       IndefNorm(s+ds->l,X+i*ld+ds->l,&d1,n1);
840:       ss[i] = (d1<0)?-1:1;
841:       IndefOrthog(s+ds->l, X+i*ld+ds->l, ss[i],X+(i+1)*ld+ds->l, &h,n1);
842:       IndefNorm(s+ds->l,X+(i+1)*ld+ds->l,&d2,n1);
843:       ss[i+1] = (d2<0)?-1:1;
844:       d[i] = PetscRealPart((wr[i]-wi[i]*h/d1)*ss[i]);
845:       d[i+1] = PetscRealPart((wr[i]+wi[i]*h/d1)*ss[i+1]);
846:       e[i] = PetscRealPart(wi[i]*d2/d1*ss[i]); e[i+1] = 0.0;
847:       i++;
848:     }
849:   }
850:   for (i=ds->l;i<ds->n;i++) s[i] = ss[i];
851:   /* accumulate previous Q */
852:   if (accum && mat!=DS_MAT_Q) {
853:     DSAllocateMat_Private(ds,DS_MAT_W);
854:     W = ds->mat[DS_MAT_W];
855:     DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
856:     BLASgemm_("N","N",&n1,&n1,&n1,&oneS,W+off,&ld,ds->mat[DS_MAT_X]+off,&ld,&zeroS,ds->mat[DS_MAT_Q]+off,&ld);
857:   }
858:   if (!ds->compact) {DSSwitchFormat_GHIEP(ds,PETSC_FALSE);}
859:   return(0);
860: }

864: /*
865:   Get eigenvectors with inverse iteration.
866:   The system matrix is in Hessenberg form.
867: */
868: PetscErrorCode DSGHIEPPseudoOrthogInverseIteration(DS ds,PetscScalar *wr,PetscScalar *wi)
869: {
870: #if defined(PETSC_MISSING_LAPACK_HSEIN)
872:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HSEIN - Lapack routine is unavailable");
873: #else
875:   PetscInt       i,off;
876:   PetscBLASInt   *select,*infoC,ld,n1,mout,info;
877:   PetscScalar    *A,*B,*H,*X;
878:   PetscReal      *s,*d,*e;

881:   ld = PetscBLASIntCast(ds->ld);
882:   n1 = PetscBLASIntCast(ds->n - ds->l);
883:   DSAllocateWork_Private(ds,ld*ld+2*ld,ld,2*ld);
884:   DSAllocateMat_Private(ds,DS_MAT_W);
885:   A = ds->mat[DS_MAT_A];
886:   B = ds->mat[DS_MAT_B];
887:   H = ds->mat[DS_MAT_W];
888:   s = ds->rmat[DS_MAT_D];
889:   d = ds->rmat[DS_MAT_T];
890:   e = d + ld;
891:   select = ds->iwork;
892:   infoC = ds->iwork + ld;
893:   off = ds->l+ds->l*ld;
894:   if (ds->compact) {
895:     H[off] = d[ds->l]*s[ds->l];
896:     H[off+ld] = e[ds->l]*s[ds->l];
897:     for (i=ds->l+1;i<ds->n-1;i++) {
898:       H[i+(i-1)*ld] = e[i-1]*s[i];
899:       H[i+i*ld] = d[i]*s[i];
900:       H[i+(i+1)*ld] = e[i]*s[i];
901:     }
902:     H[ds->n-1+(ds->n-2)*ld] = e[ds->n-2]*s[ds->n-1];
903:     H[ds->n-1+(ds->n-1)*ld] = d[ds->n-1]*s[ds->n-1];
904:   } else {
905:     s[ds->l] = PetscRealPart(B[off]);
906:     H[off] = A[off]*s[ds->l];
907:     H[off+ld] = A[off+ld]*s[ds->l];
908:     for (i=ds->l+1;i<ds->n-1;i++) {
909:       s[i] = PetscRealPart(B[i+i*ld]);
910:       H[i+(i-1)*ld] = A[i+(i-1)*ld]*s[i];
911:       H[i+i*ld]     = A[i+i*ld]*s[i];
912:       H[i+(i+1)*ld] = A[i+(i+1)*ld]*s[i];
913:     }
914:     s[ds->n-1] = PetscRealPart(B[ds->n-1+(ds->n-1)*ld]);
915:     H[ds->n-1+(ds->n-2)*ld] = A[ds->n-1+(ds->n-2)*ld]*s[ds->n-1];
916:     H[ds->n-1+(ds->n-1)*ld] = A[ds->n-1+(ds->n-1)*ld]*s[ds->n-1];
917:   }
918:   DSAllocateMat_Private(ds,DS_MAT_X);
919:   X = ds->mat[DS_MAT_X];
920:   for (i=0;i<n1;i++)select[i]=1;
921: #if !defined(PETSC_USE_COMPLEX)
922:   LAPACKhsein_("R","N","N",select,&n1,H+off,&ld,wr+ds->l,wi+ds->l,PETSC_NULL,&ld,X+off,&ld,&n1,&mout,ds->work,PETSC_NULL,infoC,&info);
923: #else
924:   LAPACKhsein_("R","N","N",select,&n1,H+off,&ld,wr+ds->l,PETSC_NULL,&ld,X+off,&ld,&n1,&mout,ds->work,ds->rwork,PETSC_NULL,infoC,&info);
925: #endif
926:   if (info<0)SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in hsein routine %d",-i);
927:   if (info>0) {
928:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Convergence error in hsein routine %d",i);
929:   }

931:   DSEigenVectorsPseudoOrthog(ds, DS_MAT_X, wr, wi,PETSC_TRUE);
932:   return(0);
933: #endif
934: }

938: /*
939:    Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
940: */
941: PetscErrorCode DSIntermediate_GHIEP(DS ds)
942: {
944:   PetscInt       i,ld,off;
945:   PetscScalar    *A,*B,*Q;
946:   PetscReal      *d,*e,*s;

949:   ld = ds->ld;
950:   A = ds->mat[DS_MAT_A];
951:   B = ds->mat[DS_MAT_B];
952:   Q = ds->mat[DS_MAT_Q];
953:   d = ds->rmat[DS_MAT_T];
954:   e = ds->rmat[DS_MAT_T]+ld;
955:   s = ds->rmat[DS_MAT_D];
956:   off = ds->l+ds->l*ld;
957:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
958:   DSAllocateWork_Private(ds,ld*ld,0,0);

960:   for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
961:   for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
962:   if (ds->compact) {
963:     if (ds->state < DS_STATE_INTERMEDIATE) {
964:       DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
965:       TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work,ld*ld);
966:       ds->k = ds->l;
967:       PetscMemzero(d+2*ld+ds->l,(ds->n-ds->l)*sizeof(PetscReal));
968:     }
969:   } else {
970:     if (ds->state < DS_STATE_INTERMEDIATE) {
971:       for (i=0;i<ds->n;i++)
972:         s[i] = PetscRealPart(B[i+i*ld]);
973:       TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work,ld*ld);
974:       PetscMemzero(d+2*ld,(ds->n)*sizeof(PetscReal));
975:       ds->k = ds->l;
976:       DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
977:     } else { DSSwitchFormat_GHIEP(ds,PETSC_TRUE); }
978:   }
979:   return(0);
980: }

984: /*
985:    Undo 2x2 blocks that have real eigenvalues.
986: */
987: PetscErrorCode DSGHIEPRealBlocks(DS ds)
988: {
990:   PetscInt       i;
991:   PetscReal      e,d1,d2,s1,s2,ss1,ss2,t,dd,ss;
992:   PetscReal      maxy,ep,scal1,scal2,snorm;
993:   PetscReal      *T,*D,b[4],M[4],wr1,wr2,wi;
994:   PetscScalar    *A,*B,Y[4],oneS = 1.0,zeroS = 0.0;
995:   PetscBLASInt   m,two=2,ld;
996:   PetscBool      isreal;

999:   ld = PetscBLASIntCast(ds->ld);
1000:   m = PetscBLASIntCast(ds->n-ds->l);
1001:   A = ds->mat[DS_MAT_A];
1002:   B = ds->mat[DS_MAT_B];
1003:   T = ds->rmat[DS_MAT_T];
1004:   D = ds->rmat[DS_MAT_D];
1005:   DSAllocateWork_Private(ds,2*m,0,0);
1006:   for (i=ds->l;i<ds->n-1;i++) {
1007:     e = (ds->compact)?T[ld+i]:PetscRealPart(A[(i+1)+ld*i]);
1008:     if (e != 0.0) { /* 2x2 block */
1009:       if (ds->compact) {
1010:         s1 = D[i];
1011:         d1 = T[i];
1012:         s2 = D[i+1];
1013:         d2 = T[i+1];
1014:       } else {
1015:         s1 = PetscRealPart(B[i*ld+i]);
1016:         d1 = PetscRealPart(A[i*ld+i]);
1017:         s2 = PetscRealPart(B[(i+1)*ld+i+1]);
1018:         d2 = PetscRealPart(A[(i+1)*ld+i+1]);
1019:       }
1020:       isreal = PETSC_FALSE;
1021:       if (s1==s2) { /* apply a Jacobi rotation to compute the eigendecomposition */
1022:         dd = d1-d2;
1023:         if (2*PetscAbsReal(e) <= dd) {
1024:           t = 2*e/dd;
1025:           t = t/(1 + PetscSqrtReal(1+t*t));
1026:         } else {
1027:           t = dd/(2*e);
1028:           ss = (t>=0)?1.0:-1.0;
1029:           t = ss/(PetscAbsReal(t)+PetscSqrtReal(1+t*t));
1030:         }
1031:         Y[0] = 1/PetscSqrtReal(1 + t*t); Y[3] = Y[0]; /* c */
1032:         Y[1] = Y[0]*t; Y[2] = -Y[1]; /* s */
1033:         wr1 = d1+t*e;
1034:         wr2 = d2-t*e;
1035:         ss1 = s1; ss2 = s2;
1036:         isreal = PETSC_TRUE;
1037:       } else {
1038:         ss1 = 1.0; ss2 = 1.0,
1039:         M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
1040:         b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
1041:         ep = LAPACKlamch_("S");
1042:         /* Compute eigenvalues of the block */
1043:         LAPACKlag2_(M, &two, b, &two,&ep , &scal1, &scal2, &wr1, &wr2, &wi);
1044:         if (wi==0.0) { /* Real eigenvalues */
1045:           isreal = PETSC_TRUE;
1046:           if (scal1<ep||scal2<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
1047:           wr1 /= scal1; wr2 /= scal2;
1048:           if ( PetscAbsReal(s1*d1-wr1)<PetscAbsReal(s2*d2-wr1)) { Y[0] = wr1-s2*d2; Y[1] =s2*e;}
1049:           else{ Y[0] = s1*e; Y[1] = wr1-s1*d1; }
1050:           /* normalize with a signature*/
1051:           maxy = PetscMax(PetscAbsScalar(Y[0]),PetscAbsScalar(Y[1]));
1052:           scal1 = PetscRealPart(Y[0])/maxy; scal2 = PetscRealPart(Y[1])/maxy;
1053:           snorm = scal1*scal1*s1 + scal2*scal2*s2;
1054:           if (snorm<0) {ss1 = -1.0; snorm = -snorm;}
1055:           snorm = maxy*PetscSqrtReal(snorm); Y[0] = Y[0]/snorm; Y[1] = Y[1]/snorm;
1056:           if ( PetscAbsReal(s1*d1-wr2)<PetscAbsReal(s2*d2-wr2)) { Y[2] = wr2-s2*d2; Y[3] =s2*e;}
1057:           else{ Y[2] = s1*e; Y[3] = wr2-s1*d1; }
1058:           maxy = PetscMax(PetscAbsScalar(Y[2]),PetscAbsScalar(Y[3]));
1059:           scal1 = PetscRealPart(Y[2])/maxy; scal2 = PetscRealPart(Y[3])/maxy;
1060:           snorm = scal1*scal1*s1 + scal2*scal2*s2;
1061:           if (snorm<0) {ss2 = -1.0; snorm = -snorm;}
1062:           snorm = maxy*PetscSqrtReal(snorm);Y[2] = Y[2]/snorm; Y[3] = Y[3]/snorm;
1063:         }
1064:         wr1 *= ss1; wr2 *= ss2;
1065:       }
1066:       if (isreal) {
1067:         if (ds->compact) {
1068:           D[i] = ss1;;
1069:           T[i] = wr1;
1070:           D[i+1] = ss2;
1071:           T[i+1] = wr2;
1072:           T[ld+i] = 0.0;
1073:         }else {
1074:           B[i*ld+i] = ss1;
1075:           A[i*ld+i] = wr1;
1076:           B[(i+1)*ld+i+1] = ss2;
1077:           A[(i+1)*ld+i+1] = wr2;
1078:           A[(i+1)+ld*i] = 0.0;
1079:           A[i+ld*(i+1)] = 0.0;
1080:         }
1081:         BLASgemm_("N","N",&m,&two,&two,&oneS,ds->mat[DS_MAT_Q]+ds->l+i*ld,&ld,Y,&two,&zeroS,ds->work,&m);
1082:         PetscMemcpy(ds->mat[DS_MAT_Q]+ds->l+i*ld,ds->work,m*sizeof(PetscScalar));
1083:         PetscMemcpy(ds->mat[DS_MAT_Q]+ds->l+(i+1)*ld,ds->work+m,m*sizeof(PetscScalar));
1084:       }
1085:       i++;
1086:     }
1087:   }
1088:   return(0);
1089: }

1093: PetscErrorCode DSSolve_GHIEP_QR_II(DS ds,PetscScalar *wr,PetscScalar *wi)
1094: {
1095: #if defined(PETSC_MISSING_LAPACK_HSEQR)
1097:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable");
1098: #else
1100:   PetscInt       i,off;
1101:   PetscBLASInt   n1,ld,one,info,lwork;
1102:   PetscScalar    *H,*A,*B,*Q,*work;
1103:   PetscReal      *d,*e,*s;

1106:   one = 1;
1107:   n1 = PetscBLASIntCast(ds->n - ds->l);
1108:   ld = PetscBLASIntCast(ds->ld);
1109:   off = ds->l + ds->l*ld;
1110:   A = ds->mat[DS_MAT_A];
1111:   B = ds->mat[DS_MAT_B];
1112:   Q = ds->mat[DS_MAT_Q];
1113:   d = ds->rmat[DS_MAT_T];
1114:   e = ds->rmat[DS_MAT_T] + ld;
1115:   s = ds->rmat[DS_MAT_D];
1116:   DSAllocateWork_Private(ds,ld*ld,2*ld,ld*2);
1117:   work = ds->work;
1118:   lwork = ld*ld;

1120:   /* Quick return if possible */
1121:   if (n1 == 1) {
1122:     *(Q+off) = 1;
1123:     if (ds->compact) {
1124:       wr[ds->l] = d[ds->l]/s[ds->l];
1125:       wi[ds->l] = 0.0;
1126:     } else {
1127:       d[ds->l] = PetscRealPart(A[off]);
1128:       s[ds->l] = PetscRealPart(B[off]);
1129:       wr[ds->l] = d[ds->l]/s[ds->l];
1130:       wi[ds->l] = 0.0;
1131:     }
1132:     return(0);
1133:   }
1134:   /* Reduce to pseudotriadiagonal form */
1135:   DSIntermediate_GHIEP( ds);

1137:   /* Compute Eigenvalues (QR)*/
1138:   DSAllocateMat_Private(ds,DS_MAT_W);
1139:   H = ds->mat[DS_MAT_W];
1140:   if (ds->compact) {
1141:     H[off] = d[ds->l]*s[ds->l];
1142:     H[off+ld] = e[ds->l]*s[ds->l];
1143:     for (i=ds->l+1;i<ds->n-1;i++) {
1144:       H[i+(i-1)*ld] = e[i-1]*s[i];
1145:       H[i+i*ld]     = d[i]*s[i];
1146:       H[i+(i+1)*ld] = e[i]*s[i];
1147:     }
1148:     H[ds->n-1+(ds->n-2)*ld] = e[ds->n-2]*s[ds->n-1];
1149:     H[ds->n-1+(ds->n-1)*ld] = d[ds->n-1]*s[ds->n-1];
1150:   } else {
1151:     s[ds->l] = PetscRealPart(B[off]);
1152:     H[off] = A[off]*s[ds->l];
1153:     H[off+ld] = A[off+ld]*s[ds->l];
1154:     for (i=ds->l+1;i<ds->n-1;i++) {
1155:       s[i] = PetscRealPart(B[i+i*ld]);
1156:       H[i+(i-1)*ld] = A[i+(i-1)*ld]*s[i];
1157:       H[i+i*ld]     = A[i+i*ld]*s[i];
1158:       H[i+(i+1)*ld] = A[i+(i+1)*ld]*s[i];
1159:     }
1160:     s[ds->n-1] = PetscRealPart(B[ds->n-1+(ds->n-1)*ld]);
1161:     H[ds->n-1+(ds->n-2)*ld] = A[ds->n-1+(ds->n-2)*ld]*s[ds->n-1];
1162:     H[ds->n-1+(ds->n-1)*ld] = A[ds->n-1+(ds->n-1)*ld]*s[ds->n-1];
1163:   }

1165: #if !defined(PETSC_USE_COMPLEX)
1166:   LAPACKhseqr_("E","N",&n1,&one,&n1,H+off,&ld,wr+ds->l,wi+ds->l,PETSC_NULL,&ld,work,&lwork,&info);
1167: #else
1168:   LAPACKhseqr_("E","N",&n1,&one,&n1,H+off,&ld,wr,PETSC_NULL,&ld,work,&lwork,&info);
1169: #endif
1170:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",&info);

1172:   /* Compute Eigenvectors with Inverse Iteration */
1173:   DSGHIEPPseudoOrthogInverseIteration(ds,wr,wi);

1175:   /* Recover eigenvalues from diagonal */
1176:   DSGHIEPComplexEigs(ds, 0, ds->l, wr, wi);
1177:   return(0);
1178: #endif
1179: }

1183: PetscErrorCode DSSolve_GHIEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
1184: {
1185: #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
1187:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
1188: #else
1190:   PetscInt       i,j,off;
1191:   PetscBLASInt   lwork,info,n1,one=1,mout,ld;
1192:   PetscScalar    *A,*B,*H,*Q,*work,*tau;
1193:   PetscReal      *d,*e,*s;

1196:   n1 = PetscBLASIntCast(ds->n - ds->l);
1197:   ld = PetscBLASIntCast(ds->ld);
1198:   off = ds->l + ds->l*ld;
1199:   A = ds->mat[DS_MAT_A];
1200:   B = ds->mat[DS_MAT_B];
1201:   Q = ds->mat[DS_MAT_Q];
1202:   d = ds->rmat[DS_MAT_T];
1203:   e = ds->rmat[DS_MAT_T] + ld;
1204:   s = ds->rmat[DS_MAT_D];
1205:   DSAllocateMat_Private(ds,DS_MAT_W);
1206:   H = ds->mat[DS_MAT_W];
1207:   DSAllocateWork_Private(ds,ld+ld*ld,ld,0);
1208:   tau  = ds->work;
1209:   work = ds->work+ld;
1210:   lwork = ld*ld;

1212:    /* initialize orthogonal matrix */
1213:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
1214:   for (i=0;i< ds->n;i++)
1215:     Q[i+i*ld] = 1.0;
1216:   /* quick return */
1217:   if (n1 == 1) {
1218:     if (ds->compact) {
1219:       wr[ds->l] = d[ds->l]/s[ds->l];
1220:       wi[ds->l] = 0.0;
1221:     } else {
1222:       d[ds->l] = PetscRealPart(A[off]);
1223:       s[ds->l] = PetscRealPart(B[off]);
1224:       wr[ds->l] = d[ds->l]/s[ds->l];
1225:       wi[ds->l] = 0.0;
1226:     }
1227:     return(0);
1228:   }

1230:   /* form standard problem in H */
1231:   if (ds->compact) {
1232:     PetscMemzero(H,ld*ld*sizeof(PetscScalar));
1233:     for (i=ds->l; i < ds->n-1; i++) {
1234:       H[i+i*ld] = d[i]/s[i];
1235:       H[(i+1)+i*ld] = e[i]/s[i+1];
1236:       H[i+(i+1)*ld] = e[i]/s[i];
1237:     }
1238:     H[ds->n-1 + (ds->n-1)*ld] = d[ds->n-1]/s[ds->n-1];

1240:     for (i=ds->l; i < ds->k; i++) {
1241:       H[ds->k+i*ld] = *(ds->rmat[DS_MAT_T]+2*ld+i)/s[ds->k];
1242:       H[i+ds->k*ld] = *(ds->rmat[DS_MAT_T]+2*ld+i)/s[i];
1243:     }
1244:   } else {
1245:     for (j=ds->l; j<ds->n; j++) {
1246:       for (i=ds->l; i<ds->n; i++) {
1247:         H[i+j*ld] = A[i+j*ld]/B[i+i*ld];
1248:       }
1249:     }
1250:   }
1251:   /* reduce to upper Hessenberg form */
1252:   if (ds->state<DS_STATE_INTERMEDIATE) {
1253:     LAPACKgehrd_(&n1,&one,&n1,H+off,&ld,tau,work,&lwork,&info);
1254:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEHRD %d",&info);
1255:     for (j=ds->l;j<ds->n-1;j++) {
1256:       for (i=j+2;i<ds->n;i++) {
1257:         Q[i+j*ld] = H[i+j*ld];
1258:         H[i+j*ld] = 0.0;
1259:       }
1260:     }
1261:     LAPACKorghr_(&n1,&one,&n1,Q+off,&ld,tau,work,&lwork,&info);
1262:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGHR %d",&info);
1263:   }

1265:   /* Compute the real Schur form */
1266: #if !defined(PETSC_USE_COMPLEX)
1267:   LAPACKhseqr_("S","V",&n1,&one,&n1,H+off,&ld,wr+ds->l,wi+ds->l,Q+off,&ld,work,&lwork,&info);
1268: #else
1269:   LAPACKhseqr_("S","V",&n1,&one,&n1,H+off,&ld,wr,Q+off,&ld,work,&lwork,&info);
1270: #endif
1271:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",&info);
1272: 
1273:   /* Compute eigenvectors */
1274: #if !defined(PETSC_USE_COMPLEX)
1275:   LAPACKtrevc_("R","B",PETSC_NULL,&n1,H+off,&ld,PETSC_NULL,&ld,Q+off,&ld,&n1,&mout,ds->work,&info);
1276: #else
1277:   LAPACKtrevc_("R","B",PETSC_NULL,&n1,H+off,&ld,PETSC_NULL,&ld,Q+off,&ld,&n1,&mout,work,ds->rwork,&info);
1278: #endif
1279:   if (info) SETERRQ1(((PetscObject)ds)->comm,PETSC_ERR_LIB,"Error in Lapack xTREVC %i",&info);

1281:   /* Compute real s-orthonormal basis */
1282:   DSEigenVectorsPseudoOrthog(ds, DS_MAT_Q, wr, wi,PETSC_FALSE);

1284:   /* Undo from diagonal the blocks whith real eigenvalues*/
1285:   DSGHIEPRealBlocks(ds);

1287:   /* Recover eigenvalues from diagonal */
1288:   DSGHIEPComplexEigs(ds, 0, ds->l, wr, wi);
1289:   return(0);
1290: #endif
1291: }

1295: PetscErrorCode DSNormalize_GHIEP(DS ds,DSMatType mat,PetscInt col)
1296: {
1298:   PetscInt       i,i0,i1;
1299:   PetscBLASInt   ld,n,one = 1;
1300:   PetscScalar    *A = ds->mat[DS_MAT_A],norm,*x;
1301: #if !defined(PETSC_USE_COMPLEX)
1302:   PetscScalar    norm0;
1303: #endif

1306:   switch (mat) {
1307:     case DS_MAT_X:
1308:     case DS_MAT_Y:
1309:     case DS_MAT_Q:
1310:       /* Supported matrices */
1311:       break;
1312:     case DS_MAT_U:
1313:     case DS_MAT_VT:
1314:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
1315:       break;
1316:     default:
1317:       SETERRQ(((PetscObject)ds)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
1318:   }

1320:   n  = PetscBLASIntCast(ds->n);
1321:   ld = PetscBLASIntCast(ds->ld);
1322:   DSGetArray(ds,mat,&x);
1323:   if (col < 0) {
1324:     i0 = 0; i1 = ds->n;
1325:   } else if (col>0 && A[ds->ld*(col-1)+col] != 0.0) {
1326:     i0 = col-1; i1 = col+1;
1327:   } else {
1328:     i0 = col; i1 = col+1;
1329:   }
1330:   for (i=i0; i<i1; i++) {
1331: #if !defined(PETSC_USE_COMPLEX)
1332:     if (i<n-1 && A[ds->ld*i+i+1] != 0.0) {
1333:       norm = BLASnrm2_(&n,&x[ld*i],&one);
1334:       norm0 = BLASnrm2_(&n,&x[ld*(i+1)],&one);
1335:       norm = 1.0/SlepcAbsEigenvalue(norm,norm0);
1336:       BLASscal_(&n,&norm,&x[ld*i],&one);
1337:       BLASscal_(&n,&norm,&x[ld*(i+1)],&one);
1338:       i++;
1339:     } else
1340: #endif
1341:     {
1342:       norm = BLASnrm2_(&n,&x[ld*i],&one);
1343:       norm = 1.0/norm;
1344:       BLASscal_(&n,&norm,&x[ld*i],&one);
1345:      }
1346:   }
1347:   return(0);
1348: }

1350: extern PetscErrorCode DSSolve_GHIEP_HZ(DS,PetscScalar*,PetscScalar*);
1351: extern PetscErrorCode DSSolve_GHIEP_DQDS_II(DS,PetscScalar*,PetscScalar*);

1353: EXTERN_C_BEGIN
1356: PetscErrorCode DSCreate_GHIEP(DS ds)
1357: {
1359:   ds->ops->allocate      = DSAllocate_GHIEP;
1360:   ds->ops->view          = DSView_GHIEP;
1361:   ds->ops->vectors       = DSVectors_GHIEP;
1362:   ds->ops->solve[0]      = DSSolve_GHIEP_HZ;
1363:   ds->ops->solve[1]      = DSSolve_GHIEP_QR_II;
1364:   ds->ops->solve[2]      = DSSolve_GHIEP_QR;
1365:   ds->ops->solve[3]      = DSSolve_GHIEP_DQDS_II;
1366:   ds->ops->sort          = DSSort_GHIEP;
1367:   ds->ops->normalize     = DSNormalize_GHIEP;
1368:   return(0);
1369: }
1370: EXTERN_C_END