Actual source code: arnoldi.c

  1: /*                       

  3:    SLEPc eigensolver: "arnoldi"

  5:    Method: Explicitly Restarted Arnoldi

  7:    Algorithm:

  9:        Arnoldi method with explicit restart and deflation.

 11:    References:

 13:        [1] "Arnoldi Methods in SLEPc", SLEPc Technical Report STR-4, 
 14:            available at http://www.grycap.upv.es/slepc.

 16:    Last update: Oct 2006

 18:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 19:       SLEPc - Scalable Library for Eigenvalue Problem Computations
 20:       Copyright (c) 2002-2007, Universidad Politecnica de Valencia, Spain

 22:       This file is part of SLEPc. See the README file for conditions of use
 23:       and additional information.
 24:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 25: */

 27:  #include src/eps/epsimpl.h
 28:  #include slepcblaslapack.h

 30: typedef struct {
 31:   PetscTruth delayed;
 32: } EPS_ARNOLDI;

 36: PetscErrorCode EPSSetUp_ARNOLDI(EPS eps)
 37: {
 39:   PetscInt       N;

 42:   VecGetSize(eps->vec_initial,&N);
 43:   if (eps->ncv) {
 44:     if (eps->ncv<eps->nev) SETERRQ(1,"The value of ncv must be at least nev");
 45:   }
 46:   else eps->ncv = PetscMin(N,PetscMax(2*eps->nev,eps->nev+15));
 47:   if (!eps->max_it) eps->max_it = PetscMax(100,2*N/eps->ncv);
 48:   if (eps->ishermitian && (eps->which==EPS_LARGEST_IMAGINARY || eps->which==EPS_SMALLEST_IMAGINARY))
 49:     SETERRQ(1,"Wrong value of eps->which");

 51:   EPSAllocateSolution(eps);
 52:   PetscFree(eps->T);
 53:   PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&eps->T);
 54:   if (eps->solverclass==EPS_TWO_SIDE) {
 55:     PetscFree(eps->Tl);
 56:     PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&eps->Tl);
 57:   }
 58:   EPSDefaultGetWork(eps,2);
 59:   return(0);
 60: }

 64: /*
 65:    EPSBasicArnoldi - Computes an m-step Arnoldi factorization. The first k
 66:    columns are assumed to be locked and therefore they are not modified. On
 67:    exit, the following relation is satisfied:

 69:                     OP * V - V * H = f * e_m^T

 71:    where the columns of V are the Arnoldi vectors (which are B-orthonormal),
 72:    H is an upper Hessenberg matrix, f is the residual vector and e_m is
 73:    the m-th vector of the canonical basis. The vector f is B-orthogonal to
 74:    the columns of V. On exit, beta contains the B-norm of f and the next 
 75:    Arnoldi vector can be computed as v_{m+1} = f / beta. 
 76: */
 77: PetscErrorCode EPSBasicArnoldi(EPS eps,PetscTruth trans,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta,PetscTruth *breakdown)
 78: {
 80:   int            j,m = *M;
 81:   PetscReal      norm;

 84:   for (j=k;j<m-1;j++) {
 85:     if (trans) { STApplyTranspose(eps->OP,V[j],V[j+1]); }
 86:     else { STApply(eps->OP,V[j],V[j+1]); }
 87:     IPOrthogonalize(eps->ip,eps->nds,PETSC_NULL,eps->DS,V[j+1],PETSC_NULL,PETSC_NULL,PETSC_NULL,eps->work[0]);
 88:     IPOrthogonalize(eps->ip,j+1,PETSC_NULL,V,V[j+1],H+m*j,&norm,breakdown,eps->work[0]);
 89:     H[(m+1)*j+1] = norm;
 90:     if (*breakdown) {
 91:       *M = j+1;
 92:       *beta = norm;
 93:       return(0);
 94:     } else {
 95:       VecScale(V[j+1],1/norm);
 96:     }
 97:   }
 98:   STApply(eps->OP,V[m-1],f);
 99:   IPOrthogonalize(eps->ip,eps->nds,PETSC_NULL,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL,eps->work[0]);
100:   IPOrthogonalize(eps->ip,m,PETSC_NULL,V,f,H+m*(m-1),beta,PETSC_NULL,eps->work[0]);
101:   return(0);
102: }

106: /*
107:    EPSDelayedArnoldi - This function is equivalent to EPSBasicArnoldi but
108:    performs the computation in a different way. The main idea is that
109:    reorthogonalization is delayed to the next Arnoldi step. This version is
110:    more scalable but in some cases convergence may stagnate.
111: */
112: PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta,PetscTruth *breakdown)
113: {
115:   int            i,j,m=*M;
116:   Vec            w,u,t;
117:   PetscScalar    shh[100],*lhh,dot,dot2;
118:   PetscReal      norm1=0.0,norm2;

121:   if (m<=100) lhh = shh;
122:   else { PetscMalloc(m*sizeof(PetscScalar),&lhh); }
123:   VecDuplicate(f,&w);
124:   VecDuplicate(f,&u);
125:   VecDuplicate(f,&t);

127:   for (j=k;j<m;j++) {
128:     STApply(eps->OP,V[j],f);
129:     IPOrthogonalize(eps->ip,eps->nds,PETSC_NULL,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL,eps->work[0]);

131:     IPMInnerProductBegin(eps->ip,f,j+1,V,H+m*j);
132:     if (j>k) {
133:       IPMInnerProductBegin(eps->ip,V[j],j,V,lhh);
134:       IPInnerProductBegin(eps->ip,V[j],V[j],&dot);
135:     }
136:     if (j>k+1) {
137:       IPNormBegin(eps->ip,u,&norm2);
138:       VecDotBegin(u,V[j-2],&dot2);
139:     }
140: 
141:     IPMInnerProductEnd(eps->ip,f,j+1,V,H+m*j);
142:     if (j>k) {
143:       IPMInnerProductEnd(eps->ip,V[j],j,V,lhh);
144:       IPInnerProductEnd(eps->ip,V[j],V[j],&dot);
145:     }
146:     if (j>k+1) {
147:       IPNormEnd(eps->ip,u,&norm2);
148:       VecDotEnd(u,V[j-2],&dot2);
149:       if (PetscAbsScalar(dot2/norm2) > PETSC_MACHINE_EPSILON) {
150:         *breakdown = PETSC_TRUE;
151:         *M = j-1;
152:         *beta = norm2;

154:         if (m>100) { PetscFree(lhh); }
155:         VecDestroy(w);
156:         VecDestroy(u);
157:         VecDestroy(t);
158:         return(0);
159:       }
160:     }
161: 
162:     if (j>k) {
163:       norm1 = sqrt(PetscRealPart(dot));
164:       for (i=0;i<j;i++)
165:         H[m*j+i] = H[m*j+i]/norm1;
166:       H[m*j+j] = H[m*j+j]/dot;
167: 
168:       VecCopy(V[j],t);
169:       VecScale(V[j],1.0/norm1);
170:       VecScale(f,1.0/norm1);
171:     }

173:     VecSet(w,0.0);
174:     VecMAXPY(w,j+1,H+m*j,V);
175:     VecAXPY(f,-1.0,w);

177:     if (j>k) {
178:       VecSet(w,0.0);
179:       VecMAXPY(w,j,lhh,V);
180:       VecAXPY(t,-1.0,w);
181:       for (i=0;i<j;i++)
182:         H[m*(j-1)+i] += lhh[i];
183:     }

185:     if (j>k+1) {
186:       VecCopy(u,V[j-1]);
187:       VecScale(V[j-1],1.0/norm2);
188:       H[m*(j-2)+j-1] = norm2;
189:     }

191:     if (j<m-1) {
192:       VecCopy(f,V[j+1]);
193:       VecCopy(t,u);
194:     }
195:   }

197:   IPNorm(eps->ip,t,&norm2);
198:   VecScale(t,1.0/norm2);
199:   VecCopy(t,V[m-1]);
200:   H[m*(m-2)+m-1] = norm2;

202:   IPMInnerProduct(eps->ip,f,m,V,lhh);
203: 
204:   VecSet(w,0.0);
205:   VecMAXPY(w,m,lhh,V);
206:   VecAXPY(f,-1.0,w);
207:   for (i=0;i<m;i++)
208:     H[m*(m-1)+i] += lhh[i];

210:   IPNorm(eps->ip,f,beta);
211:   VecScale(f,1.0 / *beta);
212:   *breakdown = PETSC_FALSE;
213: 
214:   if (m>100) { PetscFree(lhh); }
215:   VecDestroy(w);
216:   VecDestroy(u);
217:   VecDestroy(t);

219:   return(0);
220: }

224: /*
225:    EPSDelayedArnoldi1 - This function is similar to EPSDelayedArnoldi1,
226:    but without reorthogonalization (only delayed normalization).
227: */
228: PetscErrorCode EPSDelayedArnoldi1(EPS eps,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta,PetscTruth *breakdown)
229: {
231:   int            i,j,m=*M;
232:   Vec            w;
233:   PetscScalar    dot;
234:   PetscReal      norm=0.0;

237:   VecDuplicate(f,&w);

239:   for (j=k;j<m;j++) {
240:     STApply(eps->OP,V[j],f);
241:     IPOrthogonalize(eps->ip,eps->nds,PETSC_NULL,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL,eps->work[0]);

243:     IPMInnerProductBegin(eps->ip,f,j+1,V,H+m*j);
244:     if (j>k) {
245:       IPInnerProductBegin(eps->ip,V[j],V[j],&dot);
246:     }
247: 
248:     IPMInnerProductEnd(eps->ip,f,j+1,V,H+m*j);
249:     if (j>k) {
250:       IPInnerProductEnd(eps->ip,V[j],V[j],&dot);
251:     }
252: 
253:     if (j>k) {
254:       norm = sqrt(PetscRealPart(dot));
255:       VecScale(V[j],1.0/norm);
256:       H[m*(j-1)+j] = norm;

258:       for (i=0;i<j;i++)
259:         H[m*j+i] = H[m*j+i]/norm;
260:       H[m*j+j] = H[m*j+j]/dot;
261:       VecScale(f,1.0/norm);
262:     }

264:     VecSet(w,0.0);
265:     VecMAXPY(w,j+1,H+m*j,V);
266:     VecAXPY(f,-1.0,w);

268:     if (j<m-1) {
269:       VecCopy(f,V[j+1]);
270:     }
271:   }

273:   IPNorm(eps->ip,f,beta);
274:   VecScale(f,1.0 / *beta);
275:   *breakdown = PETSC_FALSE;
276: 
277:   VecDestroy(w);
278:   return(0);
279: }

283: /*
284:    EPSArnoldiResiduals - Computes the 2-norm of the residual vectors from
285:    the information provided by the m-step Arnoldi factorization,

287:                     OP * V - V * H = f * e_m^T

289:    For the approximate eigenpair (k_i,V*y_i), the residual norm is computed as
290:    |beta*y(end,i)| where beta is the norm of f and y is the corresponding 
291:    eigenvector of H.
292: */
293: PetscErrorCode ArnoldiResiduals(PetscScalar *H,int ldh,PetscScalar *U,PetscReal beta,int nconv,int ncv,PetscScalar *eigr,PetscScalar *eigi,PetscReal *errest,PetscScalar *work)
294: {
295: #if defined(SLEPC_MISSING_LAPACK_TREVC)
297:   SETERRQ(PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable.");
298: #else
300:   int            i,mout,info;
301:   PetscScalar    *Y=work+4*ncv;
302:   PetscReal      w;
303: #if defined(PETSC_USE_COMPLEX)
304:   PetscReal      *rwork=(PetscReal*)(work+3*ncv);
305: #endif


309:   /* Compute eigenvectors Y of H */
310:   PetscMemcpy(Y,U,ncv*ncv*sizeof(PetscScalar));
311:   PetscLogEventBegin(EPS_Dense,0,0,0,0);
312: #if !defined(PETSC_USE_COMPLEX)
313:   LAPACKtrevc_("R","B",PETSC_NULL,&ncv,H,&ldh,PETSC_NULL,&ncv,Y,&ncv,&ncv,&mout,work,&info,1,1);
314: #else
315:   LAPACKtrevc_("R","B",PETSC_NULL,&ncv,H,&ldh,PETSC_NULL,&ncv,Y,&ncv,&ncv,&mout,work,rwork,&info,1,1);
316: #endif
317:   PetscLogEventEnd(EPS_Dense,0,0,0,0);
318:   if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);

320:   /* Compute residual norm estimates as beta*abs(Y(m,:)) */
321:   for (i=nconv;i<ncv;i++) {
322: #if !defined(PETSC_USE_COMPLEX)
323:     if (eigi[i] != 0 && i<ncv-1) {
324:       errest[i] = beta*SlepcAbsEigenvalue(Y[i*ncv+ncv-1],Y[(i+1)*ncv+ncv-1]);
325:       w = SlepcAbsEigenvalue(eigr[i],eigi[i]);
326:       if (w > errest[i])
327:         errest[i] = errest[i] / w;
328:       errest[i+1] = errest[i];
329:       i++;
330:     } else
331: #endif
332:     {
333:       errest[i] = beta*PetscAbsScalar(Y[i*ncv+ncv-1]);
334:       w = PetscAbsScalar(eigr[i]);
335:       if (w > errest[i])
336:         errest[i] = errest[i] / w;
337:     }
338:   }
339:   return(0);
340: #endif
341: }

345: PetscErrorCode EPSSolve_ARNOLDI(EPS eps)
346: {
348:   int            i,k;
349:   Vec            f=eps->work[1];
350:   PetscScalar    *H=eps->T,*U,*work;
351:   PetscReal      beta;
352:   PetscTruth     breakdown;
353:   IPOrthogonalizationRefinementType orthog_ref;
354:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

357:   PetscMemzero(eps->T,eps->ncv*eps->ncv*sizeof(PetscScalar));
358:   PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&U);
359:   PetscMalloc((eps->ncv+4)*eps->ncv*sizeof(PetscScalar),&work);
360: 
361:   IPGetOrthogonalization(eps->ip,PETSC_NULL,&orthog_ref,PETSC_NULL);

363:   /* Get the starting Arnoldi vector */
364:   EPSGetStartVector(eps,0,eps->V[0],PETSC_NULL);
365: 
366:   /* Restart loop */
367:   while (eps->reason == EPS_CONVERGED_ITERATING) {
368:     eps->its++;

370:     /* Compute an nv-step Arnoldi factorization */
371:     eps->nv = eps->ncv;
372:     if (!arnoldi->delayed) {
373:       EPSBasicArnoldi(eps,PETSC_FALSE,H,eps->V,eps->nconv,&eps->nv,f,&beta,&breakdown);
374:     } else if (orthog_ref == IP_ORTH_REFINE_NEVER) {
375:       EPSDelayedArnoldi1(eps,H,eps->V,eps->nconv,&eps->nv,f,&beta,&breakdown);
376:     } else {
377:       EPSDelayedArnoldi(eps,H,eps->V,eps->nconv,&eps->nv,f,&beta,&breakdown);
378:     }

380:     /* Reduce H to (quasi-)triangular form, H <- U H U' */
381:     PetscMemzero(U,eps->nv*eps->nv*sizeof(PetscScalar));
382:     for (i=0;i<eps->nv;i++) { U[i*(eps->nv+1)] = 1.0; }
383:     EPSDenseSchur(eps->nv,eps->nconv,H,eps->ncv,U,eps->eigr,eps->eigi);

385:     /* Sort the remaining columns of the Schur form */
386:     EPSSortDenseSchur(eps->nv,eps->nconv,H,eps->ncv,U,eps->eigr,eps->eigi,eps->which);

388:     /* Compute residual norm estimates */
389:     ArnoldiResiduals(H,eps->ncv,U,beta,eps->nconv,eps->nv,eps->eigr,eps->eigi,eps->errest,work);
390: 
391:     /* Lock converged eigenpairs and update the corresponding vectors,
392:        including the restart vector: V(:,idx) = V*U(:,idx) */
393:     k = eps->nconv;
394:     while (k<eps->nv && eps->errest[k]<eps->tol) k++;
395:     for (i=eps->nconv;i<=k && i<eps->nv;i++) {
396:       VecSet(eps->AV[i],0.0);
397:       VecMAXPY(eps->AV[i],eps->nv,U+eps->nv*i,eps->V);
398:     }
399:     for (i=eps->nconv;i<=k && i<eps->nv;i++) {
400:       VecCopy(eps->AV[i],eps->V[i]);
401:     }
402:     eps->nconv = k;

404:     EPSMonitor(eps,eps->its,eps->nconv,eps->eigr,eps->eigi,eps->errest,eps->nv);
405:     if (breakdown) {
406:       PetscInfo2(eps,"Breakdown in Arnoldi method (it=%i norm=%g)\n",eps->its,beta);
407:       EPSGetStartVector(eps,k,eps->V[k],&breakdown);
408:       if (breakdown) {
409:         eps->reason = EPS_DIVERGED_BREAKDOWN;
410:         PetscInfo(eps,"Unable to generate more start vectors\n");
411:       }
412:     }
413:     if (eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
414:     if (eps->nconv >= eps->nev) eps->reason = EPS_CONVERGED_TOL;
415:   }
416: 
417:   PetscFree(U);
418:   PetscFree(work);
419:   return(0);
420: }

424: PetscErrorCode EPSSetFromOptions_ARNOLDI(EPS eps)
425: {
427:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

430:   PetscOptionsHead("ARNOLDI options");
431:   PetscOptionsTruth("-eps_arnoldi_delayed","Arnoldi with delayed reorthogonalization","EPSArnoldiSetDelayed",PETSC_FALSE,&arnoldi->delayed,PETSC_NULL);
432:   PetscOptionsTail();
433:   return(0);
434: }

439: PetscErrorCode EPSArnoldiSetDelayed_ARNOLDI(EPS eps,PetscTruth delayed)
440: {
441:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

444:   arnoldi->delayed = delayed;
445:   return(0);
446: }

451: /*@
452:    EPSArnoldiSetDelayed - Activates or deactivates delayed reorthogonalization 
453:    in the Arnoldi iteration. 

455:    Collective on EPS

457:    Input Parameters:
458: +  eps - the eigenproblem solver context
459: -  delayed - boolean flag

461:    Options Database Key:
462: .  -eps_arnoldi_delayed - Activates delayed reorthogonalization in Arnoldi
463:    
464:    Note:
465:    Delayed reorthogonalization is an aggressive optimization for the Arnoldi
466:    eigensolver than may provide better scalability, but sometimes makes the
467:    solver converge less than the default algorithm.

469:    Level: advanced

471: .seealso: EPSArnoldiGetDelayed()
472: @*/
473: PetscErrorCode EPSArnoldiSetDelayed(EPS eps,PetscTruth delayed)
474: {
475:   PetscErrorCode ierr, (*f)(EPS,PetscTruth);

479:   PetscObjectQueryFunction((PetscObject)eps,"EPSArnoldiSetDelayed_C",(void (**)())&f);
480:   if (f) {
481:     (*f)(eps,delayed);
482:   }
483:   return(0);
484: }

489: PetscErrorCode EPSArnoldiGetDelayed_ARNOLDI(EPS eps,PetscTruth *delayed)
490: {
491:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

494:   *delayed = arnoldi->delayed;
495:   return(0);
496: }

501: /*@C
502:    EPSArnoldiGetDelayed - Gets the type of reorthogonalization used during the Arnoldi
503:    iteration. 

505:    Collective on EPS

507:    Input Parameter:
508: .  eps - the eigenproblem solver context

510:    Input Parameter:
511: .  delayed - boolean flag indicating if delayed reorthogonalization has been enabled

513:    Level: advanced

515: .seealso: EPSArnoldiSetDelayed()
516: @*/
517: PetscErrorCode EPSArnoldiGetDelayed(EPS eps,PetscTruth *delayed)
518: {
519:   PetscErrorCode ierr, (*f)(EPS,PetscTruth*);

523:   PetscObjectQueryFunction((PetscObject)eps,"EPSArnoldiGetDelayed_C",(void (**)())&f);
524:   if (f) {
525:     (*f)(eps,delayed);
526:   }
527:   return(0);
528: }

532: PetscErrorCode EPSView_ARNOLDI(EPS eps,PetscViewer viewer)
533: {
535:   PetscTruth     isascii;
536:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

539:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
540:   if (!isascii) {
541:     SETERRQ1(1,"Viewer type %s not supported for EPSARNOLDI",((PetscObject)viewer)->type_name);
542:   }
543:   if (arnoldi->delayed) {
544:     PetscViewerASCIIPrintf(viewer,"using delayed reorthogonalization\n");
545:   }
546:   return(0);
547: }

549: EXTERN PetscErrorCode EPSSolve_TS_ARNOLDI(EPS);

554: PetscErrorCode EPSCreate_ARNOLDI(EPS eps)
555: {
557:   EPS_ARNOLDI    *arnoldi;
558: 
560:   PetscNew(EPS_ARNOLDI,&arnoldi);
561:   PetscLogObjectMemory(eps,sizeof(EPS_ARNOLDI));
562:   eps->data                      = (void *)arnoldi;
563:   eps->ops->solve                = EPSSolve_ARNOLDI;
564:   eps->ops->solvets              = EPSSolve_TS_ARNOLDI;
565:   eps->ops->setup                = EPSSetUp_ARNOLDI;
566:   eps->ops->setfromoptions       = EPSSetFromOptions_ARNOLDI;
567:   eps->ops->destroy              = EPSDestroy_Default;
568:   eps->ops->view                 = EPSView_ARNOLDI;
569:   eps->ops->backtransform        = EPSBackTransform_Default;
570:   eps->ops->computevectors       = EPSComputeVectors_Schur;
571:   arnoldi->delayed               = PETSC_FALSE;
572:   PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiSetDelayed_C","EPSArnoldiSetDelayed_ARNOLDI",EPSArnoldiSetDelayed_ARNOLDI);
573:   PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiGetDelayed_C","EPSArnoldiGetDelayed_ARNOLDI",EPSArnoldiGetDelayed_ARNOLDI);
574:   return(0);
575: }