Actual source code: arnoldi.c

  1: /*                       

  3:    SLEPc eigensolver: "arnoldi"

  5:    Method: Explicitly Restarted Arnoldi

  7:    Algorithm:

  9:        Arnoldi method with explicit restart and deflation.

 11:    References:

 13:        [1] "Arnoldi Methods in SLEPc", SLEPc Technical Report STR-4, 
 14:            available at http://www.grycap.upv.es/slepc.

 16:    Last update: Feb 2006

 18: */
 19:  #include src/eps/epsimpl.h
 20:  #include slepcblaslapack.h

 22: typedef struct {
 23:   PetscTruth delayed;
 24: } EPS_ARNOLDI;

 28: PetscErrorCode EPSSetUp_ARNOLDI(EPS eps)
 29: {
 31:   PetscInt       N;

 34:   VecGetSize(eps->vec_initial,&N);
 35:   if (eps->nev > N) eps->nev = N;
 36:   if (eps->ncv) {
 37:     if (eps->ncv > N) eps->ncv = N;
 38:     if (eps->ncv<eps->nev) SETERRQ(1,"The value of ncv must be at least nev");
 39:   }
 40:   else eps->ncv = PetscMin(N,PetscMax(2*eps->nev,eps->nev+15));
 41: 
 42:   if (!eps->max_it) eps->max_it = PetscMax(100,N);
 43:   if (!eps->tol) eps->tol = 1.e-7;
 44:   if (eps->which!=EPS_LARGEST_MAGNITUDE)
 45:     SETERRQ(1,"Wrong value of eps->which");
 46:   EPSAllocateSolution(eps);
 47:   PetscFree(eps->T);
 48:   PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&eps->T);
 49:   if (eps->solverclass==EPS_TWO_SIDE) {
 50:     PetscFree(eps->Tl);
 51:     PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&eps->Tl);
 52:     EPSDefaultGetWork(eps,2);
 53:   }
 54:   else { EPSDefaultGetWork(eps,1); }
 55:   return(0);
 56: }

 60: /*
 61:    EPSBasicArnoldi - Computes an m-step Arnoldi factorization. The first k
 62:    columns are assumed to be locked and therefore they are not modified. On
 63:    exit, the following relation is satisfied:

 65:                     OP * V - V * H = f * e_m^T

 67:    where the columns of V are the Arnoldi vectors (which are B-orthonormal),
 68:    H is an upper Hessenberg matrix, f is the residual vector and e_m is
 69:    the m-th vector of the canonical basis. The vector f is B-orthogonal to
 70:    the columns of V. On exit, beta contains the B-norm of f and the next 
 71:    Arnoldi vector can be computed as v_{m+1} = f / beta. 
 72: */
 73: PetscErrorCode EPSBasicArnoldi(EPS eps,PetscTruth trans,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta)
 74: {
 76:   int            j,m = *M;
 77:   PetscReal      norm;
 78:   PetscTruth     breakdown;

 81:   for (j=k;j<m-1;j++) {
 82:     if (trans) { STApplyTranspose(eps->OP,V[j],V[j+1]); }
 83:     else { STApply(eps->OP,V[j],V[j+1]); }
 84:     eps->its++;
 85:     EPSOrthogonalize(eps,eps->nds,eps->DS,V[j+1],PETSC_NULL,PETSC_NULL,PETSC_NULL);
 86:     EPSOrthogonalize(eps,j+1,V,V[j+1],H+m*j,&norm,&breakdown);
 87:     H[(m+1)*j+1] = norm;
 88:     if (breakdown) {
 89:       eps->count_breakdown++;
 90:       PetscInfo1(eps,"Breakdown in Arnoldi method (norm=%g)\n",norm);
 91:       *M = j+1;
 92:       *beta = norm;
 93:       return(0);
 94:     } else {
 95:       VecScale(V[j+1],1/norm);
 96:     }
 97:   }
 98:   STApply(eps->OP,V[m-1],f);
 99:   eps->its++;
100:   EPSOrthogonalize(eps,eps->nds,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL);
101:   EPSOrthogonalize(eps,m,V,f,H+m*(m-1),beta,PETSC_NULL);
102:   return(0);
103: }

107: /*
108:    EPSDelayedArnoldi - This function is equivalent to EPSBasicArnoldi but
109:    performs the computation in a different way. The main idea is that
110:    reorthogonalization is delayed to the next Arnoldi step. This version is
111:    more scalable but in some case may be less robust numerically.
112: */
113: static PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta)
114: {
116:   int            i,j,m=*M;
117:   Vec            w,u,t;
118:   PetscScalar    shh[100],*lhh,dot;
119:   PetscReal      norm1,norm2;

122:   if (m<=100) lhh = shh;
123:   else { PetscMalloc(m*sizeof(PetscScalar),&lhh); }
124:   VecDuplicate(f,&w);
125:   VecDuplicate(f,&u);
126:   VecDuplicate(f,&t);

128:   for (j=k;j<m;j++) {
129:     STApply(eps->OP,V[j],f);
130:     eps->its++;
131:     EPSOrthogonalize(eps,eps->nds,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL);

133:     STMInnerProductBegin(eps->OP,j+1,f,V,H+m*j);
134:     if (j>k) {
135:       eps->count_reorthog++;
136:       STMInnerProductBegin(eps->OP,j,V[j],V,lhh);
137:       STInnerProductBegin(eps->OP,V[j],V[j],&dot);
138:     }
139:     if (j>k+1) {
140:       STNormBegin(eps->OP,u,&norm2);
141:     }
142: 
143:     STMInnerProductEnd(eps->OP,j+1,f,V,H+m*j);
144:     if (j>k) {
145:       STMInnerProductEnd(eps->OP,j,V[j],V,lhh);
146:       STInnerProductEnd(eps->OP,V[j],V[j],&dot);
147:     }
148:     if (j>k+1) {
149:       STNormEnd(eps->OP,u,&norm2);
150:       if (norm2 < eps->orthog_eta * norm1) {
151:         eps->count_breakdown++;
152:         PetscInfo2(eps,"Breakdown in Arnoldi method (it=%i norm=%g)\n",eps->its,norm2);
153:         *M = j-1;
154:         *beta = norm2;

156:         if (m>100) { PetscFree(lhh); }
157:         VecDestroy(w);
158:         VecDestroy(u);
159:         VecDestroy(t);
160:         return(0);
161:       }
162:     }
163: 
164:     if (j>k) {
165:       norm1 = sqrt(PetscRealPart(dot));
166:       for (i=0;i<j;i++)
167:         H[m*j+i] = H[m*j+i]/norm1;
168:       H[m*j+j] = H[m*j+j]/dot;
169: 
170:       VecCopy(V[j],t);
171:       VecScale(V[j],1.0/norm1);
172:       VecScale(f,1.0/norm1);
173:     }

175:     VecSet(w,0.0);
176:     VecMAXPY(w,j+1,H+m*j,V);
177:     VecAXPY(f,-1.0,w);

179:     if (j>k) {
180:       VecSet(w,0.0);
181:       VecMAXPY(w,j,lhh,V);
182:       VecAXPY(t,-1.0,w);
183:       for (i=0;i<j;i++)
184:         H[m*(j-1)+i] += lhh[i];
185:     }

187:     if (j>k+1) {
188:       VecCopy(u,V[j-1]);
189:       VecScale(V[j-1],1.0/norm2);
190:       H[m*(j-2)+j-1] = norm2;
191:     }

193:     if (j<m-1) {
194:       VecCopy(f,V[j+1]);
195:       VecCopy(t,u);
196:     }
197:   }

199:   STNorm(eps->OP,t,&norm2);
200:   VecScale(t,1.0/norm2);
201:   VecCopy(t,V[m-1]);
202:   H[m*(m-2)+m-1] = norm2;

204:   eps->count_reorthog++;
205:   STMInnerProduct(eps->OP,m,f,V,lhh);
206: 
207:   VecSet(w,0.0);
208:   VecMAXPY(w,m,lhh,V);
209:   VecAXPY(f,-1.0,w);
210:   for (i=0;i<m;i++)
211:     H[m*(m-1)+i] += lhh[i];

213:   STNorm(eps->OP,f,beta);
214:   VecScale(f,1.0 / *beta);

216:   if (m>100) { PetscFree(lhh); }
217:   VecDestroy(w);
218:   VecDestroy(u);
219:   VecDestroy(t);

221:   return(0);
222: }

226: /*
227:    EPSArnoldiResiduals - Computes the 2-norm of the residual vectors from
228:    the information provided by the m-step Arnoldi factorization,

230:                     OP * V - V * H = f * e_m^T

232:    For the approximate eigenpair (k_i,V*y_i), the residual norm is computed as
233:    |beta*y(end,i)| where beta is the norm of f and y is the corresponding 
234:    eigenvector of H.
235: */
236: PetscErrorCode ArnoldiResiduals(PetscScalar *H,int ldh,PetscScalar *U,PetscReal beta,int nconv,int ncv,PetscScalar *eigr,PetscScalar *eigi,PetscReal *errest,PetscScalar *work)
237: {
238: #if defined(SLEPC_MISSING_LAPACK_TREVC)
240:   SETERRQ(PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable.");
241: #else
243:   int            i,mout,info;
244:   PetscScalar    *Y=work+4*ncv;
245: #if defined(PETSC_USE_COMPLEX)
246:   PetscReal      *rwork=(PetscReal*)(work+3*ncv);
247: #endif


251:   /* Compute eigenvectors Y of H */
252:   PetscMemcpy(Y,U,ncv*ncv*sizeof(PetscScalar));
253: #if !defined(PETSC_USE_COMPLEX)
254:   LAPACKtrevc_("R","B",PETSC_NULL,&ncv,H,&ldh,PETSC_NULL,&ncv,Y,&ncv,&ncv,&mout,work,&info,1,1);
255: #else
256:   LAPACKtrevc_("R","B",PETSC_NULL,&ncv,H,&ldh,PETSC_NULL,&ncv,Y,&ncv,&ncv,&mout,work,rwork,&info,1,1);
257: #endif
258:   if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);

260:   /* Compute residual norm estimates as beta*abs(Y(m,:)) */
261:   for (i=nconv;i<ncv;i++) {
262: #if !defined(PETSC_USE_COMPLEX)
263:     if (eigi[i] != 0 && i<ncv-1) {
264:         errest[i] = beta*SlepcAbsEigenvalue(Y[i*ncv+ncv-1],Y[(i+1)*ncv+ncv-1]) /
265:                          SlepcAbsEigenvalue(eigr[i],eigi[i]);
266:         errest[i+1] = errest[i];
267:         i++;
268:     } else
269: #endif
270:     errest[i] = beta*PetscAbsScalar(Y[i*ncv+ncv-1]) / PetscAbsScalar(eigr[i]);
271:   }
272:   return(0);
273: #endif
274: }

278: PetscErrorCode EPSSolve_ARNOLDI(EPS eps)
279: {
281:   int            i,k;
282:   Vec            f=eps->work[0];
283:   PetscScalar    *H=eps->T,*U,*work;
284:   PetscReal      beta;
285:   PetscTruth     breakdown;
286:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

289:   PetscMemzero(eps->T,eps->ncv*eps->ncv*sizeof(PetscScalar));
290:   PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&U);
291:   PetscMalloc((eps->ncv+4)*eps->ncv*sizeof(PetscScalar),&work);
292: 
293:   eps->nconv = 0;
294:   eps->its = 0;
295:   EPSMonitor(eps,eps->its,eps->nconv,eps->eigr,eps->eigi,eps->errest,eps->nv);

297:   /* Get the starting Arnoldi vector */
298:   EPSGetStartVector(eps,0,eps->V[0],PETSC_NULL);
299: 
300:   /* Restart loop */
301:   while (eps->reason == EPS_CONVERGED_ITERATING) {

303:     /* Compute an nv-step Arnoldi factorization */
304:     eps->nv = eps->ncv;
305:     if (arnoldi->delayed) {
306:       EPSDelayedArnoldi(eps,H,eps->V,eps->nconv,&eps->nv,f,&beta);
307:     } else {
308:       EPSBasicArnoldi(eps,PETSC_FALSE,H,eps->V,eps->nconv,&eps->nv,f,&beta);
309:     }

311:     /* Reduce H to (quasi-)triangular form, H <- U H U' */
312:     PetscMemzero(U,eps->nv*eps->nv*sizeof(PetscScalar));
313:     for (i=0;i<eps->nv;i++) { U[i*(eps->nv+1)] = 1.0; }
314:     EPSDenseSchur(eps->nv,eps->nconv,H,eps->ncv,U,eps->eigr,eps->eigi);

316:     /* Sort the remaining columns of the Schur form */
317:     EPSSortDenseSchur(eps->nv,eps->nconv,H,eps->ncv,U,eps->eigr,eps->eigi);

319:     /* Compute residual norm estimates */
320:     ArnoldiResiduals(H,eps->ncv,U,beta,eps->nconv,eps->nv,eps->eigr,eps->eigi,eps->errest,work);
321: 
322:     /* Lock converged eigenpairs and update the corresponding vectors,
323:        including the restart vector: V(:,idx) = V*U(:,idx) */
324:     k = eps->nconv;
325:     while (k<eps->nv && eps->errest[k]<eps->tol) k++;
326:     for (i=eps->nconv;i<=k && i<eps->nv;i++) {
327:       VecSet(eps->AV[i],0.0);
328:       VecMAXPY(eps->AV[i],eps->nv,U+eps->nv*i,eps->V);
329:     }
330:     for (i=eps->nconv;i<=k && i<eps->nv;i++) {
331:       VecCopy(eps->AV[i],eps->V[i]);
332:     }
333:     eps->nconv = k;

335:     EPSMonitor(eps,eps->its,eps->nconv,eps->eigr,eps->eigi,eps->errest,eps->nv);
336:     if (eps->nv < eps->ncv) {
337:       EPSGetStartVector(eps,k,eps->V[k],&breakdown);
338:       if (breakdown) {
339:         eps->reason = EPS_DIVERGED_BREAKDOWN;
340:         PetscInfo(eps,"Unable to generate more start vectors\n");
341:       }
342:     }
343:     if (eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
344:     if (eps->nconv >= eps->nev) eps->reason = EPS_CONVERGED_TOL;
345:   }
346: 
347: #if defined(PETSC_USE_COMPLEX)
348:   for (i=0;i<eps->nconv;i++) eps->eigi[i]=0.0;
349: #endif

351:   PetscFree(U);
352:   PetscFree(work);
353:   return(0);
354: }

358: PetscErrorCode EPSSetFromOptions_ARNOLDI(EPS eps)
359: {
361:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

364:   PetscOptionsHead("ARNOLDI options");
365:   PetscOptionsTruth("-eps_arnoldi_delayed","Arnoldi with delayed reorthogonalization","EPSArnoldiSetDelayed",PETSC_FALSE,&arnoldi->delayed,PETSC_NULL);
366:   PetscOptionsTail();
367:   return(0);
368: }

373: PetscErrorCode EPSArnoldiSetDelayed_ARNOLDI(EPS eps,PetscTruth delayed)
374: {
375:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

378:   arnoldi->delayed = delayed;
379:   return(0);
380: }

385: /*@
386:    EPSArnoldiSetDelayed - Activates or deactivates delayed reorthogonalization 
387:    in the Arnoldi iteration. 

389:    Collective on EPS

391:    Input Parameters:
392: +  eps - the eigenproblem solver context
393: -  delayed - boolean flag for toggling delayed reorthogonalization

395:    Options Database Key:
396: .  -eps_arnoldi_delayed - Activates delayed reorthogonalization in Arnoldi
397:    
398:    Note:
399:    Delayed reorthogonalization is an aggressive optimization for the Arnoldi
400:    eigensolver than may provide better scalability, but it is sometimes less 
401:    robust than the default algorithm.

403:    Level: advanced

405: .seealso: EPSArnoldiGetDelayed()
406: @*/
407: PetscErrorCode EPSArnoldiSetDelayed(EPS eps,PetscTruth delayed)
408: {
409:   PetscErrorCode ierr, (*f)(EPS,PetscTruth);

413:   PetscObjectQueryFunction((PetscObject)eps,"EPSArnoldiSetDelayed_C",(void (**)())&f);
414:   if (f) {
415:     (*f)(eps,delayed);
416:   }
417:   return(0);
418: }

423: PetscErrorCode EPSArnoldiGetDelayed_ARNOLDI(EPS eps,PetscTruth *delayed)
424: {
425:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

428:   *delayed = arnoldi->delayed;
429:   return(0);
430: }

435: /*@C
436:    EPSArnoldiGetDelayed - Gets the type of reorthogonalization used during the Arnoldi
437:    iteration. 

439:    Collective on EPS

441:    Input Parameter:
442: .  eps - the eigenproblem solver context

444:    Input Parameter:
445: .  delayed - boolean flag indicating if delayed reorthogonalization has been enabled

447:    Level: advanced

449: .seealso: EPSArnoldiSetDelayed()
450: @*/
451: PetscErrorCode EPSArnoldiGetDelayed(EPS eps,PetscTruth *delayed)
452: {
453:   PetscErrorCode ierr, (*f)(EPS,PetscTruth*);

457:   PetscObjectQueryFunction((PetscObject)eps,"EPSArnoldiGetDelayed_C",(void (**)())&f);
458:   if (f) {
459:     (*f)(eps,delayed);
460:   }
461:   return(0);
462: }

466: PetscErrorCode EPSView_ARNOLDI(EPS eps,PetscViewer viewer)
467: {
469:   PetscTruth     isascii;
470:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

473:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
474:   if (!isascii) {
475:     SETERRQ1(1,"Viewer type %s not supported for EPSARNOLDI",((PetscObject)viewer)->type_name);
476:   }
477:   if (arnoldi->delayed) {
478:     PetscViewerASCIIPrintf(viewer,"using delayed reorthogonalization\n");
479:   }
480:   return(0);
481: }

483: EXTERN PetscErrorCode EPSSolve_TS_ARNOLDI(EPS);

488: PetscErrorCode EPSCreate_ARNOLDI(EPS eps)
489: {
491:   EPS_ARNOLDI    *arnoldi;
492: 
494:   PetscNew(EPS_ARNOLDI,&arnoldi);
495:   PetscLogObjectMemory(eps,sizeof(EPS_ARNOLDI));
496:   eps->data                      = (void *)arnoldi;
497:   eps->ops->solve                = EPSSolve_ARNOLDI;
498:   eps->ops->solvets              = EPSSolve_TS_ARNOLDI;
499:   eps->ops->setup                = EPSSetUp_ARNOLDI;
500:   eps->ops->setfromoptions       = EPSSetFromOptions_ARNOLDI;
501:   eps->ops->destroy              = EPSDestroy_Default;
502:   eps->ops->view                 = EPSView_ARNOLDI;
503:   eps->ops->backtransform        = EPSBackTransform_Default;
504:   eps->ops->computevectors       = EPSComputeVectors_Schur;
505:   arnoldi->delayed               = PETSC_FALSE;
506:   PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiSetDelayed_C","EPSArnoldiSetDelayed_ARNOLDI",EPSArnoldiSetDelayed_ARNOLDI);
507:   PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiGetDelayed_C","EPSArnoldiGetDelayed_ARNOLDI",EPSArnoldiGetDelayed_ARNOLDI);
508:   return(0);
509: }