Actual source code: arnoldi.c

  1: /*                       

  3:    SLEPc eigensolver: "arnoldi"

  5:    Method: Explicitly Restarted Arnoldi

  7:    Algorithm:

  9:        Arnoldi method with explicit restart and deflation.

 11:    References:

 13:        [1] "Arnoldi Methods in SLEPc", SLEPc Technical Report STR-4, 
 14:            available at http://www.grycap.upv.es/slepc.

 16:    Last update: Feb 2009

 18:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 19:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 20:    Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain

 22:    This file is part of SLEPc.
 23:       
 24:    SLEPc is free software: you can redistribute it and/or modify it under  the
 25:    terms of version 3 of the GNU Lesser General Public License as published by
 26:    the Free Software Foundation.

 28:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 29:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 30:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 31:    more details.

 33:    You  should have received a copy of the GNU Lesser General  Public  License
 34:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 35:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 36: */

 38: #include <slepc-private/epsimpl.h>                /*I "slepceps.h" I*/
 39: #include <slepcblaslapack.h>

 41: PetscErrorCode EPSSolve_Arnoldi(EPS);

 43: typedef struct {
 44:   PetscBool delayed;
 45: } EPS_ARNOLDI;

 49: PetscErrorCode EPSSetUp_Arnoldi(EPS eps)
 50: {

 54:   if (eps->ncv) { /* ncv set */
 55:     if (eps->ncv<eps->nev) SETERRQ(((PetscObject)eps)->comm,1,"The value of ncv must be at least nev");
 56:   }
 57:   else if (eps->mpd) { /* mpd set */
 58:     eps->ncv = PetscMin(eps->n,eps->nev+eps->mpd);
 59:   }
 60:   else { /* neither set: defaults depend on nev being small or large */
 61:     if (eps->nev<500) eps->ncv = PetscMin(eps->n,PetscMax(2*eps->nev,eps->nev+15));
 62:     else { eps->mpd = 500; eps->ncv = PetscMin(eps->n,eps->nev+eps->mpd); }
 63:   }
 64:   if (!eps->mpd) eps->mpd = eps->ncv;
 65:   if (eps->ncv>eps->nev+eps->mpd) SETERRQ(((PetscObject)eps)->comm,1,"The value of ncv must not be larger than nev+mpd");
 66:   if (!eps->max_it) eps->max_it = PetscMax(100,2*eps->n/eps->ncv);
 67:   if (!eps->which) { EPSDefaultSetWhich(eps); }
 68:   if (eps->ishermitian && (eps->which==EPS_LARGEST_IMAGINARY || eps->which==EPS_SMALLEST_IMAGINARY)) SETERRQ(((PetscObject)eps)->comm,1,"Wrong value of eps->which");

 70:   if (!eps->extraction) {
 71:     EPSSetExtraction(eps,EPS_RITZ);
 72:   }
 73:   if (eps->arbit_func) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_SUP,"Arbitrary selection of eigenpairs not supported in this solver");

 75:   EPSAllocateSolution(eps);
 76:   DSSetType(eps->ds,DSNHEP);
 77:   if (eps->extraction==EPS_REFINED || eps->extraction==EPS_REFINED_HARMONIC) {
 78:     DSSetRefined(eps->ds,PETSC_TRUE);
 79:   }
 80:   DSSetExtraRow(eps->ds,PETSC_TRUE);
 81:   DSAllocate(eps->ds,eps->ncv+1);
 82:   EPSDefaultGetWork(eps,1);

 84:   /* dispatch solve method */
 85:   if (eps->leftvecs) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_SUP,"Left vectors not supported in this solver");
 86:   eps->ops->solve = EPSSolve_Arnoldi;
 87:   return(0);
 88: }

 92: /*
 93:    EPSDelayedArnoldi - This function is equivalent to EPSBasicArnoldi but
 94:    performs the computation in a different way. The main idea is that
 95:    reorthogonalization is delayed to the next Arnoldi step. This version is
 96:    more scalable but in some cases convergence may stagnate.
 97: */
 98: PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,PetscInt ldh,Vec *V,PetscInt k,PetscInt *M,Vec f,PetscReal *beta,PetscBool *breakdown)
 99: {
101:   PetscInt       i,j,m=*M;
102:   Vec            u,t;
103:   PetscScalar    shh[100],*lhh,dot,dot2;
104:   PetscReal      norm1=0.0,norm2;

107:   if (m<=100) lhh = shh;
108:   else { PetscMalloc(m*sizeof(PetscScalar),&lhh); }
109:   VecDuplicate(f,&u);
110:   VecDuplicate(f,&t);

112:   for (j=k;j<m;j++) {
113:     STApply(eps->OP,V[j],f);
114:     IPOrthogonalize(eps->ip,0,PETSC_NULL,eps->nds,PETSC_NULL,eps->defl,f,PETSC_NULL,PETSC_NULL,PETSC_NULL);

116:     IPMInnerProductBegin(eps->ip,f,j+1,V,H+ldh*j);
117:     if (j>k) {
118:       IPMInnerProductBegin(eps->ip,V[j],j,V,lhh);
119:       IPInnerProductBegin(eps->ip,V[j],V[j],&dot);
120:     }
121:     if (j>k+1) {
122:       IPNormBegin(eps->ip,u,&norm2);
123:       VecDotBegin(u,V[j-2],&dot2);
124:     }
125: 
126:     IPMInnerProductEnd(eps->ip,f,j+1,V,H+ldh*j);
127:     if (j>k) {
128:       IPMInnerProductEnd(eps->ip,V[j],j,V,lhh);
129:       IPInnerProductEnd(eps->ip,V[j],V[j],&dot);
130:     }
131:     if (j>k+1) {
132:       IPNormEnd(eps->ip,u,&norm2);
133:       VecDotEnd(u,V[j-2],&dot2);
134:       if (PetscAbsScalar(dot2/norm2) > PETSC_MACHINE_EPSILON) {
135:         *breakdown = PETSC_TRUE;
136:         *M = j-1;
137:         *beta = norm2;

139:         if (m>100) { PetscFree(lhh); }
140:         VecDestroy(&u);
141:         VecDestroy(&t);
142:         return(0);
143:       }
144:     }
145: 
146:     if (j>k) {
147:       norm1 = PetscSqrtReal(PetscRealPart(dot));
148:       for (i=0;i<j;i++)
149:         H[ldh*j+i] = H[ldh*j+i]/norm1;
150:       H[ldh*j+j] = H[ldh*j+j]/dot;
151: 
152:       VecCopy(V[j],t);
153:       VecScale(V[j],1.0/norm1);
154:       VecScale(f,1.0/norm1);
155:     }

157:     SlepcVecMAXPBY(f,1.0,-1.0,j+1,H+ldh*j,V);

159:     if (j>k) {
160:       SlepcVecMAXPBY(t,1.0,-1.0,j,lhh,V);
161:       for (i=0;i<j;i++)
162:         H[ldh*(j-1)+i] += lhh[i];
163:     }

165:     if (j>k+1) {
166:       VecCopy(u,V[j-1]);
167:       VecScale(V[j-1],1.0/norm2);
168:       H[ldh*(j-2)+j-1] = norm2;
169:     }

171:     if (j<m-1) {
172:       VecCopy(f,V[j+1]);
173:       VecCopy(t,u);
174:     }
175:   }

177:   IPNorm(eps->ip,t,&norm2);
178:   VecScale(t,1.0/norm2);
179:   VecCopy(t,V[m-1]);
180:   H[ldh*(m-2)+m-1] = norm2;

182:   IPMInnerProduct(eps->ip,f,m,V,lhh);
183: 
184:   SlepcVecMAXPBY(f,1.0,-1.0,m,lhh,V);
185:   for (i=0;i<m;i++)
186:     H[ldh*(m-1)+i] += lhh[i];

188:   IPNorm(eps->ip,f,beta);
189:   VecScale(f,1.0 / *beta);
190:   *breakdown = PETSC_FALSE;
191: 
192:   if (m>100) { PetscFree(lhh); }
193:   VecDestroy(&u);
194:   VecDestroy(&t);
195:   return(0);
196: }

200: /*
201:    EPSDelayedArnoldi1 - This function is similar to EPSDelayedArnoldi1,
202:    but without reorthogonalization (only delayed normalization).
203: */
204: PetscErrorCode EPSDelayedArnoldi1(EPS eps,PetscScalar *H,PetscInt ldh,Vec *V,PetscInt k,PetscInt *M,Vec f,PetscReal *beta,PetscBool *breakdown)
205: {
207:   PetscInt       i,j,m=*M;
208:   PetscScalar    dot;
209:   PetscReal      norm=0.0;

212:   for (j=k;j<m;j++) {
213:     STApply(eps->OP,V[j],f);
214:     IPOrthogonalize(eps->ip,0,PETSC_NULL,eps->nds,PETSC_NULL,eps->defl,f,PETSC_NULL,PETSC_NULL,PETSC_NULL);

216:     IPMInnerProductBegin(eps->ip,f,j+1,V,H+ldh*j);
217:     if (j>k) {
218:       IPInnerProductBegin(eps->ip,V[j],V[j],&dot);
219:     }
220: 
221:     IPMInnerProductEnd(eps->ip,f,j+1,V,H+ldh*j);
222:     if (j>k) {
223:       IPInnerProductEnd(eps->ip,V[j],V[j],&dot);
224:     }
225: 
226:     if (j>k) {
227:       norm = PetscSqrtReal(PetscRealPart(dot));
228:       VecScale(V[j],1.0/norm);
229:       H[ldh*(j-1)+j] = norm;

231:       for (i=0;i<j;i++)
232:         H[ldh*j+i] = H[ldh*j+i]/norm;
233:       H[ldh*j+j] = H[ldh*j+j]/dot;
234:       VecScale(f,1.0/norm);
235:     }

237:     SlepcVecMAXPBY(f,1.0,-1.0,j+1,H+ldh*j,V);

239:     if (j<m-1) {
240:       VecCopy(f,V[j+1]);
241:     }
242:   }

244:   IPNorm(eps->ip,f,beta);
245:   VecScale(f,1.0 / *beta);
246:   *breakdown = PETSC_FALSE;
247:   return(0);
248: }

252: PetscErrorCode EPSSolve_Arnoldi(EPS eps)
253: {
254:   PetscErrorCode     ierr;
255:   PetscInt           k,nv,ld;
256:   Vec                f=eps->work[0];
257:   PetscScalar        *H,*U,*X;
258:   PetscReal          beta,gamma=1.0;
259:   PetscBool          breakdown,harmonic,refined;
260:   IPOrthogRefineType orthog_ref;
261:   EPS_ARNOLDI        *arnoldi = (EPS_ARNOLDI*)eps->data;

264:   DSGetLeadingDimension(eps->ds,&ld);
265:   DSGetRefined(eps->ds,&refined);
266:   harmonic = (eps->extraction==EPS_HARMONIC || eps->extraction==EPS_REFINED_HARMONIC)?PETSC_TRUE:PETSC_FALSE;
267:   IPGetOrthogonalization(eps->ip,PETSC_NULL,&orthog_ref,PETSC_NULL);

269:   /* Get the starting Arnoldi vector */
270:   EPSGetStartVector(eps,0,eps->V[0],PETSC_NULL);
271: 
272:   /* Restart loop */
273:   while (eps->reason == EPS_CONVERGED_ITERATING) {
274:     eps->its++;

276:     /* Compute an nv-step Arnoldi factorization */
277:     nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
278:     DSSetDimensions(eps->ds,nv,PETSC_IGNORE,eps->nconv,0);
279:     DSGetArray(eps->ds,DS_MAT_A,&H);
280:     if (!arnoldi->delayed) {
281:       EPSBasicArnoldi(eps,PETSC_FALSE,H,ld,eps->V,eps->nconv,&nv,f,&beta,&breakdown);
282:       H[nv+(nv-1)*ld] = beta;
283:     } else if (orthog_ref == IP_ORTHOG_REFINE_NEVER) {
284:       EPSDelayedArnoldi1(eps,H,ld,eps->V,eps->nconv,&nv,f,&beta,&breakdown);
285:     } else {
286:       EPSDelayedArnoldi(eps,H,ld,eps->V,eps->nconv,&nv,f,&beta,&breakdown);
287:     }
288:     DSRestoreArray(eps->ds,DS_MAT_A,&H);
289:     DSSetState(eps->ds,DS_STATE_INTERMEDIATE);

291:     /* Compute translation of Krylov decomposition if harmonic extraction used */
292:     if (harmonic) {
293:       DSTranslateHarmonic(eps->ds,eps->target,beta,PETSC_FALSE,PETSC_NULL,&gamma);
294:     }

296:     /* Solve projected problem */
297:     DSSolve(eps->ds,eps->eigr,eps->eigi);
298:     DSSort(eps->ds,eps->eigr,eps->eigi,PETSC_NULL,PETSC_NULL,PETSC_NULL);
299:     DSUpdateExtraRow(eps->ds);

301:     /* Check convergence */
302:     EPSKrylovConvergence(eps,PETSC_FALSE,eps->nconv,nv-eps->nconv,eps->V,nv,beta,gamma,&k);
303:     if (refined) {
304:       DSGetArray(eps->ds,DS_MAT_X,&X);
305:       SlepcVecMAXPBY(eps->V[k],0.0,1.0,nv,X+k*ld,eps->V);
306:       DSRestoreArray(eps->ds,DS_MAT_X,&X);
307:       DSGetArray(eps->ds,DS_MAT_Q,&U);
308:       SlepcUpdateVectors(nv,eps->V,eps->nconv,PetscMin(k,nv),U,ld,PETSC_FALSE);
309:       DSRestoreArray(eps->ds,DS_MAT_Q,&U);
310:       IPOrthogonalize(eps->ip,0,PETSC_NULL,k,PETSC_NULL,eps->V,eps->V[k],PETSC_NULL,PETSC_NULL,PETSC_NULL);
311:     } else {
312:       DSGetArray(eps->ds,DS_MAT_Q,&U);
313:       SlepcUpdateVectors(nv,eps->V,eps->nconv,PetscMin(k+1,nv),U,ld,PETSC_FALSE);
314:       DSRestoreArray(eps->ds,DS_MAT_Q,&U);
315:     }
316:     eps->nconv = k;

318:     EPSMonitor(eps,eps->its,eps->nconv,eps->eigr,eps->eigi,eps->errest,nv);
319:     if (breakdown) {
320:       PetscInfo2(eps,"Breakdown in Arnoldi method (it=%D norm=%G)\n",eps->its,beta);
321:       EPSGetStartVector(eps,k,eps->V[k],&breakdown);
322:       if (breakdown) {
323:         eps->reason = EPS_DIVERGED_BREAKDOWN;
324:         PetscInfo(eps,"Unable to generate more start vectors\n");
325:       }
326:     }
327:     if (eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
328:     if (eps->nconv >= eps->nev) eps->reason = EPS_CONVERGED_TOL;
329:   }
330: 
331:   /* truncate Schur decomposition and change the state to raw so that
332:      PSVectors() computes eigenvectors from scratch */
333:   DSSetDimensions(eps->ds,eps->nconv,PETSC_IGNORE,0,0);
334:   DSSetState(eps->ds,DS_STATE_RAW);
335:   return(0);
336: }

340: PetscErrorCode EPSSetFromOptions_Arnoldi(EPS eps)
341: {
343:   PetscBool      set,val;
344:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI*)eps->data;

347:   PetscOptionsHead("EPS Arnoldi Options");
348:   PetscOptionsBool("-eps_arnoldi_delayed","Arnoldi with delayed reorthogonalization","EPSArnoldiSetDelayed",arnoldi->delayed,&val,&set);
349:   if (set) {
350:     EPSArnoldiSetDelayed(eps,val);
351:   }
352:   PetscOptionsTail();
353:   return(0);
354: }

356: EXTERN_C_BEGIN
359: PetscErrorCode EPSArnoldiSetDelayed_Arnoldi(EPS eps,PetscBool delayed)
360: {
361:   EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI*)eps->data;

364:   arnoldi->delayed = delayed;
365:   return(0);
366: }
367: EXTERN_C_END

371: /*@
372:    EPSArnoldiSetDelayed - Activates or deactivates delayed reorthogonalization 
373:    in the Arnoldi iteration. 

375:    Logically Collective on EPS

377:    Input Parameters:
378: +  eps - the eigenproblem solver context
379: -  delayed - boolean flag

381:    Options Database Key:
382: .  -eps_arnoldi_delayed - Activates delayed reorthogonalization in Arnoldi
383:    
384:    Note:
385:    Delayed reorthogonalization is an aggressive optimization for the Arnoldi
386:    eigensolver than may provide better scalability, but sometimes makes the
387:    solver converge less than the default algorithm.

389:    Level: advanced

391: .seealso: EPSArnoldiGetDelayed()
392: @*/
393: PetscErrorCode EPSArnoldiSetDelayed(EPS eps,PetscBool delayed)
394: {

400:   PetscTryMethod(eps,"EPSArnoldiSetDelayed_C",(EPS,PetscBool),(eps,delayed));
401:   return(0);
402: }

404: EXTERN_C_BEGIN
407: PetscErrorCode EPSArnoldiGetDelayed_Arnoldi(EPS eps,PetscBool *delayed)
408: {
409:   EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI*)eps->data;

412:   *delayed = arnoldi->delayed;
413:   return(0);
414: }
415: EXTERN_C_END

419: /*@C
420:    EPSArnoldiGetDelayed - Gets the type of reorthogonalization used during the Arnoldi
421:    iteration. 

423:    Not Collective

425:    Input Parameter:
426: .  eps - the eigenproblem solver context

428:    Input Parameter:
429: .  delayed - boolean flag indicating if delayed reorthogonalization has been enabled

431:    Level: advanced

433: .seealso: EPSArnoldiSetDelayed()
434: @*/
435: PetscErrorCode EPSArnoldiGetDelayed(EPS eps,PetscBool *delayed)
436: {

442:   PetscTryMethod(eps,"EPSArnoldiGetDelayed_C",(EPS,PetscBool*),(eps,delayed));
443:   return(0);
444: }

448: PetscErrorCode EPSDestroy_Arnoldi(EPS eps)
449: {

453:   PetscFree(eps->data);
454:   PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiSetDelayed_C","",PETSC_NULL);
455:   PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiGetDelayed_C","",PETSC_NULL);
456:   return(0);
457: }

461: PetscErrorCode EPSView_Arnoldi(EPS eps,PetscViewer viewer)
462: {
464:   PetscBool      isascii;
465:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI*)eps->data;

468:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
469:   if (!isascii) SETERRQ1(((PetscObject)eps)->comm,1,"Viewer type %s not supported for EPS Arnoldi",((PetscObject)viewer)->type_name);
470:   if (arnoldi->delayed) {
471:     PetscViewerASCIIPrintf(viewer,"  Arnoldi: using delayed reorthogonalization\n");
472:   }
473:   return(0);
474: }

476: EXTERN_C_BEGIN
479: PetscErrorCode EPSCreate_Arnoldi(EPS eps)
480: {
482: 
484:   PetscNewLog(eps,EPS_ARNOLDI,&eps->data);
485:   eps->ops->setup                = EPSSetUp_Arnoldi;
486:   eps->ops->setfromoptions       = EPSSetFromOptions_Arnoldi;
487:   eps->ops->destroy              = EPSDestroy_Arnoldi;
488:   eps->ops->reset                = EPSReset_Default;
489:   eps->ops->view                 = EPSView_Arnoldi;
490:   eps->ops->backtransform        = EPSBackTransform_Default;
491:   eps->ops->computevectors       = EPSComputeVectors_Schur;
492:   PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiSetDelayed_C","EPSArnoldiSetDelayed_Arnoldi",EPSArnoldiSetDelayed_Arnoldi);
493:   PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiGetDelayed_C","EPSArnoldiGetDelayed_Arnoldi",EPSArnoldiGetDelayed_Arnoldi);
494:   return(0);
495: }
496: EXTERN_C_END