Actual source code: arnoldi.c
1: /*
3: SLEPc eigensolver: "arnoldi"
5: Method: Explicitly Restarted Arnoldi
7: Algorithm:
9: Arnoldi method with explicit restart and deflation.
11: References:
13: [1] "Arnoldi Methods in SLEPc", SLEPc Technical Report STR-4,
14: available at http://www.grycap.upv.es/slepc.
16: Last update: Feb 2006
18: */
19: #include src/eps/epsimpl.h
20: #include slepcblaslapack.h
22: typedef struct {
23: PetscTruth delayed;
24: } EPS_ARNOLDI;
28: PetscErrorCode EPSSetUp_ARNOLDI(EPS eps)
29: {
31: PetscInt N;
34: VecGetSize(eps->vec_initial,&N);
35: if (eps->nev > N) eps->nev = N;
36: if (eps->ncv) {
37: if (eps->ncv > N) eps->ncv = N;
38: if (eps->ncv<eps->nev) SETERRQ(1,"The value of ncv must be at least nev");
39: }
40: else eps->ncv = PetscMin(N,PetscMax(2*eps->nev,eps->nev+15));
41:
42: if (!eps->max_it) eps->max_it = PetscMax(100,N);
43: if (!eps->tol) eps->tol = 1.e-7;
44: if (eps->which!=EPS_LARGEST_MAGNITUDE)
45: SETERRQ(1,"Wrong value of eps->which");
46: EPSAllocateSolution(eps);
47: PetscFree(eps->T);
48: PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&eps->T);
49: if (eps->solverclass==EPS_TWO_SIDE) {
50: PetscFree(eps->Tl);
51: PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&eps->Tl);
52: EPSDefaultGetWork(eps,2);
53: }
54: else { EPSDefaultGetWork(eps,1); }
55: return(0);
56: }
60: /*
61: EPSBasicArnoldi - Computes an m-step Arnoldi factorization. The first k
62: columns are assumed to be locked and therefore they are not modified. On
63: exit, the following relation is satisfied:
65: OP * V - V * H = f * e_m^T
67: where the columns of V are the Arnoldi vectors (which are B-orthonormal),
68: H is an upper Hessenberg matrix, f is the residual vector and e_m is
69: the m-th vector of the canonical basis. The vector f is B-orthogonal to
70: the columns of V. On exit, beta contains the B-norm of f and the next
71: Arnoldi vector can be computed as v_{m+1} = f / beta.
72: */
73: PetscErrorCode EPSBasicArnoldi(EPS eps,PetscTruth trans,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta)
74: {
76: int j,m = *M;
77: PetscReal norm;
78: PetscTruth breakdown;
81: for (j=k;j<m-1;j++) {
82: if (trans) { STApplyTranspose(eps->OP,V[j],V[j+1]); }
83: else { STApply(eps->OP,V[j],V[j+1]); }
84: eps->its++;
85: EPSOrthogonalize(eps,eps->nds,eps->DS,V[j+1],PETSC_NULL,PETSC_NULL,PETSC_NULL);
86: EPSOrthogonalize(eps,j+1,V,V[j+1],H+m*j,&norm,&breakdown);
87: H[(m+1)*j+1] = norm;
88: if (breakdown) {
89: eps->count_breakdown++;
90: PetscInfo1(eps,"Breakdown in Arnoldi method (norm=%g)\n",norm);
91: *M = j+1;
92: *beta = norm;
93: return(0);
94: } else {
95: VecScale(V[j+1],1/norm);
96: }
97: }
98: STApply(eps->OP,V[m-1],f);
99: eps->its++;
100: EPSOrthogonalize(eps,eps->nds,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL);
101: EPSOrthogonalize(eps,m,V,f,H+m*(m-1),beta,PETSC_NULL);
102: return(0);
103: }
107: /*
108: EPSDelayedArnoldi - This function is equivalent to EPSBasicArnoldi but
109: performs the computation in a different way. The main idea is that
110: reorthogonalization is delayed to the next Arnoldi step. This version is
111: more scalable but in some case may be less robust numerically.
112: */
113: static PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta)
114: {
116: int i,j,m=*M;
117: Vec w,u,t;
118: PetscScalar shh[100],*lhh,dot;
119: PetscReal norm1,norm2;
122: if (m<=100) lhh = shh;
123: else { PetscMalloc(m*sizeof(PetscScalar),&lhh); }
124: VecDuplicate(f,&w);
125: VecDuplicate(f,&u);
126: VecDuplicate(f,&t);
128: for (j=k;j<m;j++) {
129: STApply(eps->OP,V[j],f);
130: eps->its++;
131: EPSOrthogonalize(eps,eps->nds,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL);
133: STMInnerProductBegin(eps->OP,j+1,f,V,H+m*j);
134: if (j>k) {
135: eps->count_reorthog++;
136: STMInnerProductBegin(eps->OP,j,V[j],V,lhh);
137: STInnerProductBegin(eps->OP,V[j],V[j],&dot);
138: }
139: if (j>k+1) {
140: STNormBegin(eps->OP,u,&norm2);
141: }
142:
143: STMInnerProductEnd(eps->OP,j+1,f,V,H+m*j);
144: if (j>k) {
145: STMInnerProductEnd(eps->OP,j,V[j],V,lhh);
146: STInnerProductEnd(eps->OP,V[j],V[j],&dot);
147: }
148: if (j>k+1) {
149: STNormEnd(eps->OP,u,&norm2);
150: if (norm2 < eps->orthog_eta * norm1) {
151: eps->count_breakdown++;
152: PetscInfo2(eps,"Breakdown in Arnoldi method (it=%i norm=%g)\n",eps->its,norm2);
153: *M = j-1;
154: *beta = norm2;
156: if (m>100) { PetscFree(lhh); }
157: VecDestroy(w);
158: VecDestroy(u);
159: VecDestroy(t);
160: return(0);
161: }
162: }
163:
164: if (j>k) {
165: norm1 = sqrt(PetscRealPart(dot));
166: for (i=0;i<j;i++)
167: H[m*j+i] = H[m*j+i]/norm1;
168: H[m*j+j] = H[m*j+j]/dot;
169:
170: VecCopy(V[j],t);
171: VecScale(V[j],1.0/norm1);
172: VecScale(f,1.0/norm1);
173: }
175: VecSet(w,0.0);
176: VecMAXPY(w,j+1,H+m*j,V);
177: VecAXPY(f,-1.0,w);
179: if (j>k) {
180: VecSet(w,0.0);
181: VecMAXPY(w,j,lhh,V);
182: VecAXPY(t,-1.0,w);
183: for (i=0;i<j;i++)
184: H[m*(j-1)+i] += lhh[i];
185: }
187: if (j>k+1) {
188: VecCopy(u,V[j-1]);
189: VecScale(V[j-1],1.0/norm2);
190: H[m*(j-2)+j-1] = norm2;
191: }
193: if (j<m-1) {
194: VecCopy(f,V[j+1]);
195: VecCopy(t,u);
196: }
197: }
199: STNorm(eps->OP,t,&norm2);
200: VecScale(t,1.0/norm2);
201: VecCopy(t,V[m-1]);
202: H[m*(m-2)+m-1] = norm2;
204: eps->count_reorthog++;
205: STMInnerProduct(eps->OP,m,f,V,lhh);
206:
207: VecSet(w,0.0);
208: VecMAXPY(w,m,lhh,V);
209: VecAXPY(f,-1.0,w);
210: for (i=0;i<m;i++)
211: H[m*(m-1)+i] += lhh[i];
213: STNorm(eps->OP,f,beta);
214: VecScale(f,1.0 / *beta);
216: if (m>100) { PetscFree(lhh); }
217: VecDestroy(w);
218: VecDestroy(u);
219: VecDestroy(t);
221: return(0);
222: }
226: /*
227: EPSArnoldiResiduals - Computes the 2-norm of the residual vectors from
228: the information provided by the m-step Arnoldi factorization,
230: OP * V - V * H = f * e_m^T
232: For the approximate eigenpair (k_i,V*y_i), the residual norm is computed as
233: |beta*y(end,i)| where beta is the norm of f and y is the corresponding
234: eigenvector of H.
235: */
236: PetscErrorCode ArnoldiResiduals(PetscScalar *H,int ldh,PetscScalar *U,PetscReal beta,int nconv,int ncv,PetscScalar *eigr,PetscScalar *eigi,PetscReal *errest,PetscScalar *work)
237: {
238: #if defined(SLEPC_MISSING_LAPACK_TREVC)
240: SETERRQ(PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable.");
241: #else
243: int i,mout,info;
244: PetscScalar *Y=work+4*ncv;
245: #if defined(PETSC_USE_COMPLEX)
246: PetscReal *rwork=(PetscReal*)(work+3*ncv);
247: #endif
251: /* Compute eigenvectors Y of H */
252: PetscMemcpy(Y,U,ncv*ncv*sizeof(PetscScalar));
253: #if !defined(PETSC_USE_COMPLEX)
254: LAPACKtrevc_("R","B",PETSC_NULL,&ncv,H,&ldh,PETSC_NULL,&ncv,Y,&ncv,&ncv,&mout,work,&info,1,1);
255: #else
256: LAPACKtrevc_("R","B",PETSC_NULL,&ncv,H,&ldh,PETSC_NULL,&ncv,Y,&ncv,&ncv,&mout,work,rwork,&info,1,1);
257: #endif
258: if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);
260: /* Compute residual norm estimates as beta*abs(Y(m,:)) */
261: for (i=nconv;i<ncv;i++) {
262: #if !defined(PETSC_USE_COMPLEX)
263: if (eigi[i] != 0 && i<ncv-1) {
264: errest[i] = beta*SlepcAbsEigenvalue(Y[i*ncv+ncv-1],Y[(i+1)*ncv+ncv-1]) /
265: SlepcAbsEigenvalue(eigr[i],eigi[i]);
266: errest[i+1] = errest[i];
267: i++;
268: } else
269: #endif
270: errest[i] = beta*PetscAbsScalar(Y[i*ncv+ncv-1]) / PetscAbsScalar(eigr[i]);
271: }
272: return(0);
273: #endif
274: }
278: PetscErrorCode EPSSolve_ARNOLDI(EPS eps)
279: {
281: int i,k;
282: Vec f=eps->work[0];
283: PetscScalar *H=eps->T,*U,*work;
284: PetscReal beta;
285: PetscTruth breakdown;
286: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
289: PetscMemzero(eps->T,eps->ncv*eps->ncv*sizeof(PetscScalar));
290: PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&U);
291: PetscMalloc((eps->ncv+4)*eps->ncv*sizeof(PetscScalar),&work);
292:
293: eps->nconv = 0;
294: eps->its = 0;
295: EPSMonitor(eps,eps->its,eps->nconv,eps->eigr,eps->eigi,eps->errest,eps->nv);
297: /* Get the starting Arnoldi vector */
298: EPSGetStartVector(eps,0,eps->V[0],PETSC_NULL);
299:
300: /* Restart loop */
301: while (eps->reason == EPS_CONVERGED_ITERATING) {
303: /* Compute an nv-step Arnoldi factorization */
304: eps->nv = eps->ncv;
305: if (arnoldi->delayed) {
306: EPSDelayedArnoldi(eps,H,eps->V,eps->nconv,&eps->nv,f,&beta);
307: } else {
308: EPSBasicArnoldi(eps,PETSC_FALSE,H,eps->V,eps->nconv,&eps->nv,f,&beta);
309: }
311: /* Reduce H to (quasi-)triangular form, H <- U H U' */
312: PetscMemzero(U,eps->nv*eps->nv*sizeof(PetscScalar));
313: for (i=0;i<eps->nv;i++) { U[i*(eps->nv+1)] = 1.0; }
314: EPSDenseSchur(eps->nv,eps->nconv,H,eps->ncv,U,eps->eigr,eps->eigi);
316: /* Sort the remaining columns of the Schur form */
317: EPSSortDenseSchur(eps->nv,eps->nconv,H,eps->ncv,U,eps->eigr,eps->eigi);
319: /* Compute residual norm estimates */
320: ArnoldiResiduals(H,eps->ncv,U,beta,eps->nconv,eps->nv,eps->eigr,eps->eigi,eps->errest,work);
321:
322: /* Lock converged eigenpairs and update the corresponding vectors,
323: including the restart vector: V(:,idx) = V*U(:,idx) */
324: k = eps->nconv;
325: while (k<eps->nv && eps->errest[k]<eps->tol) k++;
326: for (i=eps->nconv;i<=k && i<eps->nv;i++) {
327: VecSet(eps->AV[i],0.0);
328: VecMAXPY(eps->AV[i],eps->nv,U+eps->nv*i,eps->V);
329: }
330: for (i=eps->nconv;i<=k && i<eps->nv;i++) {
331: VecCopy(eps->AV[i],eps->V[i]);
332: }
333: eps->nconv = k;
335: EPSMonitor(eps,eps->its,eps->nconv,eps->eigr,eps->eigi,eps->errest,eps->nv);
336: if (eps->nv < eps->ncv) {
337: EPSGetStartVector(eps,k,eps->V[k],&breakdown);
338: if (breakdown) {
339: eps->reason = EPS_DIVERGED_BREAKDOWN;
340: PetscInfo(eps,"Unable to generate more start vectors\n");
341: }
342: }
343: if (eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
344: if (eps->nconv >= eps->nev) eps->reason = EPS_CONVERGED_TOL;
345: }
346:
347: #if defined(PETSC_USE_COMPLEX)
348: for (i=0;i<eps->nconv;i++) eps->eigi[i]=0.0;
349: #endif
351: PetscFree(U);
352: PetscFree(work);
353: return(0);
354: }
358: PetscErrorCode EPSSetFromOptions_ARNOLDI(EPS eps)
359: {
361: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
364: PetscOptionsHead("ARNOLDI options");
365: PetscOptionsTruth("-eps_arnoldi_delayed","Arnoldi with delayed reorthogonalization","EPSArnoldiSetDelayed",PETSC_FALSE,&arnoldi->delayed,PETSC_NULL);
366: PetscOptionsTail();
367: return(0);
368: }
373: PetscErrorCode EPSArnoldiSetDelayed_ARNOLDI(EPS eps,PetscTruth delayed)
374: {
375: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
378: arnoldi->delayed = delayed;
379: return(0);
380: }
385: /*@
386: EPSArnoldiSetDelayed - Activates or deactivates delayed reorthogonalization
387: in the Arnoldi iteration.
389: Collective on EPS
391: Input Parameters:
392: + eps - the eigenproblem solver context
393: - delayed - boolean flag for toggling delayed reorthogonalization
395: Options Database Key:
396: . -eps_arnoldi_delayed - Activates delayed reorthogonalization in Arnoldi
397:
398: Note:
399: Delayed reorthogonalization is an aggressive optimization for the Arnoldi
400: eigensolver than may provide better scalability, but it is sometimes less
401: robust than the default algorithm.
403: Level: advanced
405: .seealso: EPSArnoldiGetDelayed()
406: @*/
407: PetscErrorCode EPSArnoldiSetDelayed(EPS eps,PetscTruth delayed)
408: {
409: PetscErrorCode ierr, (*f)(EPS,PetscTruth);
413: PetscObjectQueryFunction((PetscObject)eps,"EPSArnoldiSetDelayed_C",(void (**)())&f);
414: if (f) {
415: (*f)(eps,delayed);
416: }
417: return(0);
418: }
423: PetscErrorCode EPSArnoldiGetDelayed_ARNOLDI(EPS eps,PetscTruth *delayed)
424: {
425: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
428: *delayed = arnoldi->delayed;
429: return(0);
430: }
435: /*@C
436: EPSArnoldiGetDelayed - Gets the type of reorthogonalization used during the Arnoldi
437: iteration.
439: Collective on EPS
441: Input Parameter:
442: . eps - the eigenproblem solver context
444: Input Parameter:
445: . delayed - boolean flag indicating if delayed reorthogonalization has been enabled
447: Level: advanced
449: .seealso: EPSArnoldiSetDelayed()
450: @*/
451: PetscErrorCode EPSArnoldiGetDelayed(EPS eps,PetscTruth *delayed)
452: {
453: PetscErrorCode ierr, (*f)(EPS,PetscTruth*);
457: PetscObjectQueryFunction((PetscObject)eps,"EPSArnoldiGetDelayed_C",(void (**)())&f);
458: if (f) {
459: (*f)(eps,delayed);
460: }
461: return(0);
462: }
466: PetscErrorCode EPSView_ARNOLDI(EPS eps,PetscViewer viewer)
467: {
469: PetscTruth isascii;
470: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
473: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
474: if (!isascii) {
475: SETERRQ1(1,"Viewer type %s not supported for EPSARNOLDI",((PetscObject)viewer)->type_name);
476: }
477: if (arnoldi->delayed) {
478: PetscViewerASCIIPrintf(viewer,"using delayed reorthogonalization\n");
479: }
480: return(0);
481: }
483: EXTERN PetscErrorCode EPSSolve_TS_ARNOLDI(EPS);
488: PetscErrorCode EPSCreate_ARNOLDI(EPS eps)
489: {
491: EPS_ARNOLDI *arnoldi;
492:
494: PetscNew(EPS_ARNOLDI,&arnoldi);
495: PetscLogObjectMemory(eps,sizeof(EPS_ARNOLDI));
496: eps->data = (void *)arnoldi;
497: eps->ops->solve = EPSSolve_ARNOLDI;
498: eps->ops->solvets = EPSSolve_TS_ARNOLDI;
499: eps->ops->setup = EPSSetUp_ARNOLDI;
500: eps->ops->setfromoptions = EPSSetFromOptions_ARNOLDI;
501: eps->ops->destroy = EPSDestroy_Default;
502: eps->ops->view = EPSView_ARNOLDI;
503: eps->ops->backtransform = EPSBackTransform_Default;
504: eps->ops->computevectors = EPSComputeVectors_Schur;
505: arnoldi->delayed = PETSC_FALSE;
506: PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiSetDelayed_C","EPSArnoldiSetDelayed_ARNOLDI",EPSArnoldiSetDelayed_ARNOLDI);
507: PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiGetDelayed_C","EPSArnoldiGetDelayed_ARNOLDI",EPSArnoldiGetDelayed_ARNOLDI);
508: return(0);
509: }