Actual source code: arnoldi.c
1: /*
3: SLEPc eigensolver: "arnoldi"
5: Method: Explicitly Restarted Arnoldi
7: Algorithm:
9: Arnoldi method with explicit restart and deflation.
11: References:
13: [1] "Arnoldi Methods in SLEPc", SLEPc Technical Report STR-4,
14: available at http://www.grycap.upv.es/slepc.
16: Last update: Oct 2006
18: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
19: SLEPc - Scalable Library for Eigenvalue Problem Computations
20: Copyright (c) 2002-2007, Universidad Politecnica de Valencia, Spain
22: This file is part of SLEPc. See the README file for conditions of use
23: and additional information.
24: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
25: */
27: #include src/eps/epsimpl.h
28: #include slepcblaslapack.h
30: typedef struct {
31: PetscTruth delayed;
32: } EPS_ARNOLDI;
36: PetscErrorCode EPSSetUp_ARNOLDI(EPS eps)
37: {
39: PetscInt N;
42: VecGetSize(eps->vec_initial,&N);
43: if (eps->ncv) {
44: if (eps->ncv<eps->nev) SETERRQ(1,"The value of ncv must be at least nev");
45: }
46: else eps->ncv = PetscMin(N,PetscMax(2*eps->nev,eps->nev+15));
47: if (!eps->max_it) eps->max_it = PetscMax(100,2*N/eps->ncv);
48: if (eps->ishermitian && (eps->which==EPS_LARGEST_IMAGINARY || eps->which==EPS_SMALLEST_IMAGINARY))
49: SETERRQ(1,"Wrong value of eps->which");
51: EPSAllocateSolution(eps);
52: PetscFree(eps->T);
53: PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&eps->T);
54: if (eps->solverclass==EPS_TWO_SIDE) {
55: PetscFree(eps->Tl);
56: PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&eps->Tl);
57: }
58: EPSDefaultGetWork(eps,2);
59: return(0);
60: }
64: /*
65: EPSBasicArnoldi - Computes an m-step Arnoldi factorization. The first k
66: columns are assumed to be locked and therefore they are not modified. On
67: exit, the following relation is satisfied:
69: OP * V - V * H = f * e_m^T
71: where the columns of V are the Arnoldi vectors (which are B-orthonormal),
72: H is an upper Hessenberg matrix, f is the residual vector and e_m is
73: the m-th vector of the canonical basis. The vector f is B-orthogonal to
74: the columns of V. On exit, beta contains the B-norm of f and the next
75: Arnoldi vector can be computed as v_{m+1} = f / beta.
76: */
77: PetscErrorCode EPSBasicArnoldi(EPS eps,PetscTruth trans,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta,PetscTruth *breakdown)
78: {
80: int j,m = *M;
81: PetscReal norm;
84: for (j=k;j<m-1;j++) {
85: if (trans) { STApplyTranspose(eps->OP,V[j],V[j+1]); }
86: else { STApply(eps->OP,V[j],V[j+1]); }
87: IPOrthogonalize(eps->ip,eps->nds,PETSC_NULL,eps->DS,V[j+1],PETSC_NULL,PETSC_NULL,PETSC_NULL,eps->work[0]);
88: IPOrthogonalize(eps->ip,j+1,PETSC_NULL,V,V[j+1],H+m*j,&norm,breakdown,eps->work[0]);
89: H[(m+1)*j+1] = norm;
90: if (*breakdown) {
91: *M = j+1;
92: *beta = norm;
93: return(0);
94: } else {
95: VecScale(V[j+1],1/norm);
96: }
97: }
98: STApply(eps->OP,V[m-1],f);
99: IPOrthogonalize(eps->ip,eps->nds,PETSC_NULL,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL,eps->work[0]);
100: IPOrthogonalize(eps->ip,m,PETSC_NULL,V,f,H+m*(m-1),beta,PETSC_NULL,eps->work[0]);
101: return(0);
102: }
106: /*
107: EPSDelayedArnoldi - This function is equivalent to EPSBasicArnoldi but
108: performs the computation in a different way. The main idea is that
109: reorthogonalization is delayed to the next Arnoldi step. This version is
110: more scalable but in some cases convergence may stagnate.
111: */
112: PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta,PetscTruth *breakdown)
113: {
115: int i,j,m=*M;
116: Vec w,u,t;
117: PetscScalar shh[100],*lhh,dot,dot2;
118: PetscReal norm1=0.0,norm2;
121: if (m<=100) lhh = shh;
122: else { PetscMalloc(m*sizeof(PetscScalar),&lhh); }
123: VecDuplicate(f,&w);
124: VecDuplicate(f,&u);
125: VecDuplicate(f,&t);
127: for (j=k;j<m;j++) {
128: STApply(eps->OP,V[j],f);
129: IPOrthogonalize(eps->ip,eps->nds,PETSC_NULL,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL,eps->work[0]);
131: IPMInnerProductBegin(eps->ip,f,j+1,V,H+m*j);
132: if (j>k) {
133: IPMInnerProductBegin(eps->ip,V[j],j,V,lhh);
134: IPInnerProductBegin(eps->ip,V[j],V[j],&dot);
135: }
136: if (j>k+1) {
137: IPNormBegin(eps->ip,u,&norm2);
138: VecDotBegin(u,V[j-2],&dot2);
139: }
140:
141: IPMInnerProductEnd(eps->ip,f,j+1,V,H+m*j);
142: if (j>k) {
143: IPMInnerProductEnd(eps->ip,V[j],j,V,lhh);
144: IPInnerProductEnd(eps->ip,V[j],V[j],&dot);
145: }
146: if (j>k+1) {
147: IPNormEnd(eps->ip,u,&norm2);
148: VecDotEnd(u,V[j-2],&dot2);
149: if (PetscAbsScalar(dot2/norm2) > PETSC_MACHINE_EPSILON) {
150: *breakdown = PETSC_TRUE;
151: *M = j-1;
152: *beta = norm2;
154: if (m>100) { PetscFree(lhh); }
155: VecDestroy(w);
156: VecDestroy(u);
157: VecDestroy(t);
158: return(0);
159: }
160: }
161:
162: if (j>k) {
163: norm1 = sqrt(PetscRealPart(dot));
164: for (i=0;i<j;i++)
165: H[m*j+i] = H[m*j+i]/norm1;
166: H[m*j+j] = H[m*j+j]/dot;
167:
168: VecCopy(V[j],t);
169: VecScale(V[j],1.0/norm1);
170: VecScale(f,1.0/norm1);
171: }
173: VecSet(w,0.0);
174: VecMAXPY(w,j+1,H+m*j,V);
175: VecAXPY(f,-1.0,w);
177: if (j>k) {
178: VecSet(w,0.0);
179: VecMAXPY(w,j,lhh,V);
180: VecAXPY(t,-1.0,w);
181: for (i=0;i<j;i++)
182: H[m*(j-1)+i] += lhh[i];
183: }
185: if (j>k+1) {
186: VecCopy(u,V[j-1]);
187: VecScale(V[j-1],1.0/norm2);
188: H[m*(j-2)+j-1] = norm2;
189: }
191: if (j<m-1) {
192: VecCopy(f,V[j+1]);
193: VecCopy(t,u);
194: }
195: }
197: IPNorm(eps->ip,t,&norm2);
198: VecScale(t,1.0/norm2);
199: VecCopy(t,V[m-1]);
200: H[m*(m-2)+m-1] = norm2;
202: IPMInnerProduct(eps->ip,f,m,V,lhh);
203:
204: VecSet(w,0.0);
205: VecMAXPY(w,m,lhh,V);
206: VecAXPY(f,-1.0,w);
207: for (i=0;i<m;i++)
208: H[m*(m-1)+i] += lhh[i];
210: IPNorm(eps->ip,f,beta);
211: VecScale(f,1.0 / *beta);
212: *breakdown = PETSC_FALSE;
213:
214: if (m>100) { PetscFree(lhh); }
215: VecDestroy(w);
216: VecDestroy(u);
217: VecDestroy(t);
219: return(0);
220: }
224: /*
225: EPSDelayedArnoldi1 - This function is similar to EPSDelayedArnoldi1,
226: but without reorthogonalization (only delayed normalization).
227: */
228: PetscErrorCode EPSDelayedArnoldi1(EPS eps,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta,PetscTruth *breakdown)
229: {
231: int i,j,m=*M;
232: Vec w;
233: PetscScalar dot;
234: PetscReal norm=0.0;
237: VecDuplicate(f,&w);
239: for (j=k;j<m;j++) {
240: STApply(eps->OP,V[j],f);
241: IPOrthogonalize(eps->ip,eps->nds,PETSC_NULL,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL,eps->work[0]);
243: IPMInnerProductBegin(eps->ip,f,j+1,V,H+m*j);
244: if (j>k) {
245: IPInnerProductBegin(eps->ip,V[j],V[j],&dot);
246: }
247:
248: IPMInnerProductEnd(eps->ip,f,j+1,V,H+m*j);
249: if (j>k) {
250: IPInnerProductEnd(eps->ip,V[j],V[j],&dot);
251: }
252:
253: if (j>k) {
254: norm = sqrt(PetscRealPart(dot));
255: VecScale(V[j],1.0/norm);
256: H[m*(j-1)+j] = norm;
258: for (i=0;i<j;i++)
259: H[m*j+i] = H[m*j+i]/norm;
260: H[m*j+j] = H[m*j+j]/dot;
261: VecScale(f,1.0/norm);
262: }
264: VecSet(w,0.0);
265: VecMAXPY(w,j+1,H+m*j,V);
266: VecAXPY(f,-1.0,w);
268: if (j<m-1) {
269: VecCopy(f,V[j+1]);
270: }
271: }
273: IPNorm(eps->ip,f,beta);
274: VecScale(f,1.0 / *beta);
275: *breakdown = PETSC_FALSE;
276:
277: VecDestroy(w);
278: return(0);
279: }
283: /*
284: EPSArnoldiResiduals - Computes the 2-norm of the residual vectors from
285: the information provided by the m-step Arnoldi factorization,
287: OP * V - V * H = f * e_m^T
289: For the approximate eigenpair (k_i,V*y_i), the residual norm is computed as
290: |beta*y(end,i)| where beta is the norm of f and y is the corresponding
291: eigenvector of H.
292: */
293: PetscErrorCode ArnoldiResiduals(PetscScalar *H,int ldh,PetscScalar *U,PetscReal beta,int nconv,int ncv,PetscScalar *eigr,PetscScalar *eigi,PetscReal *errest,PetscScalar *work)
294: {
295: #if defined(SLEPC_MISSING_LAPACK_TREVC)
297: SETERRQ(PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable.");
298: #else
300: int i,mout,info;
301: PetscScalar *Y=work+4*ncv;
302: PetscReal w;
303: #if defined(PETSC_USE_COMPLEX)
304: PetscReal *rwork=(PetscReal*)(work+3*ncv);
305: #endif
309: /* Compute eigenvectors Y of H */
310: PetscMemcpy(Y,U,ncv*ncv*sizeof(PetscScalar));
311: PetscLogEventBegin(EPS_Dense,0,0,0,0);
312: #if !defined(PETSC_USE_COMPLEX)
313: LAPACKtrevc_("R","B",PETSC_NULL,&ncv,H,&ldh,PETSC_NULL,&ncv,Y,&ncv,&ncv,&mout,work,&info,1,1);
314: #else
315: LAPACKtrevc_("R","B",PETSC_NULL,&ncv,H,&ldh,PETSC_NULL,&ncv,Y,&ncv,&ncv,&mout,work,rwork,&info,1,1);
316: #endif
317: PetscLogEventEnd(EPS_Dense,0,0,0,0);
318: if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);
320: /* Compute residual norm estimates as beta*abs(Y(m,:)) */
321: for (i=nconv;i<ncv;i++) {
322: #if !defined(PETSC_USE_COMPLEX)
323: if (eigi[i] != 0 && i<ncv-1) {
324: errest[i] = beta*SlepcAbsEigenvalue(Y[i*ncv+ncv-1],Y[(i+1)*ncv+ncv-1]);
325: w = SlepcAbsEigenvalue(eigr[i],eigi[i]);
326: if (w > errest[i])
327: errest[i] = errest[i] / w;
328: errest[i+1] = errest[i];
329: i++;
330: } else
331: #endif
332: {
333: errest[i] = beta*PetscAbsScalar(Y[i*ncv+ncv-1]);
334: w = PetscAbsScalar(eigr[i]);
335: if (w > errest[i])
336: errest[i] = errest[i] / w;
337: }
338: }
339: return(0);
340: #endif
341: }
345: PetscErrorCode EPSSolve_ARNOLDI(EPS eps)
346: {
348: int i,k;
349: Vec f=eps->work[1];
350: PetscScalar *H=eps->T,*U,*work;
351: PetscReal beta;
352: PetscTruth breakdown;
353: IPOrthogonalizationRefinementType orthog_ref;
354: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
357: PetscMemzero(eps->T,eps->ncv*eps->ncv*sizeof(PetscScalar));
358: PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&U);
359: PetscMalloc((eps->ncv+4)*eps->ncv*sizeof(PetscScalar),&work);
360:
361: IPGetOrthogonalization(eps->ip,PETSC_NULL,&orthog_ref,PETSC_NULL);
363: /* Get the starting Arnoldi vector */
364: EPSGetStartVector(eps,0,eps->V[0],PETSC_NULL);
365:
366: /* Restart loop */
367: while (eps->reason == EPS_CONVERGED_ITERATING) {
368: eps->its++;
370: /* Compute an nv-step Arnoldi factorization */
371: eps->nv = eps->ncv;
372: if (!arnoldi->delayed) {
373: EPSBasicArnoldi(eps,PETSC_FALSE,H,eps->V,eps->nconv,&eps->nv,f,&beta,&breakdown);
374: } else if (orthog_ref == IP_ORTH_REFINE_NEVER) {
375: EPSDelayedArnoldi1(eps,H,eps->V,eps->nconv,&eps->nv,f,&beta,&breakdown);
376: } else {
377: EPSDelayedArnoldi(eps,H,eps->V,eps->nconv,&eps->nv,f,&beta,&breakdown);
378: }
380: /* Reduce H to (quasi-)triangular form, H <- U H U' */
381: PetscMemzero(U,eps->nv*eps->nv*sizeof(PetscScalar));
382: for (i=0;i<eps->nv;i++) { U[i*(eps->nv+1)] = 1.0; }
383: EPSDenseSchur(eps->nv,eps->nconv,H,eps->ncv,U,eps->eigr,eps->eigi);
385: /* Sort the remaining columns of the Schur form */
386: EPSSortDenseSchur(eps->nv,eps->nconv,H,eps->ncv,U,eps->eigr,eps->eigi,eps->which);
388: /* Compute residual norm estimates */
389: ArnoldiResiduals(H,eps->ncv,U,beta,eps->nconv,eps->nv,eps->eigr,eps->eigi,eps->errest,work);
390:
391: /* Lock converged eigenpairs and update the corresponding vectors,
392: including the restart vector: V(:,idx) = V*U(:,idx) */
393: k = eps->nconv;
394: while (k<eps->nv && eps->errest[k]<eps->tol) k++;
395: for (i=eps->nconv;i<=k && i<eps->nv;i++) {
396: VecSet(eps->AV[i],0.0);
397: VecMAXPY(eps->AV[i],eps->nv,U+eps->nv*i,eps->V);
398: }
399: for (i=eps->nconv;i<=k && i<eps->nv;i++) {
400: VecCopy(eps->AV[i],eps->V[i]);
401: }
402: eps->nconv = k;
404: EPSMonitor(eps,eps->its,eps->nconv,eps->eigr,eps->eigi,eps->errest,eps->nv);
405: if (breakdown) {
406: PetscInfo2(eps,"Breakdown in Arnoldi method (it=%i norm=%g)\n",eps->its,beta);
407: EPSGetStartVector(eps,k,eps->V[k],&breakdown);
408: if (breakdown) {
409: eps->reason = EPS_DIVERGED_BREAKDOWN;
410: PetscInfo(eps,"Unable to generate more start vectors\n");
411: }
412: }
413: if (eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
414: if (eps->nconv >= eps->nev) eps->reason = EPS_CONVERGED_TOL;
415: }
416:
417: PetscFree(U);
418: PetscFree(work);
419: return(0);
420: }
424: PetscErrorCode EPSSetFromOptions_ARNOLDI(EPS eps)
425: {
427: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
430: PetscOptionsHead("ARNOLDI options");
431: PetscOptionsTruth("-eps_arnoldi_delayed","Arnoldi with delayed reorthogonalization","EPSArnoldiSetDelayed",PETSC_FALSE,&arnoldi->delayed,PETSC_NULL);
432: PetscOptionsTail();
433: return(0);
434: }
439: PetscErrorCode EPSArnoldiSetDelayed_ARNOLDI(EPS eps,PetscTruth delayed)
440: {
441: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
444: arnoldi->delayed = delayed;
445: return(0);
446: }
451: /*@
452: EPSArnoldiSetDelayed - Activates or deactivates delayed reorthogonalization
453: in the Arnoldi iteration.
455: Collective on EPS
457: Input Parameters:
458: + eps - the eigenproblem solver context
459: - delayed - boolean flag
461: Options Database Key:
462: . -eps_arnoldi_delayed - Activates delayed reorthogonalization in Arnoldi
463:
464: Note:
465: Delayed reorthogonalization is an aggressive optimization for the Arnoldi
466: eigensolver than may provide better scalability, but sometimes makes the
467: solver converge less than the default algorithm.
469: Level: advanced
471: .seealso: EPSArnoldiGetDelayed()
472: @*/
473: PetscErrorCode EPSArnoldiSetDelayed(EPS eps,PetscTruth delayed)
474: {
475: PetscErrorCode ierr, (*f)(EPS,PetscTruth);
479: PetscObjectQueryFunction((PetscObject)eps,"EPSArnoldiSetDelayed_C",(void (**)())&f);
480: if (f) {
481: (*f)(eps,delayed);
482: }
483: return(0);
484: }
489: PetscErrorCode EPSArnoldiGetDelayed_ARNOLDI(EPS eps,PetscTruth *delayed)
490: {
491: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
494: *delayed = arnoldi->delayed;
495: return(0);
496: }
501: /*@C
502: EPSArnoldiGetDelayed - Gets the type of reorthogonalization used during the Arnoldi
503: iteration.
505: Collective on EPS
507: Input Parameter:
508: . eps - the eigenproblem solver context
510: Input Parameter:
511: . delayed - boolean flag indicating if delayed reorthogonalization has been enabled
513: Level: advanced
515: .seealso: EPSArnoldiSetDelayed()
516: @*/
517: PetscErrorCode EPSArnoldiGetDelayed(EPS eps,PetscTruth *delayed)
518: {
519: PetscErrorCode ierr, (*f)(EPS,PetscTruth*);
523: PetscObjectQueryFunction((PetscObject)eps,"EPSArnoldiGetDelayed_C",(void (**)())&f);
524: if (f) {
525: (*f)(eps,delayed);
526: }
527: return(0);
528: }
532: PetscErrorCode EPSView_ARNOLDI(EPS eps,PetscViewer viewer)
533: {
535: PetscTruth isascii;
536: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
539: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
540: if (!isascii) {
541: SETERRQ1(1,"Viewer type %s not supported for EPSARNOLDI",((PetscObject)viewer)->type_name);
542: }
543: if (arnoldi->delayed) {
544: PetscViewerASCIIPrintf(viewer,"using delayed reorthogonalization\n");
545: }
546: return(0);
547: }
549: EXTERN PetscErrorCode EPSSolve_TS_ARNOLDI(EPS);
554: PetscErrorCode EPSCreate_ARNOLDI(EPS eps)
555: {
557: EPS_ARNOLDI *arnoldi;
558:
560: PetscNew(EPS_ARNOLDI,&arnoldi);
561: PetscLogObjectMemory(eps,sizeof(EPS_ARNOLDI));
562: eps->data = (void *)arnoldi;
563: eps->ops->solve = EPSSolve_ARNOLDI;
564: eps->ops->solvets = EPSSolve_TS_ARNOLDI;
565: eps->ops->setup = EPSSetUp_ARNOLDI;
566: eps->ops->setfromoptions = EPSSetFromOptions_ARNOLDI;
567: eps->ops->destroy = EPSDestroy_Default;
568: eps->ops->view = EPSView_ARNOLDI;
569: eps->ops->backtransform = EPSBackTransform_Default;
570: eps->ops->computevectors = EPSComputeVectors_Schur;
571: arnoldi->delayed = PETSC_FALSE;
572: PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiSetDelayed_C","EPSArnoldiSetDelayed_ARNOLDI",EPSArnoldiSetDelayed_ARNOLDI);
573: PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiGetDelayed_C","EPSArnoldiGetDelayed_ARNOLDI",EPSArnoldiGetDelayed_ARNOLDI);
574: return(0);
575: }