Actual source code: arnoldi.c
1: /*
3: SLEPc eigensolver: "arnoldi"
5: Method: Explicitly Restarted Arnoldi
7: Algorithm:
9: Arnoldi method with explicit restart and deflation.
11: References:
13: [1] "Arnoldi Methods in SLEPc", SLEPc Technical Report STR-4,
14: available at http://www.grycap.upv.es/slepc.
16: Last update: Feb 2009
18: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
19: SLEPc - Scalable Library for Eigenvalue Problem Computations
20: Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain
22: This file is part of SLEPc.
23:
24: SLEPc is free software: you can redistribute it and/or modify it under the
25: terms of version 3 of the GNU Lesser General Public License as published by
26: the Free Software Foundation.
28: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
29: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
30: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
31: more details.
33: You should have received a copy of the GNU Lesser General Public License
34: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
35: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
36: */
38: #include <slepc-private/epsimpl.h> /*I "slepceps.h" I*/
39: #include <slepcblaslapack.h>
41: PetscErrorCode EPSSolve_Arnoldi(EPS);
43: typedef struct {
44: PetscBool delayed;
45: } EPS_ARNOLDI;
49: PetscErrorCode EPSSetUp_Arnoldi(EPS eps)
50: {
54: if (eps->ncv) { /* ncv set */
55: if (eps->ncv<eps->nev) SETERRQ(((PetscObject)eps)->comm,1,"The value of ncv must be at least nev");
56: }
57: else if (eps->mpd) { /* mpd set */
58: eps->ncv = PetscMin(eps->n,eps->nev+eps->mpd);
59: }
60: else { /* neither set: defaults depend on nev being small or large */
61: if (eps->nev<500) eps->ncv = PetscMin(eps->n,PetscMax(2*eps->nev,eps->nev+15));
62: else { eps->mpd = 500; eps->ncv = PetscMin(eps->n,eps->nev+eps->mpd); }
63: }
64: if (!eps->mpd) eps->mpd = eps->ncv;
65: if (eps->ncv>eps->nev+eps->mpd) SETERRQ(((PetscObject)eps)->comm,1,"The value of ncv must not be larger than nev+mpd");
66: if (!eps->max_it) eps->max_it = PetscMax(100,2*eps->n/eps->ncv);
67: if (!eps->which) { EPSDefaultSetWhich(eps); }
68: if (eps->ishermitian && (eps->which==EPS_LARGEST_IMAGINARY || eps->which==EPS_SMALLEST_IMAGINARY)) SETERRQ(((PetscObject)eps)->comm,1,"Wrong value of eps->which");
70: if (!eps->extraction) {
71: EPSSetExtraction(eps,EPS_RITZ);
72: }
73: if (eps->arbit_func) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_SUP,"Arbitrary selection of eigenpairs not supported in this solver");
75: EPSAllocateSolution(eps);
76: DSSetType(eps->ds,DSNHEP);
77: if (eps->extraction==EPS_REFINED || eps->extraction==EPS_REFINED_HARMONIC) {
78: DSSetRefined(eps->ds,PETSC_TRUE);
79: }
80: DSSetExtraRow(eps->ds,PETSC_TRUE);
81: DSAllocate(eps->ds,eps->ncv+1);
82: EPSDefaultGetWork(eps,1);
84: /* dispatch solve method */
85: if (eps->leftvecs) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_SUP,"Left vectors not supported in this solver");
86: eps->ops->solve = EPSSolve_Arnoldi;
87: return(0);
88: }
92: /*
93: EPSDelayedArnoldi - This function is equivalent to EPSBasicArnoldi but
94: performs the computation in a different way. The main idea is that
95: reorthogonalization is delayed to the next Arnoldi step. This version is
96: more scalable but in some cases convergence may stagnate.
97: */
98: PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,PetscInt ldh,Vec *V,PetscInt k,PetscInt *M,Vec f,PetscReal *beta,PetscBool *breakdown)
99: {
101: PetscInt i,j,m=*M;
102: Vec u,t;
103: PetscScalar shh[100],*lhh,dot,dot2;
104: PetscReal norm1=0.0,norm2;
107: if (m<=100) lhh = shh;
108: else { PetscMalloc(m*sizeof(PetscScalar),&lhh); }
109: VecDuplicate(f,&u);
110: VecDuplicate(f,&t);
112: for (j=k;j<m;j++) {
113: STApply(eps->OP,V[j],f);
114: IPOrthogonalize(eps->ip,0,PETSC_NULL,eps->nds,PETSC_NULL,eps->defl,f,PETSC_NULL,PETSC_NULL,PETSC_NULL);
116: IPMInnerProductBegin(eps->ip,f,j+1,V,H+ldh*j);
117: if (j>k) {
118: IPMInnerProductBegin(eps->ip,V[j],j,V,lhh);
119: IPInnerProductBegin(eps->ip,V[j],V[j],&dot);
120: }
121: if (j>k+1) {
122: IPNormBegin(eps->ip,u,&norm2);
123: VecDotBegin(u,V[j-2],&dot2);
124: }
125:
126: IPMInnerProductEnd(eps->ip,f,j+1,V,H+ldh*j);
127: if (j>k) {
128: IPMInnerProductEnd(eps->ip,V[j],j,V,lhh);
129: IPInnerProductEnd(eps->ip,V[j],V[j],&dot);
130: }
131: if (j>k+1) {
132: IPNormEnd(eps->ip,u,&norm2);
133: VecDotEnd(u,V[j-2],&dot2);
134: if (PetscAbsScalar(dot2/norm2) > PETSC_MACHINE_EPSILON) {
135: *breakdown = PETSC_TRUE;
136: *M = j-1;
137: *beta = norm2;
139: if (m>100) { PetscFree(lhh); }
140: VecDestroy(&u);
141: VecDestroy(&t);
142: return(0);
143: }
144: }
145:
146: if (j>k) {
147: norm1 = PetscSqrtReal(PetscRealPart(dot));
148: for (i=0;i<j;i++)
149: H[ldh*j+i] = H[ldh*j+i]/norm1;
150: H[ldh*j+j] = H[ldh*j+j]/dot;
151:
152: VecCopy(V[j],t);
153: VecScale(V[j],1.0/norm1);
154: VecScale(f,1.0/norm1);
155: }
157: SlepcVecMAXPBY(f,1.0,-1.0,j+1,H+ldh*j,V);
159: if (j>k) {
160: SlepcVecMAXPBY(t,1.0,-1.0,j,lhh,V);
161: for (i=0;i<j;i++)
162: H[ldh*(j-1)+i] += lhh[i];
163: }
165: if (j>k+1) {
166: VecCopy(u,V[j-1]);
167: VecScale(V[j-1],1.0/norm2);
168: H[ldh*(j-2)+j-1] = norm2;
169: }
171: if (j<m-1) {
172: VecCopy(f,V[j+1]);
173: VecCopy(t,u);
174: }
175: }
177: IPNorm(eps->ip,t,&norm2);
178: VecScale(t,1.0/norm2);
179: VecCopy(t,V[m-1]);
180: H[ldh*(m-2)+m-1] = norm2;
182: IPMInnerProduct(eps->ip,f,m,V,lhh);
183:
184: SlepcVecMAXPBY(f,1.0,-1.0,m,lhh,V);
185: for (i=0;i<m;i++)
186: H[ldh*(m-1)+i] += lhh[i];
188: IPNorm(eps->ip,f,beta);
189: VecScale(f,1.0 / *beta);
190: *breakdown = PETSC_FALSE;
191:
192: if (m>100) { PetscFree(lhh); }
193: VecDestroy(&u);
194: VecDestroy(&t);
195: return(0);
196: }
200: /*
201: EPSDelayedArnoldi1 - This function is similar to EPSDelayedArnoldi1,
202: but without reorthogonalization (only delayed normalization).
203: */
204: PetscErrorCode EPSDelayedArnoldi1(EPS eps,PetscScalar *H,PetscInt ldh,Vec *V,PetscInt k,PetscInt *M,Vec f,PetscReal *beta,PetscBool *breakdown)
205: {
207: PetscInt i,j,m=*M;
208: PetscScalar dot;
209: PetscReal norm=0.0;
212: for (j=k;j<m;j++) {
213: STApply(eps->OP,V[j],f);
214: IPOrthogonalize(eps->ip,0,PETSC_NULL,eps->nds,PETSC_NULL,eps->defl,f,PETSC_NULL,PETSC_NULL,PETSC_NULL);
216: IPMInnerProductBegin(eps->ip,f,j+1,V,H+ldh*j);
217: if (j>k) {
218: IPInnerProductBegin(eps->ip,V[j],V[j],&dot);
219: }
220:
221: IPMInnerProductEnd(eps->ip,f,j+1,V,H+ldh*j);
222: if (j>k) {
223: IPInnerProductEnd(eps->ip,V[j],V[j],&dot);
224: }
225:
226: if (j>k) {
227: norm = PetscSqrtReal(PetscRealPart(dot));
228: VecScale(V[j],1.0/norm);
229: H[ldh*(j-1)+j] = norm;
231: for (i=0;i<j;i++)
232: H[ldh*j+i] = H[ldh*j+i]/norm;
233: H[ldh*j+j] = H[ldh*j+j]/dot;
234: VecScale(f,1.0/norm);
235: }
237: SlepcVecMAXPBY(f,1.0,-1.0,j+1,H+ldh*j,V);
239: if (j<m-1) {
240: VecCopy(f,V[j+1]);
241: }
242: }
244: IPNorm(eps->ip,f,beta);
245: VecScale(f,1.0 / *beta);
246: *breakdown = PETSC_FALSE;
247: return(0);
248: }
252: PetscErrorCode EPSSolve_Arnoldi(EPS eps)
253: {
254: PetscErrorCode ierr;
255: PetscInt k,nv,ld;
256: Vec f=eps->work[0];
257: PetscScalar *H,*U,*X;
258: PetscReal beta,gamma=1.0;
259: PetscBool breakdown,harmonic,refined;
260: IPOrthogRefineType orthog_ref;
261: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI*)eps->data;
264: DSGetLeadingDimension(eps->ds,&ld);
265: DSGetRefined(eps->ds,&refined);
266: harmonic = (eps->extraction==EPS_HARMONIC || eps->extraction==EPS_REFINED_HARMONIC)?PETSC_TRUE:PETSC_FALSE;
267: IPGetOrthogonalization(eps->ip,PETSC_NULL,&orthog_ref,PETSC_NULL);
269: /* Get the starting Arnoldi vector */
270: EPSGetStartVector(eps,0,eps->V[0],PETSC_NULL);
271:
272: /* Restart loop */
273: while (eps->reason == EPS_CONVERGED_ITERATING) {
274: eps->its++;
276: /* Compute an nv-step Arnoldi factorization */
277: nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
278: DSSetDimensions(eps->ds,nv,PETSC_IGNORE,eps->nconv,0);
279: DSGetArray(eps->ds,DS_MAT_A,&H);
280: if (!arnoldi->delayed) {
281: EPSBasicArnoldi(eps,PETSC_FALSE,H,ld,eps->V,eps->nconv,&nv,f,&beta,&breakdown);
282: H[nv+(nv-1)*ld] = beta;
283: } else if (orthog_ref == IP_ORTHOG_REFINE_NEVER) {
284: EPSDelayedArnoldi1(eps,H,ld,eps->V,eps->nconv,&nv,f,&beta,&breakdown);
285: } else {
286: EPSDelayedArnoldi(eps,H,ld,eps->V,eps->nconv,&nv,f,&beta,&breakdown);
287: }
288: DSRestoreArray(eps->ds,DS_MAT_A,&H);
289: DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
291: /* Compute translation of Krylov decomposition if harmonic extraction used */
292: if (harmonic) {
293: DSTranslateHarmonic(eps->ds,eps->target,beta,PETSC_FALSE,PETSC_NULL,&gamma);
294: }
296: /* Solve projected problem */
297: DSSolve(eps->ds,eps->eigr,eps->eigi);
298: DSSort(eps->ds,eps->eigr,eps->eigi,PETSC_NULL,PETSC_NULL,PETSC_NULL);
299: DSUpdateExtraRow(eps->ds);
301: /* Check convergence */
302: EPSKrylovConvergence(eps,PETSC_FALSE,eps->nconv,nv-eps->nconv,eps->V,nv,beta,gamma,&k);
303: if (refined) {
304: DSGetArray(eps->ds,DS_MAT_X,&X);
305: SlepcVecMAXPBY(eps->V[k],0.0,1.0,nv,X+k*ld,eps->V);
306: DSRestoreArray(eps->ds,DS_MAT_X,&X);
307: DSGetArray(eps->ds,DS_MAT_Q,&U);
308: SlepcUpdateVectors(nv,eps->V,eps->nconv,PetscMin(k,nv),U,ld,PETSC_FALSE);
309: DSRestoreArray(eps->ds,DS_MAT_Q,&U);
310: IPOrthogonalize(eps->ip,0,PETSC_NULL,k,PETSC_NULL,eps->V,eps->V[k],PETSC_NULL,PETSC_NULL,PETSC_NULL);
311: } else {
312: DSGetArray(eps->ds,DS_MAT_Q,&U);
313: SlepcUpdateVectors(nv,eps->V,eps->nconv,PetscMin(k+1,nv),U,ld,PETSC_FALSE);
314: DSRestoreArray(eps->ds,DS_MAT_Q,&U);
315: }
316: eps->nconv = k;
318: EPSMonitor(eps,eps->its,eps->nconv,eps->eigr,eps->eigi,eps->errest,nv);
319: if (breakdown) {
320: PetscInfo2(eps,"Breakdown in Arnoldi method (it=%D norm=%G)\n",eps->its,beta);
321: EPSGetStartVector(eps,k,eps->V[k],&breakdown);
322: if (breakdown) {
323: eps->reason = EPS_DIVERGED_BREAKDOWN;
324: PetscInfo(eps,"Unable to generate more start vectors\n");
325: }
326: }
327: if (eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
328: if (eps->nconv >= eps->nev) eps->reason = EPS_CONVERGED_TOL;
329: }
330:
331: /* truncate Schur decomposition and change the state to raw so that
332: PSVectors() computes eigenvectors from scratch */
333: DSSetDimensions(eps->ds,eps->nconv,PETSC_IGNORE,0,0);
334: DSSetState(eps->ds,DS_STATE_RAW);
335: return(0);
336: }
340: PetscErrorCode EPSSetFromOptions_Arnoldi(EPS eps)
341: {
343: PetscBool set,val;
344: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI*)eps->data;
347: PetscOptionsHead("EPS Arnoldi Options");
348: PetscOptionsBool("-eps_arnoldi_delayed","Arnoldi with delayed reorthogonalization","EPSArnoldiSetDelayed",arnoldi->delayed,&val,&set);
349: if (set) {
350: EPSArnoldiSetDelayed(eps,val);
351: }
352: PetscOptionsTail();
353: return(0);
354: }
356: EXTERN_C_BEGIN
359: PetscErrorCode EPSArnoldiSetDelayed_Arnoldi(EPS eps,PetscBool delayed)
360: {
361: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI*)eps->data;
364: arnoldi->delayed = delayed;
365: return(0);
366: }
367: EXTERN_C_END
371: /*@
372: EPSArnoldiSetDelayed - Activates or deactivates delayed reorthogonalization
373: in the Arnoldi iteration.
375: Logically Collective on EPS
377: Input Parameters:
378: + eps - the eigenproblem solver context
379: - delayed - boolean flag
381: Options Database Key:
382: . -eps_arnoldi_delayed - Activates delayed reorthogonalization in Arnoldi
383:
384: Note:
385: Delayed reorthogonalization is an aggressive optimization for the Arnoldi
386: eigensolver than may provide better scalability, but sometimes makes the
387: solver converge less than the default algorithm.
389: Level: advanced
391: .seealso: EPSArnoldiGetDelayed()
392: @*/
393: PetscErrorCode EPSArnoldiSetDelayed(EPS eps,PetscBool delayed)
394: {
400: PetscTryMethod(eps,"EPSArnoldiSetDelayed_C",(EPS,PetscBool),(eps,delayed));
401: return(0);
402: }
404: EXTERN_C_BEGIN
407: PetscErrorCode EPSArnoldiGetDelayed_Arnoldi(EPS eps,PetscBool *delayed)
408: {
409: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI*)eps->data;
412: *delayed = arnoldi->delayed;
413: return(0);
414: }
415: EXTERN_C_END
419: /*@C
420: EPSArnoldiGetDelayed - Gets the type of reorthogonalization used during the Arnoldi
421: iteration.
423: Not Collective
425: Input Parameter:
426: . eps - the eigenproblem solver context
428: Input Parameter:
429: . delayed - boolean flag indicating if delayed reorthogonalization has been enabled
431: Level: advanced
433: .seealso: EPSArnoldiSetDelayed()
434: @*/
435: PetscErrorCode EPSArnoldiGetDelayed(EPS eps,PetscBool *delayed)
436: {
442: PetscTryMethod(eps,"EPSArnoldiGetDelayed_C",(EPS,PetscBool*),(eps,delayed));
443: return(0);
444: }
448: PetscErrorCode EPSDestroy_Arnoldi(EPS eps)
449: {
453: PetscFree(eps->data);
454: PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiSetDelayed_C","",PETSC_NULL);
455: PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiGetDelayed_C","",PETSC_NULL);
456: return(0);
457: }
461: PetscErrorCode EPSView_Arnoldi(EPS eps,PetscViewer viewer)
462: {
464: PetscBool isascii;
465: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI*)eps->data;
468: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
469: if (!isascii) SETERRQ1(((PetscObject)eps)->comm,1,"Viewer type %s not supported for EPS Arnoldi",((PetscObject)viewer)->type_name);
470: if (arnoldi->delayed) {
471: PetscViewerASCIIPrintf(viewer," Arnoldi: using delayed reorthogonalization\n");
472: }
473: return(0);
474: }
476: EXTERN_C_BEGIN
479: PetscErrorCode EPSCreate_Arnoldi(EPS eps)
480: {
482:
484: PetscNewLog(eps,EPS_ARNOLDI,&eps->data);
485: eps->ops->setup = EPSSetUp_Arnoldi;
486: eps->ops->setfromoptions = EPSSetFromOptions_Arnoldi;
487: eps->ops->destroy = EPSDestroy_Arnoldi;
488: eps->ops->reset = EPSReset_Default;
489: eps->ops->view = EPSView_Arnoldi;
490: eps->ops->backtransform = EPSBackTransform_Default;
491: eps->ops->computevectors = EPSComputeVectors_Schur;
492: PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiSetDelayed_C","EPSArnoldiSetDelayed_Arnoldi",EPSArnoldiSetDelayed_Arnoldi);
493: PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiGetDelayed_C","EPSArnoldiGetDelayed_Arnoldi",EPSArnoldiGetDelayed_Arnoldi);
494: return(0);
495: }
496: EXTERN_C_END