Actual source code: ks-slice.c
slepc-3.6.0 2015-06-12
1: /*
3: SLEPc eigensolver: "krylovschur"
5: Method: Krylov-Schur with spectrum slicing for symmetric eigenproblems
7: References:
9: [1] R.G. Grimes et al., "A shifted block Lanczos algorithm for
10: solving sparse symmetric generalized eigenproblems", SIAM J.
11: Matrix Anal. Appl. 15(1):228-272, 1994.
13: [2] C. Campos and J.E. Roman, "Spectrum slicing strategies based
14: on restarted Lanczos methods", Numer. Algor. 60(2):279-295,
15: 2012.
17: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
18: SLEPc - Scalable Library for Eigenvalue Problem Computations
19: Copyright (c) 2002-2015, Universitat Politecnica de Valencia, Spain
21: This file is part of SLEPc.
23: SLEPc is free software: you can redistribute it and/or modify it under the
24: terms of version 3 of the GNU Lesser General Public License as published by
25: the Free Software Foundation.
27: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
28: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
29: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
30: more details.
32: You should have received a copy of the GNU Lesser General Public License
33: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
34: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
35: */
37: #include <slepc/private/epsimpl.h>
38: #include krylovschur.h
40: #define SLICE_PTOL PETSC_SQRT_MACHINE_EPSILON
44: static PetscErrorCode EPSSliceResetSR(EPS eps) {
45: PetscErrorCode ierr;
46: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
47: EPS_SR sr=ctx->sr;
48: EPS_shift s;
51: if (sr) {
52: if (ctx->npart>1) {
53: BVDestroy(&sr->V);
54: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
55: }
56: /* Reviewing list of shifts to free memory */
57: s = sr->s0;
58: if (s) {
59: while (s->neighb[1]) {
60: s = s->neighb[1];
61: PetscFree(s->neighb[0]);
62: }
63: PetscFree(s);
64: }
65: PetscFree(sr);
66: }
67: ctx->sr = PETSC_NULL;
68: return(0);
69: }
73: PetscErrorCode EPSReset_KrylovSchur_Slice(EPS eps)
74: {
75: PetscErrorCode ierr;
76: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
79: if (!ctx->global) return(0);
80: /* Destroy auxiliary EPS */
81: EPSSliceResetSR(ctx->eps);
82: EPSDestroy(&ctx->eps);
83: if (ctx->npart>1) {
84: PetscSubcommDestroy(&ctx->subc);
85: if (ctx->commset) {
86: MPI_Comm_free(&ctx->commrank);
87: ctx->commset = PETSC_FALSE;
88: }
89: }
90: PetscFree(ctx->subintervals);
91: PetscFree(ctx->nconv_loc);
92: EPSSliceResetSR(eps);
93: PetscFree(ctx->inertias);
94: PetscFree(ctx->shifts);
95: return(0);
96: }
100: /*
101: EPSSliceAllocateSolution - Allocate memory storage for common variables such
102: as eigenvalues and eigenvectors. The argument extra is used for methods
103: that require a working basis slightly larger than ncv.
104: */
105: static PetscErrorCode EPSSliceAllocateSolution(EPS eps,PetscInt extra)
106: {
107: PetscErrorCode ierr;
108: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
109: PetscReal eta;
110: PetscInt k;
111: PetscLogDouble cnt;
112: BVType type;
113: BVOrthogType orthog_type;
114: BVOrthogRefineType orthog_ref;
115: BVOrthogBlockType ob_type;
116: Mat matrix;
117: Vec t;
118: EPS_SR sr = ctx->sr;
121: /* allocate space for eigenvalues and friends */
122: k = PetscMax(1,sr->numEigs);
123: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
124: PetscMalloc4(k,&sr->eigr,k,&sr->eigi,k,&sr->errest,k,&sr->perm);
125: cnt = 2*k*sizeof(PetscScalar) + 2*k*sizeof(PetscReal) + k*sizeof(PetscInt);
126: PetscLogObjectMemory((PetscObject)eps,cnt);
128: /* allocate sr->V and transfer options from eps->V */
129: BVDestroy(&sr->V);
130: BVCreate(PetscObjectComm((PetscObject)eps),&sr->V);
131: PetscLogObjectParent((PetscObject)eps,(PetscObject)sr->V);
132: if (!eps->V) { EPSGetBV(eps,&eps->V); }
133: if (!((PetscObject)(eps->V))->type_name) {
134: BVSetType(sr->V,BVSVEC);
135: } else {
136: BVGetType(eps->V,&type);
137: BVSetType(sr->V,type);
138: }
139: STMatCreateVecs(eps->st,&t,NULL);
140: BVSetSizesFromVec(sr->V,t,k);
141: VecDestroy(&t);
142: EPS_SetInnerProduct(eps);
143: BVGetMatrix(eps->V,&matrix,NULL);
144: BVSetMatrix(sr->V,matrix,PETSC_FALSE);
145: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
146: BVSetOrthogonalization(sr->V,orthog_type,orthog_ref,eta,ob_type);
147: return(0);
148: }
152: static PetscErrorCode EPSSliceGetEPS(EPS eps)
153: {
154: PetscErrorCode ierr;
155: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data,*ctx_local;
156: BV V;
157: BVType type;
158: PetscReal eta;
159: BVOrthogType orthog_type;
160: BVOrthogRefineType orthog_ref;
161: BVOrthogBlockType ob_type;
162: Mat A,B=NULL,Ar,Br=NULL;
163: PetscInt i;
164: PetscReal h,a,b;
165: PetscMPIInt rank;
166: EPS_SR sr=ctx->sr;
167: PC pc;
168: PCType pctype;
169: KSP ksp;
170: KSPType ksptype;
171: STType sttype;
172: const MatSolverPackage stype;
175: EPSGetOperators(eps,&A,&B);
176: if (ctx->npart==1) {
177: if (!ctx->eps) { EPSCreate(((PetscObject)eps)->comm,&ctx->eps); }
178: EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
179: EPSSetST(ctx->eps,eps->st);
180: a = eps->inta; b = eps->intb;
181: } else {
182: if (!ctx->subc) {
183: /* Create context for subcommunicators */
184: PetscSubcommCreate(PetscObjectComm((PetscObject)eps),&ctx->subc);
185: PetscSubcommSetNumber(ctx->subc,ctx->npart);
186: PetscSubcommSetType(ctx->subc,PETSC_SUBCOMM_CONTIGUOUS);
187: PetscLogObjectMemory((PetscObject)eps,sizeof(PetscSubcomm));
189: /* Duplicate matrices */
190: MatCreateRedundantMatrix(A,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Ar);
191: if (B) { MatCreateRedundantMatrix(B,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Br); }
192: }
194: /* Determine subintervals */
195: if (!ctx->subintset) { /* uniform distribution if no set by user */
196: if (!sr->hasEnd) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"Global interval must be bounded for splitting it in uniform subintervals");
197: h = (eps->intb-eps->inta)/ctx->npart;
198: a = eps->inta+ctx->subc->color*h;
199: b = (ctx->subc->color==ctx->npart-1)?eps->intb:eps->inta+(ctx->subc->color+1)*h;
200: PetscFree(ctx->subintervals);
201: PetscMalloc1(ctx->npart+1,&ctx->subintervals);
202: for (i=0;i<ctx->npart;i++) ctx->subintervals[i] = eps->inta+h*i;
203: ctx->subintervals[ctx->npart] = eps->intb;
204: } else {
205: a = ctx->subintervals[ctx->subc->color];
206: b = ctx->subintervals[ctx->subc->color+1];
207: }
209: if (!ctx->eps) {
210: /* Create auxiliary EPS */
211: EPSCreate(PetscSubcommChild(ctx->subc),&ctx->eps);
212: EPSSetOperators(ctx->eps,Ar,Br);
213: MatDestroy(&Ar);
214: MatDestroy(&Br);
215: }
216: EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
218: /* Transfer options for ST, KSP and PC */
219: STGetType(eps->st,&sttype);
220: STSetType(ctx->eps->st,sttype);
221: STGetKSP(eps->st,&ksp);
222: KSPGetType(ksp,&ksptype);
223: KSPGetPC(ksp,&pc);
224: PCGetType(pc,&pctype);
225: PCFactorGetMatSolverPackage(pc,&stype);
226: STGetKSP(ctx->eps->st,&ksp);
227: KSPSetType(ksp,ksptype);
228: KSPGetPC(ksp,&pc);
229: PCSetType(pc,pctype);
230: PCFactorSetMatSolverPackage(pc,stype);
232: /* Create subcommunicator grouping processes with same rank */
233: if (ctx->commrank) { MPI_Comm_free(&ctx->commrank); }
234: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
235: MPI_Comm_split(((PetscObject)eps)->comm,rank,ctx->subc->color,&ctx->commrank);
236: ctx->commset = PETSC_TRUE;
237: }
238: EPSSetConvergenceTest(ctx->eps,eps->conv);
239: EPSSetInterval(ctx->eps,a,b);
240: ctx_local = (EPS_KRYLOVSCHUR*)ctx->eps->data;
241: ctx_local->npart = ctx->npart;
242: ctx_local->detect = ctx->detect;
243: ctx_local->global = PETSC_FALSE;
244: ctx_local->eps = eps;
245: ctx_local->subc = ctx->subc;
246: ctx_local->commrank = ctx->commrank;
248: EPSSetDimensions(ctx->eps,ctx->nev,ctx->ncv,ctx->mpd);
249: EPSKrylovSchurSetLocking(ctx->eps,ctx->lock);
251: /* transfer options from eps->V */
252: EPSGetBV(ctx->eps,&V);
253: if (!eps->V) { EPSGetBV(eps,&eps->V); }
254: if (!((PetscObject)(eps->V))->type_name) {
255: BVSetType(V,BVSVEC);
256: } else {
257: BVGetType(eps->V,&type);
258: BVSetType(V,type);
259: }
260: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
261: BVSetOrthogonalization(V,orthog_type,orthog_ref,eta,ob_type);
262: ctx->eps->which = eps->which;
263: ctx->eps->max_it = eps->max_it;
264: ctx->eps->tol = eps->tol;
265: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL;
266: EPSSetProblemType(ctx->eps,eps->problem_type);
267: EPSSetUp(ctx->eps);
268: ctx->eps->nconv = 0;
269: ctx->eps->its = 0;
270: for (i=0;i<ctx->eps->ncv;i++) {
271: ctx->eps->eigr[i] = 0.0;
272: ctx->eps->eigi[i] = 0.0;
273: ctx->eps->errest[i] = 0.0;
274: }
275: return(0);
276: }
280: static PetscErrorCode EPSSliceGetInertia(EPS eps,PetscReal shift,PetscInt *inertia,PetscInt *zeros)
281: {
283: KSP ksp;
284: PC pc;
285: Mat F;
288: if (shift >= PETSC_MAX_REAL) { /* Right-open interval */
289: if (inertia) *inertia = eps->n;
290: } else if (shift <= PETSC_MIN_REAL) {
291: if (inertia) *inertia = 0;
292: if (zeros) *zeros = 0;
293: } else {
294: STSetShift(eps->st,shift);
295: STSetUp(eps->st);
296: STGetKSP(eps->st,&ksp);
297: KSPGetPC(ksp,&pc);
298: PCFactorGetMatrix(pc,&F);
299: MatGetInertia(F,inertia,zeros,NULL);
300: }
301: return(0);
302: }
306: PetscErrorCode EPSSetUp_KrylovSchur_Slice(EPS eps)
307: {
308: PetscErrorCode ierr;
309: PetscBool issinv;
310: EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data,*ctx_glob;
311: EPS_SR sr,sr_loc,sr_glob;
312: PetscInt nEigs,dssz=1,i,zeros=0,off=0;
313: PetscMPIInt nproc,rank,aux;
314: MPI_Request req;
317: if (ctx->global) {
318: if (eps->inta==0.0 && eps->intb==0.0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"Must define a computational interval when using EPS_ALL");
319: if (eps->intb >= PETSC_MAX_REAL && eps->inta <= PETSC_MIN_REAL) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"The defined computational interval should have at least one of their sides bounded");
320: if (!eps->ishermitian) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Spectrum slicing only available for symmetric/Hermitian eigenproblems");
321: if (eps->arbitrary) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Arbitrary selection of eigenpairs cannot be used with spectrum slicing");
322: if (!((PetscObject)(eps->st))->type_name) { /* default to shift-and-invert */
323: STSetType(eps->st,STSINVERT);
324: }
325: PetscObjectTypeCompareAny((PetscObject)eps->st,&issinv,STSINVERT,STCAYLEY,"");
326: if (!issinv) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Shift-and-invert or Cayley ST is needed for spectrum slicing");
327: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL*1e-2; /* use tighter tolerance */
328: if (!eps->max_it) eps->max_it = 100;
329: if (ctx->nev==1) ctx->nev = 40; /* nev not set, use default value */
330: if (ctx->nev<10) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"nev cannot be less than 10 in spectrum slicing runs");
331: }
332: eps->ops->backtransform = NULL;
334: /* create spectrum slicing context and initialize it */
335: EPSSliceResetSR(eps);
336: PetscNewLog(eps,&sr);
337: ctx->sr = sr;
338: sr->itsKs = 0;
339: sr->nleap = 0;
340: sr->nMAXCompl = eps->nev/4;
341: sr->iterCompl = eps->max_it/4;
342: sr->sPres = NULL;
343: sr->nS = 0;
345: if (ctx->npart==1 || ctx->global) {
346: /* check presence of ends and finding direction */
347: if ((eps->inta > PETSC_MIN_REAL && eps->inta != 0.0) || eps->intb >= PETSC_MAX_REAL) {
348: sr->int0 = eps->inta;
349: sr->int1 = eps->intb;
350: sr->dir = 1;
351: if (eps->intb >= PETSC_MAX_REAL) { /* Right-open interval */
352: sr->hasEnd = PETSC_FALSE;
353: } else sr->hasEnd = PETSC_TRUE;
354: } else {
355: sr->int0 = eps->intb;
356: sr->int1 = eps->inta;
357: sr->dir = -1;
358: sr->hasEnd = (eps->inta <= PETSC_MIN_REAL)?PETSC_FALSE:PETSC_TRUE;
359: }
360: }
361: if (ctx->global) {
362: if (ctx->npart>1) {
363: /* prevent computation of factorization in global eps unless npart==1 */
364: STSetTransform(eps->st,PETSC_FALSE);
365: }
366: EPSSetDimensions_Default(eps,ctx->nev,&ctx->ncv,&ctx->mpd);
367: /* create subintervals and initialize auxiliary eps for slicing runs */
368: EPSSliceGetEPS(eps);
369: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
370: if (ctx->npart>1) {
371: if ((sr->dir>0&&ctx->subc->color==0)||(sr->dir<0&&ctx->subc->color==ctx->npart-1)) sr->inertia0 = sr_loc->inertia0;
372: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
373: if (rank==0) {
374: MPI_Bcast(&sr->inertia0,1,MPIU_INT,(sr->dir>0)?0:ctx->npart-1,ctx->commrank);
375: }
376: MPI_Bcast(&sr->inertia0,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
377: PetscFree(ctx->nconv_loc);
378: PetscMalloc1(ctx->npart,&ctx->nconv_loc);
379: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
380: if (sr->dir<0) off = 1;
381: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
382: PetscMPIIntCast(sr_loc->numEigs,&aux);
383: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
384: MPI_Allgather(&sr_loc->int0,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
385: } else {
386: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
387: if (!rank) {
388: PetscMPIIntCast(sr_loc->numEigs,&aux);
389: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
390: MPI_Allgather(&sr_loc->int0,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
391: }
392: PetscMPIIntCast(ctx->npart,&aux);
393: MPI_Bcast(ctx->nconv_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
394: MPI_Bcast(ctx->subintervals+off,aux,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
395: }
396: nEigs = 0;
397: for (i=0;i<ctx->npart;i++) nEigs += ctx->nconv_loc[i];
398: } else {
399: nEigs = sr_loc->numEigs;
400: sr->inertia0 = sr_loc->inertia0;
401: }
402: sr->inertia1 = sr->inertia0+sr->dir*nEigs;
403: sr->numEigs = nEigs;
404: eps->nev = nEigs;
405: eps->ncv = nEigs;
406: eps->mpd = nEigs;
407: } else {
408: ctx_glob = (EPS_KRYLOVSCHUR*)ctx->eps->data;
409: sr_glob = ctx_glob->sr;
410: if (ctx->npart>1) {
411: sr->dir = sr_glob->dir;
412: sr->int0 = (sr->dir==1)?eps->inta:eps->intb;
413: sr->int1 = (sr->dir==1)?eps->intb:eps->inta;
414: if ((sr->dir>0&&ctx->subc->color==ctx->npart-1)||(sr->dir<0&&ctx->subc->color==0)) sr->hasEnd = sr_glob->hasEnd;
415: else sr->hasEnd = PETSC_TRUE;
416: }
418: /* last process in eps comm computes inertia1 */
419: if (ctx->npart==1 || ((sr->dir>0 && ctx->subc->color==ctx->npart-1) || (sr->dir<0 && ctx->subc->color==0))) {
420: EPSSliceGetInertia(eps,sr->int1,&sr->inertia1,ctx->detect?&zeros:NULL);
421: if (zeros) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
422: }
424: /* compute inertia0 */
425: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,ctx->detect?&zeros:NULL);
426: if (zeros) { /* error in factorization */
427: if (ctx->npart==1 || ctx_glob->subintset || ((sr->dir>0 && ctx->subc->color==0) || (sr->dir<0 && ctx->subc->color==ctx->npart-1))) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
428: else { /* perturb shift */
429: sr->int0 *= (1.0+SLICE_PTOL);
430: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,&zeros);
431: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",sr->int1);
432: }
433: }
434: if (ctx->npart>1) {
435: /* inertia1 is received from neighbour */
436: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
437: if (!rank) {
438: if ((sr->dir>0 && ctx->subc->color>0) || (sr->dir<0 && ctx->subc->color<ctx->npart-1)) { /* send inertia0 to neighbour0 */
439: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
440: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
441: }
442: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)|| (sr->dir<0 && ctx->subc->color>0)) { /* receive inertia1 from neighbour1 */
443: MPI_Recv(&(sr->inertia1),1,MPIU_INT,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
444: MPI_Recv(&(sr->int1),1,MPIU_REAL,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
445: }
446: }
447: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)||(sr->dir<0 && ctx->subc->color>0)) {
448: MPI_Bcast(&sr->inertia1,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
449: MPI_Bcast(&sr->int1,1,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
450: } else sr_glob->inertia1 = sr->inertia1;
451: }
453: /* number of eigenvalues in interval */
454: sr->numEigs = (sr->dir)*(sr->inertia1 - sr->inertia0);
455: if (ctx->npart>1) {
456: /* memory allocate for subinterval eigenpairs */
457: EPSSliceAllocateSolution(eps,1);
458: }
459: dssz = eps->ncv+1;
460: }
461: DSSetType(eps->ds,DSHEP);
462: DSSetCompact(eps->ds,PETSC_TRUE);
463: DSAllocate(eps->ds,dssz);
464: return(0);
465: }
469: static PetscErrorCode EPSSliceGatherEigenVectors(EPS eps)
470: {
471: PetscErrorCode ierr;
472: Vec v,vg,v_loc;
473: IS is1,is2;
474: VecScatter vec_sc;
475: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
476: PetscInt nloc,m0,n0,i,si,idx,*idx1,*idx2,j;
477: PetscScalar *array;
478: EPS_SR sr_loc;
479: BV V_loc;
482: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
483: V_loc = sr_loc->V;
485: /* Gather parallel eigenvectors */
486: BVGetColumn(eps->V,0,&v);
487: VecGetOwnershipRange(v,&n0,&m0);
488: BVRestoreColumn(eps->V,0,&v);
489: BVGetColumn(ctx->eps->V,0,&v);
490: VecGetLocalSize(v,&nloc);
491: BVRestoreColumn(ctx->eps->V,0,&v);
492: PetscMalloc2(m0-n0,&idx1,m0-n0,&idx2);
493: VecCreateMPI(PetscObjectComm((PetscObject)eps),nloc,PETSC_DECIDE,&vg);
494: idx = -1;
495: for (si=0;si<ctx->npart;si++) {
496: j = 0;
497: for (i=n0;i<m0;i++) {
498: idx1[j] = i;
499: idx2[j++] = i+eps->n*si;
500: }
501: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx1,PETSC_COPY_VALUES,&is1);
502: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx2,PETSC_COPY_VALUES,&is2);
503: BVGetColumn(eps->V,0,&v);
504: VecScatterCreate(v,is1,vg,is2,&vec_sc);
505: BVRestoreColumn(eps->V,0,&v);
506: ISDestroy(&is1);
507: ISDestroy(&is2);
508: for (i=0;i<ctx->nconv_loc[si];i++) {
509: BVGetColumn(eps->V,++idx,&v);
510: if (ctx->subc->color==si) {
511: BVGetColumn(V_loc,i,&v_loc);
512: VecGetArray(v_loc,&array);
513: VecPlaceArray(vg,array);
514: }
515: VecScatterBegin(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
516: VecScatterEnd(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
517: if (ctx->subc->color==si) {
518: VecResetArray(vg);
519: VecRestoreArray(v_loc,&array);
520: BVRestoreColumn(V_loc,i,&v_loc);
521: }
522: BVRestoreColumn(eps->V,idx,&v);
523: }
524: VecScatterDestroy(&vec_sc);
525: }
526: PetscFree2(idx1,idx2);
527: VecDestroy(&vg);
528: return(0);
529: }
533: /*
534: EPSComputeVectors_Slice - Recover Eigenvectors from subcomunicators
535: */
536: PetscErrorCode EPSComputeVectors_Slice(EPS eps)
537: {
538: PetscErrorCode ierr;
539: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
542: if (ctx->global && ctx->npart>1) {
543: EPSComputeVectors(ctx->eps);
544: EPSSliceGatherEigenVectors(eps);
545: }
546: return(0);
547: }
549: #define SWAP(a,b,t) {t=a;a=b;b=t;}
553: static PetscErrorCode EPSSliceGetInertias(EPS eps,PetscInt *n,PetscReal **shifts,PetscInt **inertias)
554: {
555: PetscErrorCode ierr;
556: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
557: PetscInt i=0,j,tmpi;
558: PetscReal v,tmpr;
559: EPS_shift s;
562: if (!eps->state) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Must call EPSSetUp() first");
563: if (!ctx->sr) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Only available in interval computations, see EPSSetInterval()");
564: if (!ctx->sr->s0) { /* EPSSolve not called yet */
565: *n = 2;
566: } else {
567: *n = 1;
568: s = ctx->sr->s0;
569: while (s) {
570: (*n)++;
571: s = s->neighb[1];
572: }
573: }
574: PetscMalloc1(*n,shifts);
575: PetscMalloc1(*n,inertias);
576: if (!ctx->sr->s0) { /* EPSSolve not called yet */
577: (*shifts)[0] = ctx->sr->int0;
578: (*shifts)[1] = ctx->sr->int1;
579: (*inertias)[0] = ctx->sr->inertia0;
580: (*inertias)[1] = ctx->sr->inertia1;
581: } else {
582: s = ctx->sr->s0;
583: while (s) {
584: (*shifts)[i] = s->value;
585: (*inertias)[i++] = s->inertia;
586: s = s->neighb[1];
587: }
588: (*shifts)[i] = ctx->sr->int1;
589: (*inertias)[i] = ctx->sr->inertia1;
590: }
591: /* remove possible duplicate in last position */
592: if ((*shifts)[(*n)-1]==(*shifts)[(*n)-2]) (*n)--;
593: /* sort result */
594: for (i=0;i<*n;i++) {
595: v = (*shifts)[i];
596: for (j=i+1;j<*n;j++) {
597: if (v > (*shifts)[j]) {
598: SWAP((*shifts)[i],(*shifts)[j],tmpr);
599: SWAP((*inertias)[i],(*inertias)[j],tmpi);
600: v = (*shifts)[i];
601: }
602: }
603: }
604: return(0);
605: }
609: static PetscErrorCode EPSSliceGatherSolution(EPS eps)
610: {
611: PetscErrorCode ierr;
612: PetscMPIInt rank,nproc;
613: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
614: PetscInt i,idx,j;
615: PetscInt *perm_loc,off=0,*inertias_loc,ns;
616: PetscScalar *eigr_loc;
617: EPS_SR sr_loc;
618: PetscReal *shifts_loc;
619: PetscMPIInt *disp,*ns_loc,aux;
622: eps->nconv = 0;
623: for (i=0;i<ctx->npart;i++) eps->nconv += ctx->nconv_loc[i];
624: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
626: /* Gather the shifts used and the inertias computed */
627: EPSSliceGetInertias(ctx->eps,&ns,&shifts_loc,&inertias_loc);
628: if (ctx->sr->dir>0 && shifts_loc[ns-1]==sr_loc->int1 && ctx->subc->color<ctx->npart-1) ns--;
629: if (ctx->sr->dir<0 && shifts_loc[ns-1]==sr_loc->int0 && ctx->subc->color>0) {
630: ns--;
631: for (i=0;i<ns;i++) {
632: inertias_loc[i] = inertias_loc[i+1];
633: shifts_loc[i] = shifts_loc[i+1];
634: }
635: }
636: PetscMalloc1(ctx->npart,&ns_loc);
637: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
638: PetscMPIIntCast(ns,&aux);
639: if (rank==0) { MPI_Allgather(&aux,1,MPI_INT,ns_loc,1,MPI_INT,ctx->commrank); }
640: PetscMPIIntCast(ctx->npart,&aux);
641: MPI_Bcast(ns_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
642: ctx->nshifts = 0;
643: for (i=0;i<ctx->npart;i++) ctx->nshifts += ns_loc[i];
644: PetscFree(ctx->inertias);
645: PetscFree(ctx->shifts);
646: PetscMalloc1(ctx->nshifts,&ctx->inertias);
647: PetscMalloc1(ctx->nshifts,&ctx->shifts);
649: /* Gather eigenvalues (same ranks have fully set of eigenvalues)*/
650: eigr_loc = sr_loc->eigr;
651: perm_loc = sr_loc->perm;
652: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
653: PetscMalloc1(ctx->npart,&disp);
654: disp[0] = 0;
655: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ctx->nconv_loc[i-1];
656: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
657: PetscMPIIntCast(sr_loc->numEigs,&aux);
658: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
659: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
660: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
661: PetscMPIIntCast(ns,&aux);
662: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
663: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
664: MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
665: } else { /* subcommunicators with different size */
666: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
667: if (rank==0) {
668: PetscMPIIntCast(sr_loc->numEigs,&aux);
669: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
670: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
671: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
672: PetscMPIIntCast(ns,&aux);
673: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
674: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
675: MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
676: }
677: PetscMPIIntCast(eps->nconv,&aux);
678: MPI_Bcast(eps->eigr,aux,MPIU_SCALAR,0,PetscSubcommChild(ctx->subc));
679: MPI_Bcast(eps->perm,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
680: MPI_Bcast(ctx->shifts,ctx->nshifts,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
681: PetscMPIIntCast(ctx->nshifts,&aux);
682: MPI_Bcast(ctx->inertias,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
683: MPI_Bcast(&eps->its,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
684: }
685: /* Update global array eps->perm */
686: idx = ctx->nconv_loc[0];
687: for (i=1;i<ctx->npart;i++) {
688: off += ctx->nconv_loc[i-1];
689: for (j=0;j<ctx->nconv_loc[i];j++) eps->perm[idx++] += off;
690: }
692: /* Gather parallel eigenvectors */
693: PetscFree(ns_loc);
694: PetscFree(disp);
695: PetscFree(shifts_loc);
696: PetscFree(inertias_loc);
697: return(0);
698: }
700: /*
701: Fills the fields of a shift structure
702: */
705: static PetscErrorCode EPSCreateShift(EPS eps,PetscReal val,EPS_shift neighb0,EPS_shift neighb1)
706: {
707: PetscErrorCode ierr;
708: EPS_shift s,*pending2;
709: PetscInt i;
710: EPS_SR sr;
711: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
714: sr = ctx->sr;
715: PetscNewLog(eps,&s);
716: s->value = val;
717: s->neighb[0] = neighb0;
718: if (neighb0) neighb0->neighb[1] = s;
719: s->neighb[1] = neighb1;
720: if (neighb1) neighb1->neighb[0] = s;
721: s->comp[0] = PETSC_FALSE;
722: s->comp[1] = PETSC_FALSE;
723: s->index = -1;
724: s->neigs = 0;
725: s->nconv[0] = s->nconv[1] = 0;
726: s->nsch[0] = s->nsch[1]=0;
727: /* Inserts in the stack of pending shifts */
728: /* If needed, the array is resized */
729: if (sr->nPend >= sr->maxPend) {
730: sr->maxPend *= 2;
731: PetscMalloc1(sr->maxPend,&pending2);
732: PetscLogObjectMemory((PetscObject)eps,sizeof(EPS_shift));
733: for (i=0;i<sr->nPend;i++) pending2[i] = sr->pending[i];
734: PetscFree(sr->pending);
735: sr->pending = pending2;
736: }
737: sr->pending[sr->nPend++]=s;
738: return(0);
739: }
741: /* Prepare for Rational Krylov update */
744: static PetscErrorCode EPSPrepareRational(EPS eps)
745: {
746: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
747: PetscErrorCode ierr;
748: PetscInt dir,i,k,ld,nv;
749: PetscScalar *A;
750: EPS_SR sr = ctx->sr;
751: Vec v;
754: DSGetLeadingDimension(eps->ds,&ld);
755: dir = (sr->sPres->neighb[0] == sr->sPrev)?1:-1;
756: dir*=sr->dir;
757: k = 0;
758: for (i=0;i<sr->nS;i++) {
759: if (dir*PetscRealPart(sr->S[i])>0.0) {
760: sr->S[k] = sr->S[i];
761: sr->S[sr->nS+k] = sr->S[sr->nS+i];
762: BVGetColumn(sr->Vnext,k,&v);
763: BVCopyVec(eps->V,eps->nconv+i,v);
764: BVRestoreColumn(sr->Vnext,k,&v);
765: k++;
766: if (k>=sr->nS/2)break;
767: }
768: }
769: /* Copy to DS */
770: DSGetArray(eps->ds,DS_MAT_A,&A);
771: PetscMemzero(A,ld*ld*sizeof(PetscScalar));
772: for (i=0;i<k;i++) {
773: A[i*(1+ld)] = sr->S[i];
774: A[k+i*ld] = sr->S[sr->nS+i];
775: }
776: sr->nS = k;
777: DSRestoreArray(eps->ds,DS_MAT_A,&A);
778: DSGetDimensions(eps->ds,&nv,NULL,NULL,NULL,NULL);
779: DSSetDimensions(eps->ds,nv,0,0,k);
780: /* Append u to V */
781: BVGetColumn(sr->Vnext,sr->nS,&v);
782: BVCopyVec(eps->V,sr->nv,v);
783: BVRestoreColumn(sr->Vnext,sr->nS,&v);
784: return(0);
785: }
787: /* Provides next shift to be computed */
790: static PetscErrorCode EPSExtractShift(EPS eps)
791: {
792: PetscErrorCode ierr;
793: PetscInt iner,zeros=0;
794: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
795: EPS_SR sr;
796: PetscReal newShift;
797: EPS_shift sPres;
800: sr = ctx->sr;
801: if (sr->nPend > 0) {
802: sr->sPrev = sr->sPres;
803: sr->sPres = sr->pending[--sr->nPend];
804: sPres = sr->sPres;
805: EPSSliceGetInertia(eps,sPres->value,&iner,ctx->detect?&zeros:NULL);
806: if (zeros) {
807: newShift = sPres->value*(1.0+SLICE_PTOL);
808: if (sr->dir*(sPres->neighb[0] && newShift-sPres->neighb[0]->value) < 0) newShift = (sPres->value+sPres->neighb[0]->value)/2;
809: else if (sPres->neighb[1] && sr->dir*(sPres->neighb[1]->value-newShift) < 0) newShift = (sPres->value+sPres->neighb[1]->value)/2;
810: EPSSliceGetInertia(eps,newShift,&iner,&zeros);
811: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",newShift);
812: sPres->value = newShift;
813: }
814: sr->sPres->inertia = iner;
815: eps->target = sr->sPres->value;
816: eps->reason = EPS_CONVERGED_ITERATING;
817: eps->its = 0;
818: } else sr->sPres = NULL;
819: return(0);
820: }
822: /*
823: Symmetric KrylovSchur adapted to spectrum slicing:
824: Allows searching an specific amount of eigenvalues in the subintervals left and right.
825: Returns whether the search has succeeded
826: */
829: static PetscErrorCode EPSKrylovSchur_Slice(EPS eps)
830: {
831: PetscErrorCode ierr;
832: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
833: PetscInt i,conv,k,l,ld,nv,*iwork,j,p;
834: Mat U;
835: PetscScalar *Q,*A,rtmp;
836: PetscReal *a,*b,beta;
837: PetscBool breakdown;
838: PetscInt count0,count1;
839: PetscReal lambda;
840: EPS_shift sPres;
841: PetscBool complIterating;
842: PetscBool sch0,sch1;
843: PetscInt iterCompl=0,n0,n1;
844: EPS_SR sr = ctx->sr;
847: /* Spectrum slicing data */
848: sPres = sr->sPres;
849: complIterating =PETSC_FALSE;
850: sch1 = sch0 = PETSC_TRUE;
851: DSGetLeadingDimension(eps->ds,&ld);
852: PetscMalloc1(2*ld,&iwork);
853: count0=0;count1=0; /* Found on both sides */
854: if (sr->nS > 0 && (sPres->neighb[0] == sr->sPrev || sPres->neighb[1] == sr->sPrev)) {
855: /* Rational Krylov */
856: DSTranslateRKS(eps->ds,sr->sPrev->value-sPres->value);
857: DSGetDimensions(eps->ds,NULL,NULL,NULL,&l,NULL);
858: DSSetDimensions(eps->ds,l+1,0,0,0);
859: BVSetActiveColumns(eps->V,0,l+1);
860: DSGetMat(eps->ds,DS_MAT_Q,&U);
861: BVMultInPlace(eps->V,U,0,l+1);
862: MatDestroy(&U);
863: } else {
864: /* Get the starting Lanczos vector */
865: EPSGetStartVector(eps,0,NULL);
866: l = 0;
867: }
868: /* Restart loop */
869: while (eps->reason == EPS_CONVERGED_ITERATING) {
870: eps->its++; sr->itsKs++;
871: /* Compute an nv-step Lanczos factorization */
872: nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
873: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
874: b = a + ld;
875: EPSFullLanczos(eps,a,b,eps->nconv+l,&nv,&breakdown);
876: sr->nv = nv;
877: beta = b[nv-1];
878: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
879: DSSetDimensions(eps->ds,nv,0,eps->nconv,eps->nconv+l);
880: if (l==0) {
881: DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
882: } else {
883: DSSetState(eps->ds,DS_STATE_RAW);
884: }
885: BVSetActiveColumns(eps->V,eps->nconv,nv);
887: /* Solve projected problem and compute residual norm estimates */
888: if (eps->its == 1 && l > 0) {/* After rational update */
889: DSGetArray(eps->ds,DS_MAT_A,&A);
890: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
891: b = a + ld;
892: k = eps->nconv+l;
893: A[k*ld+k-1] = A[(k-1)*ld+k];
894: A[k*ld+k] = a[k];
895: for (j=k+1; j< nv; j++) {
896: A[j*ld+j] = a[j];
897: A[j*ld+j-1] = b[j-1] ;
898: A[(j-1)*ld+j] = b[j-1];
899: }
900: DSRestoreArray(eps->ds,DS_MAT_A,&A);
901: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
902: DSSolve(eps->ds,eps->eigr,NULL);
903: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
904: DSSetCompact(eps->ds,PETSC_TRUE);
905: } else { /* Restart */
906: DSSolve(eps->ds,eps->eigr,NULL);
907: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
908: }
909: /* Residual */
910: EPSKrylovConvergence(eps,PETSC_TRUE,eps->nconv,nv-eps->nconv,beta,1.0,&k);
912: if (ctx->lock) {
913: /* Check convergence */
914: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
915: b = a + ld;
916: conv = 0;
917: j = k = eps->nconv;
918: for (i=eps->nconv;i<nv;i++) if (eps->errest[i] < eps->tol) conv++;
919: for (i=eps->nconv;i<nv;i++) {
920: if (eps->errest[i] < eps->tol) {
921: iwork[j++]=i;
922: } else iwork[conv+k++]=i;
923: }
924: for (i=eps->nconv;i<nv;i++) {
925: a[i]=PetscRealPart(eps->eigr[i]);
926: b[i]=eps->errest[i];
927: }
928: for (i=eps->nconv;i<nv;i++) {
929: eps->eigr[i] = a[iwork[i]];
930: eps->errest[i] = b[iwork[i]];
931: }
932: for (i=eps->nconv;i<nv;i++) {
933: a[i]=PetscRealPart(eps->eigr[i]);
934: b[i]=eps->errest[i];
935: }
936: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
937: DSGetArray(eps->ds,DS_MAT_Q,&Q);
938: for (i=eps->nconv;i<nv;i++) {
939: p=iwork[i];
940: if (p!=i) {
941: j=i+1;
942: while (iwork[j]!=i) j++;
943: iwork[j]=p;iwork[i]=i;
944: for (k=0;k<nv;k++) {
945: rtmp=Q[k+p*ld];Q[k+p*ld]=Q[k+i*ld];Q[k+i*ld]=rtmp;
946: }
947: }
948: }
949: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
950: k=eps->nconv+conv;
951: }
953: /* Checking values obtained for completing */
954: for (i=0;i<k;i++) {
955: sr->back[i]=eps->eigr[i];
956: }
957: STBackTransform(eps->st,k,sr->back,eps->eigi);
958: count0=count1=0;
959: for (i=0;i<k;i++) {
960: lambda = PetscRealPart(sr->back[i]);
961: if (((sr->dir)*(sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sPres->ext[0]) > 0)) count0++;
962: if (((sr->dir)*(lambda - sPres->value) > 0) && ((sr->dir)*(sPres->ext[1] - lambda) > 0)) count1++;
963: }
964: if (k>eps->nev && eps->ncv-k<5) eps->reason = EPS_CONVERGED_TOL;
965: else {
966: /* Checks completion */
967: if ((!sch0||count0 >= sPres->nsch[0]) && (!sch1 ||count1 >= sPres->nsch[1])) {
968: eps->reason = EPS_CONVERGED_TOL;
969: } else {
970: if (!complIterating && eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
971: if (complIterating) {
972: if (--iterCompl <= 0) eps->reason = EPS_DIVERGED_ITS;
973: } else if (k >= eps->nev) {
974: n0 = sPres->nsch[0]-count0;
975: n1 = sPres->nsch[1]-count1;
976: if (sr->iterCompl>0 && ((n0>0 && n0<= sr->nMAXCompl)||(n1>0&&n1<=sr->nMAXCompl))) {
977: /* Iterating for completion*/
978: complIterating = PETSC_TRUE;
979: if (n0 >sr->nMAXCompl)sch0 = PETSC_FALSE;
980: if (n1 >sr->nMAXCompl)sch1 = PETSC_FALSE;
981: iterCompl = sr->iterCompl;
982: } else eps->reason = EPS_CONVERGED_TOL;
983: }
984: }
985: }
986: /* Update l */
987: if (eps->reason == EPS_CONVERGED_ITERATING) l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
988: else l = 0;
989: if (!ctx->lock && l>0) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */
990: if (breakdown) l=0;
992: if (eps->reason == EPS_CONVERGED_ITERATING) {
993: if (breakdown) {
994: /* Start a new Lanczos factorization */
995: PetscInfo2(eps,"Breakdown in Krylov-Schur method (it=%D norm=%g)\n",eps->its,(double)beta);
996: EPSGetStartVector(eps,k,&breakdown);
997: if (breakdown) {
998: eps->reason = EPS_DIVERGED_BREAKDOWN;
999: PetscInfo(eps,"Unable to generate more start vectors\n");
1000: }
1001: } else {
1002: /* Prepare the Rayleigh quotient for restart */
1003: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
1004: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1005: b = a + ld;
1006: for (i=k;i<k+l;i++) {
1007: a[i] = PetscRealPart(eps->eigr[i]);
1008: b[i] = PetscRealPart(Q[nv-1+i*ld]*beta);
1009: }
1010: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
1011: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1012: }
1013: }
1014: /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
1015: DSGetMat(eps->ds,DS_MAT_Q,&U);
1016: BVMultInPlace(eps->V,U,eps->nconv,k+l);
1017: MatDestroy(&U);
1019: /* Normalize u and append it to V */
1020: if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) {
1021: BVCopyColumn(eps->V,nv,k+l);
1022: }
1023: eps->nconv = k;
1024: if (eps->reason != EPS_CONVERGED_ITERATING) {
1025: /* Store approximated values for next shift */
1026: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1027: sr->nS = l;
1028: for (i=0;i<l;i++) {
1029: sr->S[i] = eps->eigr[i+k];/* Diagonal elements */
1030: sr->S[i+l] = Q[nv-1+(i+k)*ld]*beta; /* Out of diagonal elements */
1031: }
1032: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1033: }
1034: }
1035: /* Check for completion */
1036: for (i=0;i< eps->nconv; i++) {
1037: if ((sr->dir)*PetscRealPart(eps->eigr[i])>0) sPres->nconv[1]++;
1038: else sPres->nconv[0]++;
1039: }
1040: sPres->comp[0] = (count0 >= sPres->nsch[0])?PETSC_TRUE:PETSC_FALSE;
1041: sPres->comp[1] = (count1 >= sPres->nsch[1])?PETSC_TRUE:PETSC_FALSE;
1042: if (count0 > sPres->nsch[0] || count1 > sPres->nsch[1])SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1043: PetscFree(iwork);
1044: return(0);
1045: }
1047: /*
1048: Obtains value of subsequent shift
1049: */
1052: static PetscErrorCode EPSGetNewShiftValue(EPS eps,PetscInt side,PetscReal *newS)
1053: {
1054: PetscReal lambda,d_prev;
1055: PetscInt i,idxP;
1056: EPS_SR sr;
1057: EPS_shift sPres,s;
1058: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1061: sr = ctx->sr;
1062: sPres = sr->sPres;
1063: if (sPres->neighb[side]) {
1064: /* Completing a previous interval */
1065: if (!sPres->neighb[side]->neighb[side] && sPres->neighb[side]->nconv[side]==0) { /* One of the ends might be too far from eigenvalues */
1066: if (side) *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[sr->indexEig-1]]))/2;
1067: else *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[0]]))/2;
1068: } else *newS=(sPres->value + sPres->neighb[side]->value)/2;
1069: } else { /* (Only for side=1). Creating a new interval. */
1070: if (sPres->neigs==0) {/* No value has been accepted*/
1071: if (sPres->neighb[0]) {
1072: /* Multiplying by 10 the previous distance */
1073: *newS = sPres->value + 10*(sr->dir)*PetscAbsReal(sPres->value - sPres->neighb[0]->value);
1074: sr->nleap++;
1075: /* Stops when the interval is open and no values are found in the last 5 shifts (there might be infinite eigenvalues) */
1076: if (!sr->hasEnd && sr->nleap > 5) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unable to compute the wanted eigenvalues with open interval");
1077: } else { /* First shift */
1078: if (eps->nconv != 0) {
1079: /* Unaccepted values give information for next shift */
1080: idxP=0;/* Number of values left from shift */
1081: for (i=0;i<eps->nconv;i++) {
1082: lambda = PetscRealPart(sr->eigr[i]);
1083: if ((sr->dir)*(lambda - sPres->value) <0) idxP++;
1084: else break;
1085: }
1086: /* Avoiding subtraction of eigenvalues (might be the same).*/
1087: if (idxP>0) {
1088: d_prev = PetscAbsReal(sPres->value - PetscRealPart(sr->eigr[0]))/(idxP+0.3);
1089: } else {
1090: d_prev = PetscAbsReal(sPres->value - PetscRealPart(sr->eigr[eps->nconv-1]))/(eps->nconv+0.3);
1091: }
1092: *newS = sPres->value + ((sr->dir)*d_prev*eps->nev)/2;
1093: } else { /* No values found, no information for next shift */
1094: SETERRQ(PetscObjectComm((PetscObject)eps),1,"First shift renders no information");
1095: }
1096: }
1097: } else { /* Accepted values found */
1098: sr->nleap = 0;
1099: /* Average distance of values in previous subinterval */
1100: s = sPres->neighb[0];
1101: while (s && PetscAbs(s->inertia - sPres->inertia)==0) {
1102: s = s->neighb[0];/* Looking for previous shifts with eigenvalues within */
1103: }
1104: if (s) {
1105: d_prev = PetscAbsReal((sPres->value - s->value)/(sPres->inertia - s->inertia));
1106: } else { /* First shift. Average distance obtained with values in this shift */
1107: /* first shift might be too far from first wanted eigenvalue (no values found outside the interval)*/
1108: if ((sr->dir)*(PetscRealPart(sr->eigr[0])-sPres->value)>0 && PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0]))/PetscRealPart(sr->eigr[0])) > PetscSqrtReal(eps->tol)) {
1109: d_prev = PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0])))/(sPres->neigs+0.3);
1110: } else {
1111: d_prev = PetscAbsReal(PetscRealPart(sr->eigr[sr->indexEig-1]) - sPres->value)/(sPres->neigs+0.3);
1112: }
1113: }
1114: /* Average distance is used for next shift by adding it to value on the right or to shift */
1115: if ((sr->dir)*(PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1]) - sPres->value)>0) {
1116: *newS = PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1])+ ((sr->dir)*d_prev*(eps->nev))/2;
1117: } else { /* Last accepted value is on the left of shift. Adding to shift */
1118: *newS = sPres->value + ((sr->dir)*d_prev*(eps->nev))/2;
1119: }
1120: }
1121: /* End of interval can not be surpassed */
1122: if ((sr->dir)*(sr->int1 - *newS) < 0) *newS = sr->int1;
1123: }/* of neighb[side]==null */
1124: return(0);
1125: }
1127: /*
1128: Function for sorting an array of real values
1129: */
1132: static PetscErrorCode sortRealEigenvalues(PetscScalar *r,PetscInt *perm,PetscInt nr,PetscBool prev,PetscInt dir)
1133: {
1134: PetscReal re;
1135: PetscInt i,j,tmp;
1138: if (!prev) for (i=0;i<nr;i++) perm[i] = i;
1139: /* Insertion sort */
1140: for (i=1;i<nr;i++) {
1141: re = PetscRealPart(r[perm[i]]);
1142: j = i-1;
1143: while (j>=0 && dir*(re - PetscRealPart(r[perm[j]])) <= 0) {
1144: tmp = perm[j]; perm[j] = perm[j+1]; perm[j+1] = tmp; j--;
1145: }
1146: }
1147: return(0);
1148: }
1150: /* Stores the pairs obtained since the last shift in the global arrays */
1153: static PetscErrorCode EPSStoreEigenpairs(EPS eps)
1154: {
1155: PetscErrorCode ierr;
1156: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1157: PetscReal lambda,err,norm;
1158: PetscInt i,count;
1159: PetscBool iscayley;
1160: EPS_SR sr = ctx->sr;
1161: EPS_shift sPres;
1162: Vec v,w;
1165: sPres = sr->sPres;
1166: sPres->index = sr->indexEig;
1167: count = sr->indexEig;
1168: /* Back-transform */
1169: STBackTransform(eps->st,eps->nconv,eps->eigr,eps->eigi);
1170: PetscObjectTypeCompare((PetscObject)eps->st,STCAYLEY,&iscayley);
1171: /* Sort eigenvalues */
1172: sortRealEigenvalues(eps->eigr,eps->perm,eps->nconv,PETSC_FALSE,sr->dir);
1173: /* Values stored in global array */
1174: for (i=0;i<eps->nconv;i++) {
1175: lambda = PetscRealPart(eps->eigr[eps->perm[i]]);
1176: err = eps->errest[eps->perm[i]];
1178: if ((sr->dir)*(lambda - sPres->ext[0]) > 0 && (sr->dir)*(sPres->ext[1] - lambda) > 0) {/* Valid value */
1179: if (count>=sr->numEigs) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unexpected error in Spectrum Slicing");
1180: sr->eigr[count] = lambda;
1181: sr->errest[count] = err;
1182: /* Explicit purification */
1183: if (eps->purify) {
1184: BVGetColumn(sr->V,count,&v);
1185: BVGetColumn(eps->V,eps->perm[i],&w);
1186: STApply(eps->st,w,v);
1187: BVRestoreColumn(sr->V,count,&v);
1188: BVRestoreColumn(eps->V,eps->perm[i],&w);
1189: BVNormColumn(sr->V,count,NORM_2,&norm);
1190: BVScaleColumn(sr->V,count,1.0/norm);
1191: }
1192: count++;
1193: }
1194: }
1195: sPres->neigs = count - sr->indexEig;
1196: sr->indexEig = count;
1197: /* Global ordering array updating */
1198: sortRealEigenvalues(sr->eigr,sr->perm,count,PETSC_TRUE,sr->dir);
1199: return(0);
1200: }
1204: static PetscErrorCode EPSLookForDeflation(EPS eps)
1205: {
1206: PetscErrorCode ierr;
1207: PetscReal val;
1208: PetscInt i,count0=0,count1=0;
1209: EPS_shift sPres;
1210: PetscInt ini,fin,k,idx0,idx1;
1211: EPS_SR sr;
1212: Vec v;
1213: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1216: sr = ctx->sr;
1217: sPres = sr->sPres;
1219: if (sPres->neighb[0]) ini = (sr->dir)*(sPres->neighb[0]->inertia - sr->inertia0);
1220: else ini = 0;
1221: fin = sr->indexEig;
1222: /* Selection of ends for searching new values */
1223: if (!sPres->neighb[0]) sPres->ext[0] = sr->int0;/* First shift */
1224: else sPres->ext[0] = sPres->neighb[0]->value;
1225: if (!sPres->neighb[1]) {
1226: if (sr->hasEnd) sPres->ext[1] = sr->int1;
1227: else sPres->ext[1] = (sr->dir > 0)?PETSC_MAX_REAL:PETSC_MIN_REAL;
1228: } else sPres->ext[1] = sPres->neighb[1]->value;
1229: /* Selection of values between right and left ends */
1230: for (i=ini;i<fin;i++) {
1231: val=PetscRealPart(sr->eigr[sr->perm[i]]);
1232: /* Values to the right of left shift */
1233: if ((sr->dir)*(val - sPres->ext[1]) < 0) {
1234: if ((sr->dir)*(val - sPres->value) < 0) count0++;
1235: else count1++;
1236: } else break;
1237: }
1238: /* The number of values on each side are found */
1239: if (sPres->neighb[0]) {
1240: sPres->nsch[0] = (sr->dir)*(sPres->inertia - sPres->neighb[0]->inertia)-count0;
1241: if (sPres->nsch[0]<0)SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1242: } else sPres->nsch[0] = 0;
1244: if (sPres->neighb[1]) {
1245: sPres->nsch[1] = (sr->dir)*(sPres->neighb[1]->inertia - sPres->inertia) - count1;
1246: if (sPres->nsch[1]<0)SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1247: } else sPres->nsch[1] = (sr->dir)*(sr->inertia1 - sPres->inertia);
1249: /* Completing vector of indexes for deflation */
1250: idx0 = ini;
1251: idx1 = ini+count0+count1;
1252: k=0;
1253: for (i=idx0;i<idx1;i++) sr->idxDef[k++]=sr->perm[i];
1254: BVDuplicateResize(eps->V,k+eps->ncv+1,&sr->Vnext);
1255: BVSetNumConstraints(sr->Vnext,k);
1256: for (i=0;i<k;i++) {
1257: BVGetColumn(sr->Vnext,-i-1,&v);
1258: BVCopyVec(sr->V,sr->idxDef[i],v);
1259: BVRestoreColumn(sr->Vnext,-i-1,&v);
1260: }
1262: /* For rational Krylov */
1263: if (sr->nS>0 && (sr->sPrev == sr->sPres->neighb[0] || sr->sPrev == sr->sPres->neighb[1])) {
1264: EPSPrepareRational(eps);
1265: }
1266: eps->nconv = 0;
1267: /* Get rid of temporary Vnext */
1268: BVDestroy(&eps->V);
1269: eps->V = sr->Vnext;
1270: sr->Vnext = NULL;
1271: return(0);
1272: }
1276: PetscErrorCode EPSSolve_KrylovSchur_Slice(EPS eps)
1277: {
1278: PetscErrorCode ierr;
1279: PetscInt i,lds;
1280: PetscReal newS;
1281: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1282: EPS_SR sr=ctx->sr;
1285: if (ctx->global) {
1286: EPSSolve_KrylovSchur_Slice(ctx->eps);
1287: ctx->eps->state = EPS_STATE_SOLVED;
1288: eps->reason = EPS_CONVERGED_TOL;
1289: if (ctx->npart>1) {
1290: /* Gather solution from subsolvers */
1291: EPSSliceGatherSolution(eps);
1292: } else {
1293: eps->nconv = sr->numEigs;
1294: eps->its = ctx->eps->its;
1295: PetscFree(ctx->inertias);
1296: PetscFree(ctx->shifts);
1297: EPSSliceGetInertias(ctx->eps,&ctx->nshifts,&ctx->shifts,&ctx->inertias);
1298: }
1299: } else {
1300: if (ctx->npart==1) {
1301: sr->eigr = ctx->eps->eigr;
1302: sr->eigi = ctx->eps->eigi;
1303: sr->perm = ctx->eps->perm;
1304: sr->errest = ctx->eps->errest;
1305: sr->V = ctx->eps->V;
1306: }
1307: /* Only with eigenvalues present in the interval ...*/
1308: if (sr->numEigs==0) {
1309: eps->reason = EPS_CONVERGED_TOL;
1310: return(0);
1311: }
1312: /* Array of pending shifts */
1313: sr->maxPend = 100; /* Initial size */
1314: sr->nPend = 0;
1315: PetscMalloc1(sr->maxPend,&sr->pending);
1316: PetscLogObjectMemory((PetscObject)eps,(sr->maxPend)*sizeof(EPS_shift));
1317: EPSCreateShift(eps,sr->int0,NULL,NULL);
1318: /* extract first shift */
1319: sr->sPrev = NULL;
1320: sr->sPres = sr->pending[--sr->nPend];
1321: sr->sPres->inertia = sr->inertia0;
1322: eps->target = sr->sPres->value;
1323: sr->s0 = sr->sPres;
1324: sr->indexEig = 0;
1325: /* Memory reservation for auxiliary variables */
1326: lds = PetscMin(eps->mpd,eps->ncv);
1327: PetscCalloc1(lds*lds,&sr->S);
1328: PetscMalloc1(eps->ncv,&sr->back);
1329: PetscLogObjectMemory((PetscObject)eps,(sr->numEigs+2*eps->ncv)*sizeof(PetscScalar));
1330: for (i=0;i<sr->numEigs;i++) {
1331: sr->eigr[i] = 0.0;
1332: sr->eigi[i] = 0.0;
1333: sr->errest[i] = 0.0;
1334: sr->perm[i] = i;
1335: }
1336: /* Vectors for deflation */
1337: PetscMalloc1(sr->numEigs,&sr->idxDef);
1338: PetscLogObjectMemory((PetscObject)eps,sr->numEigs*sizeof(PetscInt));
1339: sr->indexEig = 0;
1340: /* Main loop */
1341: while (sr->sPres) {
1342: /* Search for deflation */
1343: EPSLookForDeflation(eps);
1344: /* KrylovSchur */
1345: EPSKrylovSchur_Slice(eps);
1347: EPSStoreEigenpairs(eps);
1348: /* Select new shift */
1349: if (!sr->sPres->comp[1]) {
1350: EPSGetNewShiftValue(eps,1,&newS);
1351: EPSCreateShift(eps,newS,sr->sPres,sr->sPres->neighb[1]);
1352: }
1353: if (!sr->sPres->comp[0]) {
1354: /* Completing earlier interval */
1355: EPSGetNewShiftValue(eps,0,&newS);
1356: EPSCreateShift(eps,newS,sr->sPres->neighb[0],sr->sPres);
1357: }
1358: /* Preparing for a new search of values */
1359: EPSExtractShift(eps);
1360: }
1362: /* Updating eps values prior to exit */
1363: PetscFree(sr->S);
1364: PetscFree(sr->idxDef);
1365: PetscFree(sr->pending);
1366: PetscFree(sr->back);
1367: BVDuplicateResize(eps->V,eps->ncv+1,&sr->Vnext);
1368: BVSetNumConstraints(sr->Vnext,0);
1369: BVDestroy(&eps->V);
1370: eps->V = sr->Vnext;
1371: eps->nconv = sr->indexEig;
1372: eps->reason = EPS_CONVERGED_TOL;
1373: eps->its = sr->itsKs;
1374: eps->nds = 0;
1375: }
1376: return(0);
1377: }