Actual source code: blzpack.c

  1: /*
  2:        This file implements a wrapper to the BLZPACK package

  4:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  5:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  6:    Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain

  8:    This file is part of SLEPc.
  9:       
 10:    SLEPc is free software: you can redistribute it and/or modify it under  the
 11:    terms of version 3 of the GNU Lesser General Public License as published by
 12:    the Free Software Foundation.

 14:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 15:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 16:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 17:    more details.

 19:    You  should have received a copy of the GNU Lesser General  Public  License
 20:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 21:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 22: */

 24: #include <slepc-private/epsimpl.h>    /*I "slepceps.h" I*/
 25: #include <slepc-private/stimpl.h>     /*I "slepcst.h" I*/
 26: #include <../src/eps/impls/external/blzpack/blzpackp.h>

 28: PetscErrorCode EPSSolve_BLZPACK(EPS);

 30: const char* blzpack_error[33] = {
 31:   "",
 32:   "illegal data, LFLAG ",
 33:   "illegal data, dimension of (U), (V), (X) ",
 34:   "illegal data, leading dimension of (U), (V), (X) ",
 35:   "illegal data, leading dimension of (EIG) ",
 36:   "illegal data, number of required eigenpairs ",
 37:   "illegal data, Lanczos algorithm block size ",
 38:   "illegal data, maximum number of steps ",
 39:   "illegal data, number of starting vectors ",
 40:   "illegal data, number of eigenpairs provided ",
 41:   "illegal data, problem type flag ",
 42:   "illegal data, spectrum slicing flag ",
 43:   "illegal data, eigenvectors purification flag ",
 44:   "illegal data, level of output ",
 45:   "illegal data, output file unit ",
 46:   "illegal data, LCOMM (MPI or PVM) ",
 47:   "illegal data, dimension of ISTOR ",
 48:   "illegal data, convergence threshold ",
 49:   "illegal data, dimension of RSTOR ",
 50:   "illegal data on at least one PE ",
 51:   "ISTOR(3:14) must be equal on all PEs ",
 52:   "RSTOR(1:3) must be equal on all PEs ",
 53:   "not enough space in ISTOR to start eigensolution ",
 54:   "not enough space in RSTOR to start eigensolution ",
 55:   "illegal data, number of negative eigenvalues ",
 56:   "illegal data, entries of V ",
 57:   "illegal data, entries of X ",
 58:   "failure in computational subinterval ",
 59:   "file I/O error, blzpack.__.BQ ",
 60:   "file I/O error, blzpack.__.BX ",
 61:   "file I/O error, blzpack.__.Q ",
 62:   "file I/O error, blzpack.__.X ",
 63:   "parallel interface error "
 64: };

 68: PetscErrorCode EPSSetUp_BLZPACK(EPS eps)
 69: {
 71:   PetscInt       listor,lrstor,ncuv,k1,k2,k3,k4;
 72:   EPS_BLZPACK    *blz = (EPS_BLZPACK *)eps->data;
 73:   PetscBool      issinv;

 76:   if (eps->ncv) {
 77:     if (eps->ncv < PetscMin(eps->nev+10,eps->nev*2)) SETERRQ(((PetscObject)eps)->comm,0,"Warning: BLZpack recommends that ncv be larger than min(nev+10,nev*2)");
 78:   }
 79:   else eps->ncv = PetscMin(eps->nev+10,eps->nev*2);
 80:   if (eps->mpd) { PetscInfo(eps,"Warning: parameter mpd ignored\n"); }
 81:   if (!eps->max_it) eps->max_it = PetscMax(1000,eps->n);

 83:   if (!eps->ishermitian) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_SUP,"Requested method is only available for Hermitian problems");
 84:   if (eps->which==EPS_ALL) {
 85:     if (eps->inta==0.0 && eps->intb==0.0) SETERRQ(((PetscObject)eps)->comm,1,"Must define a computational interval when using EPS_ALL");
 86:     blz->slice = 1;
 87:   }
 88:   PetscObjectTypeCompare((PetscObject)eps->OP,STSINVERT,&issinv);
 89:   if (blz->slice || eps->isgeneralized) {
 90:     if (!issinv) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_SUP,"Shift-and-invert ST is needed for generalized problems or spectrum slicing");
 91:   }
 92:   if (blz->slice) {
 93:     if (eps->intb >= PETSC_MAX_REAL) { /* right-open interval */
 94:       if (eps->inta <= PETSC_MIN_REAL) SETERRQ(((PetscObject)eps)->comm,1,"The defined computational interval should have at least one of their sides bounded");
 95:       STSetDefaultShift(eps->OP,eps->inta);
 96:     }
 97:     else { STSetDefaultShift(eps->OP,eps->intb); }
 98:   }
 99:   if (!eps->which) {
100:     if (issinv) eps->which = EPS_TARGET_REAL;
101:     else eps->which = EPS_SMALLEST_REAL;
102:   }
103:   if ((issinv && eps->which!=EPS_TARGET_REAL && eps->which!=EPS_TARGET_MAGNITUDE && eps->which!=EPS_ALL) || (!issinv && eps->which!=EPS_SMALLEST_REAL)) SETERRQ(((PetscObject)eps)->comm,1,"Wrong value of eps->which");
104:   if (eps->arbit_func) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_SUP,"Arbitrary selection of eigenpairs not supported in this solver");

106:   k1 = PetscMin(eps->n,180);
107:   k2 = blz->block_size;
108:   k4 = PetscMin(eps->ncv,eps->n);
109:   k3 = 484+k1*(13+k1*2+k2+PetscMax(18,k2+2))+k2*k2*3+k4*2;

111:   listor = 123+k1*12;
112:   PetscFree(blz->istor);
113:   PetscMalloc((17+listor)*sizeof(PetscBLASInt),&blz->istor);
114:   blz->istor[14] = PetscBLASIntCast(listor);

116:   if (blz->slice) lrstor = eps->nloc*(k2*4+k1*2+k4)+k3;
117:   else lrstor = eps->nloc*(k2*4+k1)+k3;
118: lrstor*=10;
119:   PetscFree(blz->rstor);
120:   PetscMalloc((4+lrstor)*sizeof(PetscReal),&blz->rstor);
121:   blz->rstor[3] = lrstor;

123:   ncuv = PetscMax(3,blz->block_size);
124:   PetscFree(blz->u);
125:   PetscMalloc(ncuv*eps->nloc*sizeof(PetscScalar),&blz->u);
126:   PetscFree(blz->v);
127:   PetscMalloc(ncuv*eps->nloc*sizeof(PetscScalar),&blz->v);

129:   PetscFree(blz->eig);
130:   PetscMalloc(2*eps->ncv*sizeof(PetscReal),&blz->eig);

132:   if (eps->extraction) { PetscInfo(eps,"Warning: extraction type ignored\n"); }

134:   EPSAllocateSolution(eps);

136:   /* dispatch solve method */
137:   if (eps->leftvecs) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_SUP,"Left vectors not supported in this solver");
138:   eps->ops->solve = EPSSolve_BLZPACK;
139:   return(0);
140: }

144: PetscErrorCode EPSSolve_BLZPACK(EPS eps)
145: {
147:   EPS_BLZPACK    *blz = (EPS_BLZPACK *)eps->data;
148:   PetscInt       nn;
149:   PetscBLASInt   i,nneig,lflag,nvopu;
150:   Vec            x,y;
151:   PetscScalar    sigma,*pV;
152:   Mat            A;
153:   KSP            ksp;
154:   PC             pc;
155: 
157:   VecCreateMPIWithArray(((PetscObject)eps)->comm,1,eps->nloc,PETSC_DECIDE,PETSC_NULL,&x);
158:   VecCreateMPIWithArray(((PetscObject)eps)->comm,1,eps->nloc,PETSC_DECIDE,PETSC_NULL,&y);
159:   VecGetArray(eps->V[0],&pV);
160: 
161:   if (eps->isgeneralized && !blz->slice) {
162:     STGetShift(eps->OP,&sigma); /* shift of origin */
163:     blz->rstor[0]  = sigma;        /* lower limit of eigenvalue interval */
164:     blz->rstor[1]  = sigma;        /* upper limit of eigenvalue interval */
165:   } else {
166:     sigma = 0.0;
167:     blz->rstor[0]  = eps->inta;    /* lower limit of eigenvalue interval */
168:     blz->rstor[1]  = eps->intb;    /* upper limit of eigenvalue interval */
169:   }
170:   nneig = 0;                       /* no. of eigs less than sigma */

172:   blz->istor[0]  = PetscBLASIntCast(eps->nloc); /* number of rows of U, V, X*/
173:   blz->istor[1]  = PetscBLASIntCast(eps->nloc); /* leading dimension of U, V, X */
174:   blz->istor[2]  = PetscBLASIntCast(eps->nev); /* number of required eigenpairs */
175:   blz->istor[3]  = PetscBLASIntCast(eps->ncv); /* number of working eigenpairs */
176:   blz->istor[4]  = blz->block_size;    /* number of vectors in a block */
177:   blz->istor[5]  = blz->nsteps;  /* maximun number of steps per run */
178:   blz->istor[6]  = 1;            /* number of starting vectors as input */
179:   blz->istor[7]  = 0;            /* number of eigenpairs given as input */
180:   blz->istor[8]  = (blz->slice || eps->isgeneralized) ? 1 : 0;   /* problem type */
181:   blz->istor[9]  = blz->slice;   /* spectrum slicing */
182:   blz->istor[10] = eps->isgeneralized ? 1 : 0;   /* solutions refinement (purify) */
183:   blz->istor[11] = 0;            /* level of printing */
184:   blz->istor[12] = 6;            /* file unit for output */
185:   blz->istor[13] = PetscBLASIntCast(MPI_Comm_c2f(((PetscObject)eps)->comm)); /* communicator */

187:   blz->rstor[2]  = eps->tol;     /* threshold for convergence */

189:   lflag = 0;           /* reverse communication interface flag */

191:   do {
192:     BLZpack_(blz->istor,blz->rstor,&sigma,&nneig,blz->u,blz->v,&lflag,&nvopu,blz->eig,pV);

194:     switch (lflag) {
195:     case 1:
196:       /* compute v = OP u */
197:       for (i=0;i<nvopu;i++) {
198:         VecPlaceArray(x,blz->u+i*eps->nloc);
199:         VecPlaceArray(y,blz->v+i*eps->nloc);
200:         if (blz->slice || eps->isgeneralized) {
201:           STAssociatedKSPSolve(eps->OP,x,y);
202:         } else {
203:           STApply(eps->OP,x,y);
204:         }
205:         IPOrthogonalize(eps->ip,0,PETSC_NULL,eps->nds,PETSC_NULL,eps->defl,y,PETSC_NULL,PETSC_NULL,PETSC_NULL);
206:         VecResetArray(x);
207:         VecResetArray(y);
208:       }
209:       /* monitor */
210:       eps->nconv  = BLZistorr_(blz->istor,"NTEIG",5);
211:       EPSMonitor(eps,eps->its,eps->nconv,
212:         blz->rstor+BLZistorr_(blz->istor,"IRITZ",5),
213:         eps->eigi,
214:         blz->rstor+BLZistorr_(blz->istor,"IRITZ",5)+BLZistorr_(blz->istor,"JT",2),
215:         BLZistorr_(blz->istor,"NRITZ",5));
216:       eps->its = eps->its + 1;
217:       if (eps->its >= eps->max_it || eps->nconv >= eps->nev) lflag = 5;
218:       break;
219:     case 2:
220:       /* compute v = B u */
221:       for (i=0;i<nvopu;i++) {
222:         VecPlaceArray(x,blz->u+i*eps->nloc);
223:         VecPlaceArray(y,blz->v+i*eps->nloc);
224:         IPApplyMatrix(eps->ip,x,y);
225:         VecResetArray(x);
226:         VecResetArray(y);
227:       }
228:       break;
229:     case 3:
230:       /* update shift */
231:       PetscInfo1(eps,"Factorization update (sigma=%g)\n",sigma);
232:       STSetShift(eps->OP,sigma);
233:       STGetKSP(eps->OP,&ksp);
234:       KSPGetPC(ksp,&pc);
235:       PCFactorGetMatrix(pc,&A);
236:       MatGetInertia(A,&nn,PETSC_NULL,PETSC_NULL);
237:       nneig = PetscBLASIntCast(nn);
238:       break;
239:     case 4:
240:       /* copy the initial vector */
241:       VecPlaceArray(x,blz->v);
242:       EPSGetStartVector(eps,0,x,PETSC_NULL);
243:       VecResetArray(x);
244:       break;
245:     }
246: 
247:   } while (lflag > 0);

249:   VecRestoreArray(eps->V[0],&pV);

251:   eps->nconv  = BLZistorr_(blz->istor,"NTEIG",5);
252:   eps->reason = EPS_CONVERGED_TOL;

254:   for (i=0;i<eps->nconv;i++) {
255:     eps->eigr[i]=blz->eig[i];
256:   }

258:   if (lflag!=0) {
259:     char msg[2048] = "";
260:     for (i = 0; i < 33; i++) {
261:       if (blz->istor[15] & (1 << i)) PetscStrcat(msg,blzpack_error[i]);
262:     }
263:     SETERRQ2(((PetscObject)eps)->comm,PETSC_ERR_LIB,"Error in BLZPACK (code=%d): '%s'",blz->istor[15],msg);
264:   }
265:   VecDestroy(&x);
266:   VecDestroy(&y);
267:   return(0);
268: }

272: PetscErrorCode EPSBackTransform_BLZPACK(EPS eps)
273: {
275:   EPS_BLZPACK    *blz = (EPS_BLZPACK *)eps->data;

278:   if (!blz->slice && !eps->isgeneralized) {
279:     EPSBackTransform_Default(eps);
280:   }
281:   return(0);
282: }

286: PetscErrorCode EPSReset_BLZPACK(EPS eps)
287: {
289:   EPS_BLZPACK    *blz = (EPS_BLZPACK *)eps->data;

292:   PetscFree(blz->istor);
293:   PetscFree(blz->rstor);
294:   PetscFree(blz->u);
295:   PetscFree(blz->v);
296:   PetscFree(blz->eig);
297:   EPSFreeSolution(eps);
298:   return(0);
299: }

303: PetscErrorCode EPSDestroy_BLZPACK(EPS eps)
304: {

308:   PetscFree(eps->data);
309:   PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSBlzpackSetBlockSize_C","",PETSC_NULL);
310:   PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSBlzpackSetNSteps_C","",PETSC_NULL);
311:   return(0);
312: }

316: PetscErrorCode EPSView_BLZPACK(EPS eps,PetscViewer viewer)
317: {
319:   EPS_BLZPACK    *blz = (EPS_BLZPACK*)eps->data;
320:   PetscBool      isascii;

323:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
324:   if (!isascii) SETERRQ1(((PetscObject)eps)->comm,1,"Viewer type %s not supported for EPSBLZPACK",((PetscObject)viewer)->type_name);
325:   PetscViewerASCIIPrintf(viewer,"  BLZPACK: block size=%d\n",blz->block_size);
326:   PetscViewerASCIIPrintf(viewer,"  BLZPACK: maximum number of steps per run=%d\n",blz->nsteps);
327:   if (blz->slice) {
328:     PetscViewerASCIIPrintf(viewer,"  BLZPACK: computational interval [%f,%f]\n",eps->inta,eps->intb);
329:   }
330:   return(0);
331: }

335: PetscErrorCode EPSSetFromOptions_BLZPACK(EPS eps)
336: {
338:   EPS_BLZPACK    *blz = (EPS_BLZPACK *)eps->data;
339:   PetscInt       bs,n;
340:   PetscBool      flg;

343:   PetscOptionsHead("EPS BLZPACK Options");

345:   bs = blz->block_size;
346:   PetscOptionsInt("-eps_blzpack_block_size","Block size","EPSBlzpackSetBlockSize",bs,&bs,&flg);
347:   if (flg) {EPSBlzpackSetBlockSize(eps,bs);}

349:   n = blz->nsteps;
350:   PetscOptionsInt("-eps_blzpack_nsteps","Number of steps","EPSBlzpackSetNSteps",n,&n,&flg);
351:   if (flg) {EPSBlzpackSetNSteps(eps,n);}

353:   PetscOptionsTail();
354:   return(0);
355: }

357: EXTERN_C_BEGIN
360: PetscErrorCode EPSBlzpackSetBlockSize_BLZPACK(EPS eps,PetscInt bs)
361: {
362:   EPS_BLZPACK *blz = (EPS_BLZPACK*)eps->data;;

365:   if (bs == PETSC_DEFAULT) blz->block_size = 3;
366:   else if (bs <= 0) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Block size must be positive");
367:   else blz->block_size = PetscBLASIntCast(bs);
368:   return(0);
369: }
370: EXTERN_C_END

374: /*@
375:    EPSBlzpackSetBlockSize - Sets the block size for the BLZPACK package.

377:    Collective on EPS

379:    Input Parameters:
380: +  eps - the eigenproblem solver context
381: -  bs - block size

383:    Options Database Key:
384: .  -eps_blzpack_block_size - Sets the value of the block size

386:    Level: advanced
387: @*/
388: PetscErrorCode EPSBlzpackSetBlockSize(EPS eps,PetscInt bs)
389: {

395:   PetscTryMethod(eps,"EPSBlzpackSetBlockSize_C",(EPS,PetscInt),(eps,bs));
396:   return(0);
397: }

399: EXTERN_C_BEGIN
402: PetscErrorCode EPSBlzpackSetNSteps_BLZPACK(EPS eps,PetscInt nsteps)
403: {
404:   EPS_BLZPACK *blz = (EPS_BLZPACK*)eps->data;

407:   if (nsteps == PETSC_DEFAULT) blz->nsteps = 0;
408:   else { blz->nsteps = PetscBLASIntCast(nsteps); }
409:   return(0);
410: }
411: EXTERN_C_END

415: /*@
416:    EPSBlzpackSetNSteps - Sets the maximum number of steps per run for the BLZPACK
417:    package.

419:    Collective on EPS

421:    Input Parameters:
422: +  eps     - the eigenproblem solver context
423: -  nsteps  - maximum number of steps

425:    Options Database Key:
426: .  -eps_blzpack_nsteps - Sets the maximum number of steps per run

428:    Level: advanced

430: @*/
431: PetscErrorCode EPSBlzpackSetNSteps(EPS eps,PetscInt nsteps)
432: {

438:   PetscTryMethod(eps,"EPSBlzpackSetNSteps_C",(EPS,PetscInt),(eps,nsteps));
439:   return(0);
440: }

442: EXTERN_C_BEGIN
445: PetscErrorCode EPSCreate_BLZPACK(EPS eps)
446: {
448:   EPS_BLZPACK    *blzpack;

451:   PetscNewLog(eps,EPS_BLZPACK,&blzpack);
452:   eps->data                      = (void*)blzpack;
453:   eps->ops->setup                = EPSSetUp_BLZPACK;
454:   eps->ops->setfromoptions       = EPSSetFromOptions_BLZPACK;
455:   eps->ops->destroy              = EPSDestroy_BLZPACK;
456:   eps->ops->reset                = EPSReset_BLZPACK;
457:   eps->ops->view                 = EPSView_BLZPACK;
458:   eps->ops->backtransform        = EPSBackTransform_BLZPACK;
459:   eps->ops->computevectors       = EPSComputeVectors_Default;

461:   blzpack->block_size = 3;
462:   blzpack->slice = 0;
463:   blzpack->nsteps = 0;

465:   PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSBlzpackSetBlockSize_C","EPSBlzpackSetBlockSize_BLZPACK",EPSBlzpackSetBlockSize_BLZPACK);
466:   PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSBlzpackSetNSteps_C","EPSBlzpackSetNSteps_BLZPACK",EPSBlzpackSetNSteps_BLZPACK);
467:   return(0);
468: }
469: EXTERN_C_END