Actual source code: dvd_improvex.c

  1: /*
  2:   SLEPc eigensolver: "davidson"

  4:   Step: improve the eigenvectors X

  6:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  7:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  8:    Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain

 10:    This file is part of SLEPc.
 11:       
 12:    SLEPc is free software: you can redistribute it and/or modify it under  the
 13:    terms of version 3 of the GNU Lesser General Public License as published by
 14:    the Free Software Foundation.

 16:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 17:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 18:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 19:    more details.

 21:    You  should have received a copy of the GNU Lesser General  Public  License
 22:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 23:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 24: */

 26:  #include davidson.h
 27: #include <slepcvec.h>
 28: #include <slepcblaslapack.h>

 30: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d, void *funcV, Vec *D,
 31:   PetscInt max_size_D, PetscInt r_s, PetscInt r_e, Vec *auxV,
 32:   PetscScalar *auxS);
 33: PetscErrorCode dvd_pcapplyba(PC pc,PCSide side,Vec in,Vec out,Vec w);
 34: PetscErrorCode dvd_pcapply(PC pc,Vec in,Vec out);
 35: PetscErrorCode dvd_pcapplytrans(PC pc,Vec in,Vec out);
 36: PetscErrorCode dvd_matmult_jd(Mat A,Vec in,Vec out);
 37: PetscErrorCode dvd_matmulttrans_jd(Mat A,Vec in,Vec out);
 38: PetscErrorCode dvd_matgetvecs_jd(Mat A, Vec *right, Vec *left);
 39: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d);
 40: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d);
 41: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d);
 42: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d, Vec *D,
 43:                                    PetscInt max_size_D, PetscInt r_s,
 44:                                    PetscInt r_e, PetscInt *size_D);
 45: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d, PetscInt i_s,
 46:   PetscInt i_e, Vec **u, Vec **v, Vec *kr, Vec **auxV, PetscScalar **auxS,
 47:   PetscScalar *theta, PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY,
 48:   PetscInt ld);
 49: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d, PetscInt i_s,
 50:   PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
 51:   PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld);
 52: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d, PetscInt i_s,
 53:   PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
 54:   PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld);
 55: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d, PetscInt i,
 56:   PetscScalar* theta, PetscScalar* thetai,
 57:   PetscInt *maxits, PetscReal *tol);
 58: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS);
 59: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS);

 61: #define size_Z (64*4)

 63: /**** JD update step (I - Kfg'/(g'Kf)) K(A - sB) (I - Kfg'/(g'Kf)) t = (I - Kfg'/(g'Kf))r  ****/

 65: typedef struct {
 66:   PetscInt size_X;
 67:   void
 68:     *old_improveX_data;   /* old improveX_data */
 69:   improveX_type
 70:     old_improveX;         /* old improveX */
 71:   KSP ksp;                /* correction equation solver */
 72:   Vec
 73:     friends,              /* reference vector for composite vectors */
 74:     *auxV;                /* auxiliar vectors */
 75:   PetscScalar *auxS,      /* auxiliar scalars */
 76:     *theta,
 77:     *thetai;              /* the shifts used in the correction eq. */
 78:   PetscInt maxits,        /* maximum number of iterations */
 79:     r_s, r_e,             /* the selected eigenpairs to improve */
 80:     ksp_max_size;         /* the ksp maximum subvectors size */
 81:   PetscReal tol,          /* the maximum solution tolerance */
 82:     lastTol,              /* last tol for dynamic stopping criterion */
 83:     fix;                  /* tolerance for using the approx. eigenvalue */
 84:   PetscBool
 85:     dynamic;              /* if the dynamic stopping criterion is applied */
 86:   dvdDashboard
 87:     *d;                   /* the currect dvdDashboard reference */
 88:   PC old_pc;              /* old pc in ksp */
 89:   Vec
 90:     *u,                   /* new X vectors */
 91:     *real_KZ,             /* original KZ */
 92:     *KZ;                  /* KZ vecs for the projector KZ*inv(X'*KZ)*X' */
 93:   PetscScalar
 94:    *XKZ,                  /* X'*KZ */
 95:    *iXKZ;                 /* inverse of XKZ */
 96:   PetscInt
 97:     ldXKZ,                /* leading dimension of XKZ */
 98:     size_iXKZ,            /* size of iXKZ */
 99:     ldiXKZ,               /* leading dimension of iXKZ */
100:     size_KZ,              /* size of converged KZ */
101:     size_real_KZ,         /* original size of KZ */
102:     size_cX,              /* last value of d->size_cX */
103:     old_size_X;           /* last number of improved vectors */
104:   PetscBLASInt
105:     *iXKZPivots;          /* array of pivots */
106: } dvdImprovex_jd;

108: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);
109: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);

113: PetscErrorCode dvd_improvex_jd(dvdDashboard *d,dvdBlackboard *b,KSP ksp,PetscInt max_bs,PetscInt cX_impr,PetscBool dynamic)
114: {
115:   PetscErrorCode  ierr;
116:   dvdImprovex_jd  *data;
117:   PetscBool       useGD,her_probl,std_probl;
118:   PC              pc;
119:   PetscInt        size_P,s=1;

122:   std_probl = DVD_IS(d->sEP,DVD_EP_STD)?PETSC_TRUE:PETSC_FALSE;
123:   her_probl = DVD_IS(d->sEP,DVD_EP_HERMITIAN)?PETSC_TRUE:PETSC_FALSE;

125:   /* Setting configuration constrains */
126:   PetscObjectTypeCompare((PetscObject)ksp,KSPPREONLY,&useGD);

128:   /* If the arithmetic is real and the problem is not Hermitian, then
129:      the block size is incremented in one */
130: #if !defined(PETSC_USE_COMPLEX)
131:   if (!her_probl) {
132:     max_bs++;
133:     b->max_size_P = PetscMax(b->max_size_P,2);
134:     s = 2;
135:   } else
136: #endif
137:     b->max_size_P = PetscMax(b->max_size_P,1);
138:   b->max_size_X = PetscMax(b->max_size_X,max_bs);
139:   size_P = b->max_size_P+cX_impr;
140:   b->max_size_auxV = PetscMax(b->max_size_auxV,
141:      b->max_size_X*3+(useGD?0:2)+ /* u, kr, auxV(max_size_X+2?) */
142:      ((her_probl || !d->eps->trueres)?1:PetscMax(s*2,b->max_size_cX_proj+b->max_size_X))); /* testConv */
143: 
144:   b->own_scalars+= size_P*size_P; /* XKZ */
145:   b->max_size_auxS = PetscMax(b->max_size_auxS,
146:     b->max_size_X*3 + /* theta, thetai */
147:     size_P*size_P + /* iXKZ */
148:     FromIntToScalar(size_P) + /* iXkZPivots */
149:     PetscMax(PetscMax(
150:       3*b->max_size_proj*b->max_size_X, /* dvd_improvex_apply_proj */
151:       8*cX_impr*b->max_size_X), /* dvd_improvex_jd_proj_cuv_KZX */
152:       (her_probl || !d->eps->trueres)?0:b->max_nev*b->max_nev+PetscMax(b->max_nev*6,(b->max_nev+b->max_size_proj)*s+b->max_nev*(b->max_size_X+b->max_size_cX_proj)*(std_probl?2:4)+64))); /* preTestConv */
153:   b->own_vecs+= size_P; /* KZ */

155:   /* Setup the preconditioner */
156:   if (ksp) {
157:     KSPGetPC(ksp,&pc);
158:     dvd_static_precond_PC(d,b,pc);
159:   } else {
160:     dvd_static_precond_PC(d,b,0);
161:   }

163:   /* Setup the step */
164:   if (b->state >= DVD_STATE_CONF) {
165:     PetscMalloc(sizeof(dvdImprovex_jd),&data);
166:     data->dynamic = dynamic;
167:     data->size_real_KZ = size_P;
168:     data->real_KZ = b->free_vecs; b->free_vecs+= data->size_real_KZ;
169:     d->max_cX_in_impr = cX_impr;
170:     data->XKZ = b->free_scalars; b->free_scalars+= size_P*size_P;
171:     data->ldXKZ = size_P;
172:     data->size_X = b->max_size_X;
173:     data->old_improveX_data = d->improveX_data;
174:     d->improveX_data = data;
175:     data->old_improveX = d->improveX;
176:     data->ksp = useGD?PETSC_NULL:ksp;
177:     data->d = d;
178:     d->improveX = dvd_improvex_jd_gen;
179:     data->ksp_max_size = max_bs;

181:     DVD_FL_ADD(d->startList,dvd_improvex_jd_start);
182:     DVD_FL_ADD(d->endList,dvd_improvex_jd_end);
183:     DVD_FL_ADD(d->destroyList,dvd_improvex_jd_d);
184:   }

186:   return(0);
187: }


192: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d)
193: {
194:   PetscErrorCode  ierr;
195:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
196:   PetscInt        rA, cA, rlA, clA;
197:   Mat             A;
198:   PetscBool       t;
199:   PC              pc;


203:   data->KZ = data->real_KZ;
204:   data->size_KZ = data->size_cX = data->old_size_X = 0;
205:   data->lastTol = data->dynamic?0.5:0.0;

207:   /* Setup the ksp */
208:   if(data->ksp) {
209:     /* Create the reference vector */
210:     VecCreateCompWithVecs(d->V, data->ksp_max_size, PETSC_NULL,
211:                                  &data->friends);

213:     /* Save the current pc and set a PCNONE */
214:     KSPGetPC(data->ksp, &data->old_pc);
215:     PetscObjectTypeCompare((PetscObject)data->old_pc, PCNONE, &t);
216: 
217:     data->lastTol = 0.5;
218:     if (t) {
219:       data->old_pc = 0;
220:     } else {
221:       PetscObjectReference((PetscObject)data->old_pc);
222:       PCCreate(((PetscObject)d->eps)->comm, &pc);
223:       PCSetType(pc, PCSHELL);
224:       PCSetOperators(pc, d->A, d->A, SAME_PRECONDITIONER);
225:       PCShellSetApply(pc,dvd_pcapply);
226:       PCShellSetApplyBA(pc,dvd_pcapplyba);
227:       PCShellSetApplyTranspose(pc,dvd_pcapplytrans);
228:       KSPSetPC(data->ksp, pc);
229:       PCDestroy(&pc);
230:     }

232:     /* Create the (I-v*u')*K*(A-s*B) matrix */
233:     MatGetSize(d->A, &rA, &cA);
234:     MatGetLocalSize(d->A, &rlA, &clA);
235:     MatCreateShell(((PetscObject)d->A)->comm, rlA*data->ksp_max_size,
236:                           clA*data->ksp_max_size, rA*data->ksp_max_size,
237:                           cA*data->ksp_max_size, data, &A);
238:     MatShellSetOperation(A, MATOP_MULT,
239:                                 (void(*)(void))dvd_matmult_jd);
240:     MatShellSetOperation(A, MATOP_MULT_TRANSPOSE,
241:                                 (void(*)(void))dvd_matmulttrans_jd);
242:     MatShellSetOperation(A, MATOP_GET_VECS,
243:                                 (void(*)(void))dvd_matgetvecs_jd);
244: 

246:     /* Try to avoid KSPReset */
247:     KSPGetOperatorsSet(data->ksp,&t,PETSC_NULL);
248:     if (t) {
249:       Mat M;
250:       PetscInt rM;
251:       KSPGetOperators(data->ksp,&M,PETSC_NULL,PETSC_NULL);
252:       MatGetSize(M,&rM,PETSC_NULL);
253:       if (rM != rA*data->ksp_max_size) { KSPReset(data->ksp); }
254:     }
255:     KSPSetOperators(data->ksp, A, A, SAME_PRECONDITIONER);
256: 
257:     KSPSetUp(data->ksp);
258:     MatDestroy(&A);
259:   } else {
260:     data->old_pc = 0;
261:     data->friends = PETSC_NULL;
262:   }
263: 
264:   return(0);
265: }


270: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d)
271: {
272:   PetscErrorCode  ierr;
273:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;


277:   if (data->friends) { VecDestroy(&data->friends);  }

279:   /* Restore the pc of ksp */
280:   if (data->old_pc) {
281:     KSPSetPC(data->ksp, data->old_pc);
282:     PCDestroy(&data->old_pc);
283:   }

285:   return(0);
286: }


291: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d)
292: {
293:   PetscErrorCode  ierr;
294:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;

297: 
298:   /* Restore changes in dvdDashboard */
299:   d->improveX_data = data->old_improveX_data;

301:   /* Free local data and objects */
302:   PetscFree(data);

304:   return(0);
305: }


310: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d,Vec *D,PetscInt max_size_D,PetscInt r_s,PetscInt r_e,PetscInt *size_D)
311: {
312:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
313:   PetscErrorCode  ierr;
314:   PetscInt        i,j,n,maxits,maxits0,lits,s,ld,k;
315:   PetscScalar     *pX,*pY,*auxS = d->auxS,*auxS0;
316:   PetscReal       tol,tol0;
317:   Vec             *u,*v,*kr,kr_comp,D_comp;


321:   /* Quick exit */
322:   if ((max_size_D == 0) || r_e-r_s <= 0) {
323:    *size_D = 0;
324:    /* Callback old improveX */
325:     if (data->old_improveX) {
326:       d->improveX_data = data->old_improveX_data;
327:       data->old_improveX(d, PETSC_NULL, 0, 0, 0, PETSC_NULL);
328:       d->improveX_data = data;
329:     }
330:     return(0);
331:   }
332: 
333:   n = PetscMin(PetscMin(data->size_X, max_size_D), r_e-r_s);
334:   if (n == 0) SETERRQ(PETSC_COMM_SELF,1, "n == 0");
335:   if (data->size_X < r_e-r_s) SETERRQ(PETSC_COMM_SELF,1, "size_X < r_e-r_s");

337:   DSGetLeadingDimension(d->ps,&ld);

339:   /* Restart lastTol if a new pair converged */
340:   if (data->dynamic && data->size_cX < d->size_cX)
341:     data->lastTol = 0.5;

343:   for(i=0, s=0, auxS0=auxS; i<n; i+=s) {
344:     /* If the selected eigenvalue is complex, but the arithmetic is real... */
345: #if !defined(PETSC_USE_COMPLEX)
346:     if (d->eigi[i] != 0.0) {
347:       if (i+2 <= max_size_D) s=2; else break;
348:     } else
349: #endif
350:       s=1;

352:     data->auxV = d->auxV;
353:     data->r_s = r_s+i; data->r_e = r_s+i+s;
354:     auxS = auxS0;
355:     data->theta = auxS; auxS+= 2*s;
356:     data->thetai = auxS; auxS+= s;
357:     kr = data->auxV; data->auxV+= s;

359:     /* Compute theta, maximum iterations and tolerance */
360:     maxits = 0; tol = 1;
361:     for(j=0; j<s; j++) {
362:       d->improvex_jd_lit(d, r_s+i+j, &data->theta[2*j],
363:                                 &data->thetai[j], &maxits0, &tol0);
364: 
365:       maxits+= maxits0; tol*= tol0;
366:     }
367:     maxits/= s; tol = data->dynamic?data->lastTol:exp(log(tol)/s);

369:     /* Compute u, v and kr */
370:     k = r_s+i+d->cX_in_H;
371:     DSVectors(d->ps,DS_MAT_X,&k,PETSC_NULL);
372:     DSNormalize(d->ps,DS_MAT_X,r_s+i+d->cX_in_H);
373:     k = r_s+i+d->cX_in_H;
374:     DSVectors(d->ps,DS_MAT_Y,&k,PETSC_NULL);
375:     DSNormalize(d->ps,DS_MAT_Y,r_s+i+d->cX_in_H);
376:     DSGetArray(d->ps,DS_MAT_X,&pX);
377:     DSGetArray(d->ps,DS_MAT_Y,&pY);
378:     dvd_improvex_jd_proj_cuv(d,r_s+i,r_s+i+s,&u,&v,kr,&data->auxV,&auxS,data->theta,data->thetai,pX,pY,ld);
379:     DSRestoreArray(d->ps,DS_MAT_X,&pX);
380:     DSRestoreArray(d->ps,DS_MAT_Y,&pY);
381:     data->u = u;

383:     /* Check if the first eigenpairs are converged */
384:     if (i == 0) {
385:       PetscInt n_auxV = data->auxV-d->auxV+s, n_auxS = auxS - d->auxS;
386:       d->auxV+= n_auxV; d->size_auxV-= n_auxV;
387:       d->auxS+= n_auxS; d->size_auxS-= n_auxS;
388:       d->preTestConv(d,0,s,s,d->auxV-s,PETSC_NULL,&d->npreconv);
389:       d->auxV-= n_auxV; d->size_auxV+= n_auxV;
390:       d->auxS-= n_auxS; d->size_auxS+= n_auxS;
391:       if (d->npreconv > 0) break;
392:     }

394:     /* If JD */
395:     if (data->ksp) {
396:       data->auxS = auxS;

398:       /* kr <- -kr */
399:       for(j=0; j<s; j++) {
400:         VecScale(kr[j], -1.0);
401:       }

403:       /* Compouse kr and D */
404:       VecCreateCompWithVecs(kr, data->ksp_max_size, data->friends,
405:                                    &kr_comp);
406:       VecCreateCompWithVecs(&D[i], data->ksp_max_size, data->friends,
407:                                    &D_comp);
408:       VecCompSetSubVecs(data->friends,s,PETSC_NULL);
409: 
410:       /* Solve the correction equation */
411:       KSPSetTolerances(data->ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT,
412:                               maxits);
413:       KSPSolve(data->ksp, kr_comp, D_comp);
414:       KSPGetIterationNumber(data->ksp, &lits);
415:       d->eps->OP->lineariterations+= lits;
416: 
417:       /* Destroy the composed ks and D */
418:       VecDestroy(&kr_comp);
419:       VecDestroy(&D_comp);

421:     /* If GD */
422:     } else {
423:       for(j=0; j<s; j++) {
424:         d->improvex_precond(d, r_s+i+j, kr[j], D[i+j]);
425:       }
426:       dvd_improvex_apply_proj(d, &D[i], s, auxS);
427:     }
428:     /* Prevent that short vectors are discarded in the orthogonalization */
429:     if (i == 0 && d->eps->errest[d->nconv+r_s] > PETSC_MACHINE_EPSILON && d->eps->errest[d->nconv+r_s] < PETSC_MAX_REAL) {
430:       for(j=0; j<s; j++) {
431:         VecScale(D[j],1.0/d->eps->errest[d->nconv+r_s]);
432:       }
433:     }
434:   }
435:   *size_D = i;
436:   if (data->dynamic) data->lastTol = PetscMax(data->lastTol/2.0,PETSC_MACHINE_EPSILON*10.0);
437: 
438:   /* Callback old improveX */
439:   if (data->old_improveX) {
440:     d->improveX_data = data->old_improveX_data;
441:     data->old_improveX(d, PETSC_NULL, 0, 0, 0, PETSC_NULL);
442:     d->improveX_data = data;
443:   }

445:   return(0);
446: }

448: /* y <- theta[1]A*x - theta[0]*B*x
449:    auxV, two auxiliary vectors */
452: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
453: {
454:   PetscErrorCode  ierr;
455:   PetscInt        n,i;
456:   const Vec       *Bx;

459:   n = data->r_e - data->r_s;
460:   for(i=0; i<n; i++) {
461:     MatMult(data->d->A,x[i],y[i]);
462:   }

464:   for(i=0; i<n; i++) {
465: #if !defined(PETSC_USE_COMPLEX)
466:     if(data->d->eigi[data->r_s+i] != 0.0) {
467:       if (data->d->B) {
468:         MatMult(data->d->B,x[i],auxV[0]);
469:         MatMult(data->d->B,x[i+1],auxV[1]);
470:         Bx = auxV;
471:       } else {
472:         Bx = &x[i];
473:       }

475:       /* y_i   <- [ t_2i+1*A*x_i   - t_2i*Bx_i + ti_i*Bx_i+1;
476:          y_i+1      t_2i+1*A*x_i+1 - ti_i*Bx_i - t_2i*Bx_i+1  ] */
477:       VecAXPBYPCZ(y[i],-data->theta[2*i],data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
478:       VecAXPBYPCZ(y[i+1],-data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
479:       i++;
480:     } else
481: #endif
482:     {
483:       if (data->d->B) {
484:         MatMult(data->d->B,x[i],auxV[0]);
485:         Bx = auxV;
486:       } else {
487:         Bx = &x[i];
488:       }
489:       VecAXPBY(y[i],-data->theta[i*2],data->theta[i*2+1],Bx[0]);
490:     }
491:   }
492:   return(0);
493: }



497: /* y <- theta[1]'*A'*x - theta[0]'*B'*x
498:    auxV, two auxiliary vectors */
501: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
502: {
503:   PetscErrorCode  ierr;
504:   PetscInt        n,i;
505:   const Vec       *Bx;

508:   n = data->r_e - data->r_s;
509:   for(i=0; i<n; i++) {
510:     MatMultTranspose(data->d->A,x[i],y[i]);
511:   }

513:   for(i=0; i<n; i++) {
514: #if !defined(PETSC_USE_COMPLEX)
515:     if(data->d->eigi[data->r_s+i] != 0.0) {
516:       if (data->d->B) {
517:         MatMultTranspose(data->d->B,x[i],auxV[0]);
518:         MatMultTranspose(data->d->B,x[i+1],auxV[1]);
519:         Bx = auxV;
520:       } else {
521:         Bx = &x[i];
522:       }

524:       /* y_i   <- [ t_2i+1*A*x_i   - t_2i*Bx_i - ti_i*Bx_i+1;
525:          y_i+1      t_2i+1*A*x_i+1 + ti_i*Bx_i - t_2i*Bx_i+1  ] */
526:       VecAXPBYPCZ(y[i],-data->theta[2*i],-data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
527:       VecAXPBYPCZ(y[i+1],data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
528:       i++;
529:     } else
530: #endif
531:     {
532:       if (data->d->B) {
533:         MatMultTranspose(data->d->B,x[i],auxV[0]);
534:         Bx = auxV;
535:       } else {
536:         Bx = &x[i];
537:       }
538:       VecAXPBY(y[i],PetscConj(-data->theta[i*2]),PetscConj(data->theta[i*2+1]),Bx[0]);
539:     }
540:   }
541:   return(0);
542: }


547: PetscErrorCode dvd_pcapplyba(PC pc,PCSide side,Vec in,Vec out,Vec w)
548: {
549:   PetscErrorCode  ierr;
550:   dvdImprovex_jd  *data;
551:   PetscInt        n,i;
552:   const Vec       *inx,*outx,*wx;
553:   Mat             A;


557:   PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
558:   MatShellGetContext(A,(void**)&data);
559:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
560:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
561:   VecCompGetSubVecs(w,PETSC_NULL,&wx);
562:   n = data->r_e - data->r_s;

564:   /* Check auxiliary vectors */
565:   if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

567:   switch(side) {
568:   case PC_LEFT:
569:     /* aux <- theta[1]A*in - theta[0]*B*in */
570:     dvd_aux_matmult(data,inx,data->auxV,outx);
571: 
572:     /* out <- K * aux */
573:     for(i=0; i<n; i++) {
574:       data->d->improvex_precond(data->d,data->r_s+i,data->auxV[i],outx[i]);
575:     }
576:     break;
577:   case PC_RIGHT:
578:     /* aux <- K * in */
579:     for(i=0; i<n; i++) {
580:       data->d->improvex_precond(data->d,data->r_s+i,inx[i],data->auxV[i]);
581:     }
582: 
583:     /* out <- theta[1]A*auxV - theta[0]*B*auxV */
584:     dvd_aux_matmult(data,data->auxV,outx,wx);
585:     break;
586:   case PC_SYMMETRIC:
587:     /* aux <- K^{1/2} * in */
588:     for(i=0; i<n; i++) {
589:       PCApplySymmetricRight(data->old_pc,inx[i],data->auxV[i]);
590:     }
591: 
592:     /* out <- theta[1]A*auxV - theta[0]*B*auxV */
593:     dvd_aux_matmult(data,data->auxV,wx,outx);
594: 
595:     /* aux <- K^{1/2} * in */
596:     for(i=0; i<n; i++) {
597:       PCApplySymmetricLeft(data->old_pc,wx[i],outx[i]);
598:     }
599:     break;
600:   default:
601:     SETERRQ(PETSC_COMM_SELF,1, "Unsupported KSP side");
602:   }

604:   /* out <- out - v*(u'*out) */
605:   dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);

607:   return(0);
608: }


613: PetscErrorCode dvd_pcapply(PC pc,Vec in,Vec out)
614: {
615:   PetscErrorCode  ierr;
616:   dvdImprovex_jd  *data;
617:   PetscInt        n,i;
618:   const Vec       *inx, *outx;
619:   Mat             A;


623:   PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
624:   MatShellGetContext(A,(void**)&data);
625:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
626:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
627:   n = data->r_e - data->r_s;

629:   /* out <- K * in */
630:   for(i=0; i<n; i++) {
631:     data->d->improvex_precond(data->d,data->r_s+i,inx[i],outx[i]);
632:   }

634:   /* out <- out - v*(u'*out) */
635:   dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);

637:   return(0);
638: }

642: PetscErrorCode dvd_pcapplytrans(PC pc,Vec in,Vec out)
643: {
644:   PetscErrorCode  ierr;
645:   dvdImprovex_jd  *data;
646:   PetscInt        n,i;
647:   const Vec       *inx, *outx;
648:   Mat             A;


652:   PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
653:   MatShellGetContext(A,(void**)&data);
654:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
655:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
656:   n = data->r_e - data->r_s;

658:   /* Check auxiliary vectors */
659:   if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

661:   /* auxV <- in */
662:   for(i=0; i<n; i++) {
663:     VecCopy(inx[i],data->auxV[i]);
664:   }
665: 
666:   /* auxV <- auxV - u*(v'*auxV) */
667:   dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);

669:   /* out <- K' * aux */
670:   for(i=0; i<n; i++) {
671:     PCApplyTranspose(data->old_pc,data->auxV[i],outx[i]);
672:   }

674:   return(0);
675: }


680: PetscErrorCode dvd_matmult_jd(Mat A,Vec in,Vec out)
681: {
682:   PetscErrorCode  ierr;
683:   dvdImprovex_jd  *data;
684:   PetscInt        n;
685:   const Vec       *inx, *outx;
686:   PCSide          side;


690:   MatShellGetContext(A,(void**)&data);
691:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
692:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
693:   n = data->r_e - data->r_s;

695:   /* Check auxiliary vectors */
696:   if (&data->auxV[2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

698:   /* out <- theta[1]A*in - theta[0]*B*in */
699:   dvd_aux_matmult(data,inx,outx,data->auxV);

701:   KSPGetPCSide(data->ksp,&side);
702:   if (side == PC_RIGHT) {
703:     /* out <- out - v*(u'*out) */
704:     dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
705:   }

707:   return(0);
708: }

712: PetscErrorCode dvd_matmulttrans_jd(Mat A,Vec in,Vec out)
713: {
714:   PetscErrorCode  ierr;
715:   dvdImprovex_jd  *data;
716:   PetscInt        n,i;
717:   const Vec       *inx,*outx,*r,*auxV;
718:   PCSide          side;


722:   MatShellGetContext(A,(void**)&data);
723:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
724:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
725:   n = data->r_e - data->r_s;

727:   /* Check auxiliary vectors */
728:   if (&data->auxV[n+2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

730:   KSPGetPCSide(data->ksp,&side);
731:   if (side == PC_RIGHT) {
732:     /* auxV <- in */
733:     for(i=0; i<n; i++) {
734:       VecCopy(inx[i],data->auxV[i]);
735:     }

737:     /* auxV <- auxV - v*(u'*auxV) */
738:     dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);
739:     r = data->auxV;
740:     auxV = data->auxV+n;
741:   } else {
742:     r = inx;
743:     auxV = data->auxV;
744:   }

746:   /* out <- theta[1]A*r - theta[0]*B*r */
747:   dvd_aux_matmulttrans(data,r,outx,auxV);

749:   return(0);
750: }


755: PetscErrorCode dvd_matgetvecs_jd(Mat A, Vec *right, Vec *left)
756: {
757:   PetscErrorCode  ierr;
758:   Vec             *r, *l;
759:   dvdImprovex_jd  *data;
760:   PetscInt        n, i;


764:   MatShellGetContext(A, (void**)&data);
765:   n = data->ksp_max_size;
766:   if (right) {
767:     PetscMalloc(sizeof(Vec)*n, &r);
768:   }
769:   if (left) {
770:     PetscMalloc(sizeof(Vec)*n, &l);
771:   }
772:   for (i=0; i<n; i++) {
773:     MatGetVecs(data->d->A, right?&r[i]:PETSC_NULL,
774:                       left?&l[i]:PETSC_NULL);
775:   }
776:   if(right) {
777:     VecCreateCompWithVecs(r, n, data->friends, right);
778:     for (i=0; i<n; i++) {
779:       VecDestroy(&r[i]);
780:     }
781:   }
782:   if(left) {
783:     VecCreateCompWithVecs(l, n, data->friends, left);
784:     for (i=0; i<n; i++) {
785:       VecDestroy(&l[i]);
786:     }
787:   }

789:   if (right) {
790:     PetscFree(r);
791:   }
792:   if (left) {
793:     PetscFree(l);
794:   }

796:   return(0);
797: }


802: PetscErrorCode dvd_improvex_jd_proj_uv(dvdDashboard *d, dvdBlackboard *b,
803:                                        ProjType_t p)
804: {

807:   /* Setup the step */
808:   if (b->state >= DVD_STATE_CONF) {
809:     switch(p) {
810:     case DVD_PROJ_KXX:
811:       d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KXX; break;
812:     case DVD_PROJ_KZX:
813:       d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KZX; break;
814:     }
815:   }

817:   return(0);
818: }

822: /* 
823:   Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
824:   kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
825:   where
826:   auxV, 4*(i_e-i_s) auxiliar global vectors
827:   pX,pY, the right and left eigenvectors of the projected system
828:   ld, the leading dimension of pX and pY
829:   auxS: max(8*bs*max_cX_in_proj,size_V*size_V)
830: */
831: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d, PetscInt i_s,
832:   PetscInt i_e, Vec **u, Vec **v, Vec *kr, Vec **auxV, PetscScalar **auxS,
833:   PetscScalar *theta, PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY,
834:   PetscInt ld)
835: {
836: #if defined(PETSC_MISSING_LAPACK_GETRF)
838:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable");
839: #else
840:   PetscErrorCode  ierr;
841:   PetscInt        n = i_e - i_s, size_KZ, V_new, rm, i, size_in;
842:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
843:   PetscBLASInt    s, ldXKZ, info;
844:   DvdReduction    r;
845:   DvdReductionChunk
846:                   ops[2];
847:   DvdMult_copy_func
848:                   sr[2];


852:   /* Check consistency */
853:   V_new = d->size_cX - data->size_cX;
854:   if (V_new > data->old_size_X) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
855:   data->old_size_X = n;

857:   /* KZ <- KZ(rm:rm+max_cX-1) */
858:   rm = PetscMax(V_new+data->size_KZ-d->max_cX_in_impr, 0);
859:   for (i=0; i<d->max_cX_in_impr; i++) {
860:     VecCopy(data->KZ[i+rm], data->KZ[i]);
861:   }
862:   data->size_cX = d->size_cX;

864:   /* XKZ <- XKZ(rm:rm+max_cX-1,rm:rm+max_cX-1) */
865:   for (i=0; i<d->max_cX_in_impr; i++) {
866:     SlepcDenseCopy(&data->XKZ[i*data->ldXKZ+i], data->ldXKZ, &data->XKZ[(i+rm)*data->ldXKZ+i+rm], data->ldXKZ, data->size_KZ, 1);
867:   }
868:   data->size_KZ = PetscMin(d->max_cX_in_impr, data->size_KZ+V_new);

870:   /* Compute X, KZ and KR */
871:   *u = *auxV; *auxV+= n;
872:   *v = &data->KZ[data->size_KZ];
873:   d->improvex_jd_proj_uv(d, i_s, i_e, *u, *v, kr, *auxV, theta, thetai,
874:                                 pX, pY, ld);

876:   /* XKZ <- X'*KZ */
877:   size_KZ = data->size_KZ+n;
878:   size_in = 2*n*data->size_KZ+n*n;
879:   SlepcAllReduceSumBegin(ops,2,*auxS,*auxS+size_in,size_in,&r,((PetscObject)d->V[0])->comm);
880:   VecsMultS(data->XKZ,0,data->ldXKZ,d->V-data->size_KZ,0,data->size_KZ,data->KZ,data->size_KZ,size_KZ,&r,&sr[0]);
881:   VecsMultS(&data->XKZ[data->size_KZ],0,data->ldXKZ,*u,0,n,data->KZ,0,size_KZ,&r,&sr[1]);
882:   SlepcAllReduceSumEnd(&r);

884:   /* iXKZ <- inv(XKZ) */
885:   s = PetscBLASIntCast(size_KZ);
886:   data->ldiXKZ = data->size_iXKZ = size_KZ;
887:   data->iXKZ = *auxS; *auxS+= size_KZ*size_KZ;
888:   data->iXKZPivots = (PetscBLASInt*)*auxS;
889:   *auxS += FromIntToScalar(size_KZ);
890:   SlepcDenseCopy(data->iXKZ,data->ldiXKZ,data->XKZ,data->ldXKZ,size_KZ,size_KZ);
891:   ldXKZ = PetscBLASIntCast(data->ldiXKZ);
892:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
893:   LAPACKgetrf_(&s, &s, data->iXKZ, &ldXKZ, data->iXKZPivots, &info);
894:   PetscFPTrapPop();
895:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRF %d", info);

897:   return(0);
898: #endif
899: }

901: #define DVD_COMPLEX_RAYLEIGH_QUOTIENT(ur,ui,Axr,Axi,Bxr,Bxi,eigr,eigi,b,ierr)\
902: { \
903:   VecDot((Axr), (ur), &(b)[0]);  /* r*A*r */ \
904:   VecDot((Axr), (ui), &(b)[1]);  /* i*A*r */ \
905:   VecDot((Axi), (ur), &(b)[2]);  /* r*A*i */ \
906:   VecDot((Axi), (ui), &(b)[3]);  /* i*A*i */ \
907:   VecDot((Bxr), (ur), &(b)[4]);  /* r*B*r */ \
908:   VecDot((Bxr), (ui), &(b)[5]);  /* i*B*r */ \
909:   VecDot((Bxi), (ur), &(b)[6]);  /* r*B*i */ \
910:   VecDot((Bxi), (ui), &(b)[7]);  /* i*B*i */ \
911:   (b)[0]  = (b)[0]+(b)[3]; /* rAr+iAi */ \
912:   (b)[2] =  (b)[2]-(b)[1]; /* rAi-iAr */ \
913:   (b)[4] = (b)[4]+(b)[7]; /* rBr+iBi */ \
914:   (b)[6] = (b)[6]-(b)[5]; /* rBi-iBr */ \
915:   (b)[7] = (b)[4]*(b)[4] + (b)[6]*(b)[6]; /* k */ \
916:   *(eigr) = ((b)[0]*(b)[4] + (b)[2]*(b)[6]) / (b)[7]; /* eig_r */ \
917:   *(eigi) = ((b)[2]*(b)[4] - (b)[0]*(b)[6]) / (b)[7]; /* eig_i */ \
918: }

920: #if !defined(PETSC_USE_COMPLEX)
921: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
922:   for((i)=0; (i)<(n); (i)++) { \
923:     if ((eigi)[(i_s)+(i)] != 0.0) { \
924:       /* eig_r = [(rAr+iAi)*(rBr+iBi) + (rAi-iAr)*(rBi-iBr)]/k \
925:          eig_i = [(rAi-iAr)*(rBr+iBi) - (rAr+iAi)*(rBi-iBr)]/k \
926:          k     =  (rBr+iBi)*(rBr+iBi) + (rBi-iBr)*(rBi-iBr)    */ \
927:       DVD_COMPLEX_RAYLEIGH_QUOTIENT((u)[(i)], (u)[(i)+1], (Ax)[(i)], \
928:         (Ax)[(i)+1], (Bx)[(i)], (Bx)[(i)+1], &(b)[8], &(b)[9], (b), (ierr)); \
929:       if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[8])/ \
930:             PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10    || \
931:           PetscAbsScalar((eigi)[(i_s)+(i)] - (b)[9])/ \
932:             PetscAbsScalar((eigi)[(i_s)+(i)]) > 1e-10         ) { \
933:         (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its "\
934:                             "Rayleigh quotient value %G+%G\n", \
935:                             (eigr)[(i_s)+(i)], \
936:                             (eigi)[(i_s)+(i)], (b)[8], (b)[9]); \
937:       } \
938:       (i)++; \
939:     } \
940:   }
941: #else
942: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
943:   for((i)=0; (i)<(n); (i)++) { \
944:       (ierr) = VecDot((Ax)[(i)], (u)[(i)], &(b)[0]);  \
945:       (ierr) = VecDot((Bx)[(i)], (u)[(i)], &(b)[1]);  \
946:       (b)[0] = (b)[0]/(b)[1]; \
947:       if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[0])/ \
948:             PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10     ) { \
949:         (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its " \
950:                "Rayleigh quotient value %G+%G\n", \
951:                PetscRealPart((eigr)[(i_s)+(i)]), \
952:                PetscImaginaryPart((eigr)[(i_s)+(i)]), PetscRealPart((b)[0]), \
953:                PetscImaginaryPart((b)[0])); \
954:       } \
955:     }
956: #endif


961: /* 
962:   Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
963:   kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
964:   where
965:   auxV, 4*(i_e-i_s) auxiliar global vectors
966:   pX,pY, the right and left eigenvectors of the projected system
967:   ld, the leading dimension of pX and pY
968: */
969: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d, PetscInt i_s,
970:   PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
971:   PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld)
972: {
973:   PetscErrorCode  ierr;
974:   PetscInt        n = i_e - i_s, i;
975:   PetscScalar     b[16];
976:   Vec             *Ax, *Bx, *r=auxV, X[4];
977:   /* The memory manager doen't allow to call a subroutines */
978:   PetscScalar     Z[size_Z];


982:   /* u <- X(i) */
983:   dvd_improvex_compute_X(d,i_s,i_e,u,pX,ld);

985:   /* v <- theta[0]A*u + theta[1]*B*u */

987:   /* Bx <- B*X(i) */
988:   Bx = kr;
989:   if (d->BV) {
990:     SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
991:   } else {
992:     for(i=0; i<n; i++) {
993:       if (d->B) {
994:         MatMult(d->B, u[i], Bx[i]);
995:       } else {
996:         VecCopy(u[i], Bx[i]);
997:       }
998:     }
999:   }

1001:   /* Ax <- A*X(i) */
1002:   Ax = r;
1003:   SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);

1005:   /* v <- Y(i) */
1006:   SlepcUpdateVectorsZ(v, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);

1008:   /* Recompute the eigenvalue */
1009:   DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, v, Ax, Bx, b, ierr);

1011:   for(i=0; i<n; i++) {
1012: #if !defined(PETSC_USE_COMPLEX)
1013:     if(d->eigi[i_s+i] != 0.0) {
1014:       /* [r_i r_i+1 kr_i kr_i+1]*= [ theta_2i'    0            1        0   
1015:                                        0         theta_2i'     0        1   
1016:                                      theta_2i+1 -thetai_i   -eigr_i -eigi_i 
1017:                                      thetai_i    theta_2i+1  eigi_i -eigr_i ] */
1018:       b[0] = b[5] = PetscConj(theta[2*i]);
1019:       b[2] = b[7] = -theta[2*i+1];
1020:       b[6] = -(b[3] = thetai[i]);
1021:       b[1] = b[4] = 0.0;
1022:       b[8] = b[13] = 1.0/d->nX[i_s+i];
1023:       b[10] = b[15] = -d->eigr[i_s+i]/d->nX[i_s+i];
1024:       b[14] = -(b[11] = d->eigi[i_s+i]/d->nX[i_s+i]);
1025:       b[9] = b[12] = 0.0;
1026:       X[0] = Ax[i]; X[1] = Ax[i+1]; X[2] = Bx[i]; X[3] = Bx[i+1];
1027:       SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 4, Z, size_Z);
1028: 
1029:       i++;
1030:     } else
1031: #endif
1032:     {
1033:       /* [Ax_i Bx_i]*= [ theta_2i'    1/nX_i
1034:                         theta_2i+1  -eig_i/nX_i ] */
1035:       b[0] = PetscConj(theta[i*2]);
1036:       b[1] = theta[i*2+1];
1037:       b[2] = 1.0/d->nX[i_s+i];
1038:       b[3] = -d->eigr[i_s+i]/d->nX[i_s+i];
1039:       X[0] = Ax[i]; X[1] = Bx[i];
1040:       SlepcUpdateVectorsD(X, 2, 1.0, b, 2, 2, 2, Z, size_Z);
1041: 
1042:     }
1043:   }
1044:   for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;

1046:   /* v <- K^{-1} r = K^{-1}(theta_2i'*Ax + theta_2i+1*Bx) */
1047:   for(i=0; i<n; i++) {
1048:     d->improvex_precond(d, i_s+i, r[i], v[i]);
1049:   }

1051:   /* kr <- P*(Ax - eig_i*Bx) */
1052:   d->calcpairs_proj_res(d, i_s, i_e, kr);

1054:   return(0);
1055: }

1059: /* 
1060:   Compute: u <- K^{-1}*X, v <- X,
1061:   kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1]
1062:   where
1063:   auxV, 4*(i_e-i_s) auxiliar global vectors
1064:   pX,pY, the right and left eigenvectors of the projected system
1065:   ld, the leading dimension of pX and pY
1066: */
1067: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d, PetscInt i_s,
1068:   PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
1069:   PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld)
1070: {
1071:   PetscErrorCode  ierr;
1072:   PetscInt        n = i_e - i_s, i;
1073:   PetscScalar     b[16];
1074:   Vec             *Ax, *Bx, *r = auxV, X[4];
1075:   /* The memory manager doen't allow to call a subroutines */
1076:   PetscScalar     Z[size_Z];


1080:   /* [v u] <- X(i) Y(i) */
1081:   dvd_improvex_compute_X(d,i_s,i_e,v,pX,ld);
1082:   SlepcUpdateVectorsZ(u, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);

1084:   /* Bx <- B*X(i) */
1085:   Bx = r;
1086:   if (d->BV) {
1087:     SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
1088:   } else {
1089:     if (d->B) {
1090:       for(i=0; i<n; i++) {
1091:         MatMult(d->B, v[i], Bx[i]);
1092:       }
1093:     } else
1094:       Bx = v;
1095:   }

1097:   /* Ax <- A*X(i) */
1098:   Ax = kr;
1099:   SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);

1101:   /* Recompute the eigenvalue */
1102:   DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, u, Ax, Bx, b, ierr);

1104:   for(i=0; i<n; i++) {
1105:     if (d->eigi[i_s+i] == 0.0) {
1106:       /* kr <- Ax -eig*Bx */
1107:       VecAXPBY(kr[i], -d->eigr[i_s+i]/d->nX[i_s+i], 1.0/d->nX[i_s+i], Bx[i]);
1108:     } else {
1109:       /* [kr_i kr_i+1 r_i r_i+1]*= [   1        0 
1110:                                        0        1
1111:                                     -eigr_i -eigi_i
1112:                                      eigi_i -eigr_i] */
1113:       b[0] = b[5] = 1.0/d->nX[i_s+i];
1114:       b[2] = b[7] = -d->eigr[i_s+i]/d->nX[i_s+i];
1115:       b[6] = -(b[3] = d->eigi[i_s+i]/d->nX[i_s+i]);
1116:       b[1] = b[4] = 0.0;
1117:       X[0] = kr[i]; X[1] = kr[i+1]; X[2] = r[i]; X[3] = r[i+1];
1118:       SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 2, Z, size_Z);
1119: 
1120:       i++;
1121:     }
1122:   }
1123:   for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;

1125:   /* kr <- P*kr */
1126:   d->calcpairs_proj_res(d, i_s, i_e, r);

1128:   /* u <- K^{-1} X(i) */
1129:   for(i=0; i<n; i++) {
1130:     d->improvex_precond(d, i_s+i, v[i], u[i]);
1131:   }

1133:   return(0);
1134: }


1139: PetscErrorCode dvd_improvex_jd_lit_const(dvdDashboard *d, dvdBlackboard *b,
1140:                                          PetscInt maxits, PetscReal tol,
1141:                                          PetscReal fix)
1142: {
1143:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;

1146:   /* Setup the step */
1147:   if (b->state >= DVD_STATE_CONF) {
1148:     data->maxits = maxits;
1149:     data->tol = tol;
1150:     data->fix = fix;
1151:     d->improvex_jd_lit = dvd_improvex_jd_lit_const_0;
1152:   }
1153:   return(0);
1154: }


1159: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d, PetscInt i,
1160:   PetscScalar* theta, PetscScalar* thetai, PetscInt *maxits, PetscReal *tol)
1161: {
1162:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
1163:   PetscReal       a;

1166:   a = SlepcAbsEigenvalue(d->eigr[i],d->eigi[i]);

1168:   if (d->nR[i]/a < data->fix) {
1169:     theta[0] = d->eigr[i];
1170:     theta[1] = 1.0;
1171: #if !defined(PETSC_USE_COMPLEX)
1172:     *thetai = d->eigi[i];
1173: #endif
1174:   } else {
1175:     theta[0] = d->target[0];
1176:     theta[1] = d->target[1];
1177: #if !defined(PETSC_USE_COMPLEX)
1178:     *thetai = 0.0;
1179: #endif
1180: }

1182: #if defined(PETSC_USE_COMPLEX)
1183:   if(thetai) *thetai = 0.0;
1184: #endif
1185:   *maxits = data->maxits;
1186:   *tol = data->tol;
1187:   return(0);
1188: }


1191: /**** Patterns implementation *************************************************/

1193: typedef PetscInt (*funcV0_t)(dvdDashboard*, PetscInt, PetscInt, Vec*);
1194: typedef PetscInt (*funcV1_t)(dvdDashboard*, PetscInt, PetscInt, Vec*,
1195:                              PetscScalar*, Vec);

1199: /* Compute D <- K^{-1} * funcV[r_s..r_e] */
1200: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d, void *funcV, Vec *D,
1201:   PetscInt max_size_D, PetscInt r_s, PetscInt r_e, Vec *auxV,
1202:   PetscScalar *auxS)
1203: {
1204:   PetscErrorCode  ierr;
1205:   PetscInt        i;

1208:   if (max_size_D >= r_e-r_s+1) {
1209:     /* The optimized version needs one vector extra of D */
1210:     /* D(1:r.size) = R(r_s:r_e-1) */
1211:     if (auxS) ((funcV1_t)funcV)(d, r_s, r_e, D+1, auxS, auxV[0]);
1212:     else      ((funcV0_t)funcV)(d, r_s, r_e, D+1);

1214:     /* D = K^{-1} * R */
1215:     for (i=0; i<r_e-r_s; i++) {
1216:       d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1217:     }
1218:   } else if (max_size_D == r_e-r_s) {
1219:     /* Non-optimized version */
1220:     /* auxV <- R[r_e-1] */
1221:     if (auxS) ((funcV1_t)funcV)(d, r_e-1, r_e, auxV, auxS, auxV[1]);
1222:     else      ((funcV0_t)funcV)(d, r_e-1, r_e, auxV);

1224:     /* D(1:r.size-1) = R(r_s:r_e-2) */
1225:     if (auxS) ((funcV1_t)funcV)(d, r_s, r_e-1, D+1, auxS, auxV[1]);
1226:     else      ((funcV0_t)funcV)(d, r_s, r_e-1, D+1);

1228:     /* D = K^{-1} * R */
1229:     for (i=0; i<r_e-r_s-1; i++) {
1230:       d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1231:     }
1232:     d->improvex_precond(d, r_e-1, auxV[0], D[r_e-r_s-1]);
1233:   } else SETERRQ(PETSC_COMM_SELF,1, "Problem: r_e-r_s > max_size_D");
1234:   return(0);
1235: }


1240: /* Compute (I - KZ*iXKZ*X')*V where,
1241:    V, the vectors to apply the projector,
1242:    cV, the number of vectors in V,
1243:    auxS, auxiliar vector of size length 3*size_iXKZ*cV
1244: */
1245: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS)
1246: {
1247: #if defined(PETSC_MISSING_LAPACK_GETRS) 
1249:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1250: #else
1251:   PetscErrorCode  ierr;
1252:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
1253:   PetscInt        size_in = data->size_iXKZ*cV, i, ldh;
1254:   PetscScalar     *h, *in, *out;
1255:   PetscBLASInt    cV_, n, info, ld;
1256:   DvdReduction    r;
1257:   DvdReductionChunk
1258:                   ops[4];
1259:   DvdMult_copy_func
1260:                   sr[4];
1261: 
1263:   if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");

1265:   /* h <- X'*V */
1266:   h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1267:   SlepcAllReduceSumBegin(ops, 4, in, out, size_in, &r,
1268:                                 ((PetscObject)d->V[0])->comm);
1269:   for (i=0; i<cV; i++) {
1270:     VecsMultS(&h[i*ldh],0,ldh,d->V-data->size_KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i*2]);
1271:     VecsMultS(&h[i*ldh+data->size_KZ],0,ldh,data->u,0,data->size_iXKZ-data->size_KZ,V+i,0,1,&r,&sr[i*2+1]);
1272:   }
1273:   SlepcAllReduceSumEnd(&r);

1275:   /* h <- iXKZ\h */
1276:   cV_ = PetscBLASIntCast(cV);
1277:   n = PetscBLASIntCast(data->size_iXKZ);
1278:   ld = PetscBLASIntCast(data->ldiXKZ);
1280:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1281:   LAPACKgetrs_("N", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info);
1282:   PetscFPTrapPop();
1283:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);

1285:   /* V <- V - KZ*h */
1286:   for (i=0; i<cV; i++) {
1287:     SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->KZ,data->size_iXKZ,&h[ldh*i],ldh,data->size_iXKZ,1);
1288:   }
1289:   return(0);
1290: #endif
1291: }

1295: /* Compute (I - X*iXKZ*KZ')*V where,
1296:    V, the vectors to apply the projector,
1297:    cV, the number of vectors in V,
1298:    auxS, auxiliar vector of size length 3*size_iXKZ*cV
1299: */
1300: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS)
1301: {
1302: #if defined(PETSC_MISSING_LAPACK_GETRS) 
1304:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1305: #else
1306:   PetscErrorCode  ierr;
1307:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
1308:   PetscInt        size_in = data->size_iXKZ*cV, i, ldh;
1309:   PetscScalar     *h, *in, *out;
1310:   PetscBLASInt    cV_, n, info, ld;
1311:   DvdReduction    r;
1312:   DvdReductionChunk
1313:                   ops[2];
1314:   DvdMult_copy_func
1315:                   sr[2];
1316: 
1318:   if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");

1320:   /* h <- KZ'*V */
1321:   h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1322:   SlepcAllReduceSumBegin(ops, 2, in, out, size_in, &r,
1323:                                 ((PetscObject)d->V[0])->comm);
1324:   for (i=0; i<cV; i++) {
1325:     VecsMultS(&h[i*ldh],0,ldh,data->KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i]);
1326:   }
1327:   SlepcAllReduceSumEnd(&r);

1329:   /* h <- iXKZ\h */
1330:   cV_ = PetscBLASIntCast(cV);
1331:   n = PetscBLASIntCast(data->size_iXKZ);
1332:   ld = PetscBLASIntCast(data->ldiXKZ);
1334:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1335:   LAPACKgetrs_("C", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info);
1336:   PetscFPTrapPop();
1337:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);

1339:   /* V <- V - X*h */
1340:   for (i=0; i<cV; i++) {
1341:     SlepcUpdateVectorsZ(V+i,1.0,-1.0,d->V-data->size_KZ,data->size_KZ,&h[ldh*i],ldh,data->size_KZ,1);
1342:     SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->u,data->size_iXKZ-data->size_KZ,&h[ldh*i+data->size_KZ],ldh,data->size_iXKZ-data->size_KZ,1);
1343:   }
1344:   return(0);
1345: #endif
1346: }