Actual source code: dvd_improvex.c

  1: /*
  2:   SLEPc eigensolver: "davidson"

  4:   Step: improve the eigenvectors X

  6:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  7:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  8:    Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain

 10:    This file is part of SLEPc.
 11:       
 12:    SLEPc is free software: you can redistribute it and/or modify it under  the
 13:    terms of version 3 of the GNU Lesser General Public License as published by
 14:    the Free Software Foundation.

 16:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 17:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 18:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 19:    more details.

 21:    You  should have received a copy of the GNU Lesser General  Public  License
 22:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 23:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 24: */

 26:  #include davidson.h
 27: #include <slepcvec.h>
 28: #include <slepcblaslapack.h>

 30: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d, void *funcV, Vec *D,
 31:   PetscInt max_size_D, PetscInt r_s, PetscInt r_e, Vec *auxV,
 32:   PetscScalar *auxS);
 33: PetscErrorCode dvd_pcapplyba(PC pc,PCSide side,Vec in,Vec out,Vec w);
 34: PetscErrorCode dvd_pcapply(PC pc,Vec in,Vec out);
 35: PetscErrorCode dvd_pcapplytrans(PC pc,Vec in,Vec out);
 36: PetscErrorCode dvd_matmult_jd(Mat A,Vec in,Vec out);
 37: PetscErrorCode dvd_matmulttrans_jd(Mat A,Vec in,Vec out);
 38: PetscErrorCode dvd_matgetvecs_jd(Mat A, Vec *right, Vec *left);
 39: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d);
 40: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d);
 41: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d);
 42: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d, Vec *D,
 43:                                    PetscInt max_size_D, PetscInt r_s,
 44:                                    PetscInt r_e, PetscInt *size_D);
 45: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d, PetscInt i_s,
 46:   PetscInt i_e, Vec **u, Vec **v, Vec *kr, Vec **auxV, PetscScalar **auxS,
 47:   PetscScalar *theta, PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY,
 48:   PetscInt ld);
 49: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d, PetscInt i_s,
 50:   PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
 51:   PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld);
 52: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d, PetscInt i_s,
 53:   PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
 54:   PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld);
 55: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d, PetscInt i,
 56:   PetscScalar* theta, PetscScalar* thetai,
 57:   PetscInt *maxits, PetscReal *tol);
 58: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS);
 59: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS);

 61: #define size_Z (64*4)

 63: /**** JD update step (I - Kfg'/(g'Kf)) K(A - sB) (I - Kfg'/(g'Kf)) t = (I - Kfg'/(g'Kf))r  ****/

 65: typedef struct {
 66:   PetscInt size_X;
 67:   void
 68:     *old_improveX_data;   /* old improveX_data */
 69:   improveX_type
 70:     old_improveX;         /* old improveX */
 71:   KSP ksp;                /* correction equation solver */
 72:   Vec
 73:     friends,              /* reference vector for composite vectors */
 74:     *auxV;                /* auxiliar vectors */
 75:   PetscScalar *auxS,      /* auxiliar scalars */
 76:     *theta,
 77:     *thetai;              /* the shifts used in the correction eq. */
 78:   PetscInt maxits,        /* maximum number of iterations */
 79:     r_s, r_e,             /* the selected eigenpairs to improve */
 80:     ksp_max_size;         /* the ksp maximum subvectors size */
 81:   PetscReal tol,          /* the maximum solution tolerance */
 82:     lastTol,              /* last tol for dynamic stopping criterion */
 83:     fix;                  /* tolerance for using the approx. eigenvalue */
 84:   PetscBool
 85:     dynamic;              /* if the dynamic stopping criterion is applied */
 86:   dvdDashboard
 87:     *d;                   /* the currect dvdDashboard reference */
 88:   PC old_pc;              /* old pc in ksp */
 89:   Vec
 90:     *u,                   /* new X vectors */
 91:     *real_KZ,             /* original KZ */
 92:     *KZ;                  /* KZ vecs for the projector KZ*inv(X'*KZ)*X' */
 93:   PetscScalar
 94:    *XKZ,                  /* X'*KZ */
 95:    *iXKZ;                 /* inverse of XKZ */
 96:   PetscInt
 97:     ldXKZ,                /* leading dimension of XKZ */
 98:     size_iXKZ,            /* size of iXKZ */
 99:     ldiXKZ,               /* leading dimension of iXKZ */
100:     size_KZ,              /* size of converged KZ */
101:     size_real_KZ,         /* original size of KZ */
102:     size_cX,              /* last value of d->size_cX */
103:     old_size_X;           /* last number of improved vectors */
104:   PetscBLASInt
105:     *iXKZPivots;          /* array of pivots */
106: } dvdImprovex_jd;

108: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);
109: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);

113: PetscErrorCode dvd_improvex_jd(dvdDashboard *d,dvdBlackboard *b,KSP ksp,PetscInt max_bs,PetscInt cX_impr,PetscBool dynamic)
114: {
115:   PetscErrorCode  ierr;
116:   dvdImprovex_jd  *data;
117:   PetscBool       useGD,her_probl,std_probl;
118:   PC              pc;
119:   PetscInt        size_P,s=1;

122:   std_probl = DVD_IS(d->sEP,DVD_EP_STD)?PETSC_TRUE:PETSC_FALSE;
123:   her_probl = DVD_IS(d->sEP,DVD_EP_HERMITIAN)?PETSC_TRUE:PETSC_FALSE;

125:   /* Setting configuration constrains */
126:   PetscObjectTypeCompare((PetscObject)ksp,KSPPREONLY,&useGD);

128:   /* If the arithmetic is real and the problem is not Hermitian, then
129:      the block size is incremented in one */
130: #if !defined(PETSC_USE_COMPLEX)
131:   if (!her_probl) {
132:     max_bs++;
133:     b->max_size_P = PetscMax(b->max_size_P,2);
134:     s = 2;
135:   } else
136: #endif
137:     b->max_size_P = PetscMax(b->max_size_P,1);
138:   b->max_size_X = PetscMax(b->max_size_X,max_bs);
139:   size_P = b->max_size_P+cX_impr;
140:   b->max_size_auxV = PetscMax(b->max_size_auxV,
141:      b->max_size_X*3+(useGD?0:2)+ /* u, kr, auxV(max_size_X+2?) */
142:      ((her_probl || !d->eps->trueres)?1:PetscMax(s*2,b->max_size_cX_proj+b->max_size_X))); /* testConv */
143: 
144:   b->own_scalars+= size_P*size_P; /* XKZ */
145:   b->max_size_auxS = PetscMax(b->max_size_auxS,
146:     b->max_size_X*3 + /* theta, thetai */
147:     size_P*size_P + /* iXKZ */
148:     FromIntToScalar(size_P) + /* iXkZPivots */
149:     PetscMax(PetscMax(
150:       3*b->max_size_proj*b->max_size_X, /* dvd_improvex_apply_proj */
151:       8*cX_impr*b->max_size_X), /* dvd_improvex_jd_proj_cuv_KZX */
152:       (her_probl || !d->eps->trueres)?0:b->max_nev*b->max_nev+PetscMax(b->max_nev*6,(b->max_nev+b->max_size_proj)*s+b->max_nev*(b->max_size_X+b->max_size_cX_proj)*(std_probl?2:4)+64))); /* preTestConv */
153:   b->own_vecs+= size_P; /* KZ */

155:   /* Setup the preconditioner */
156:   if (ksp) {
157:     KSPGetPC(ksp,&pc);
158:     dvd_static_precond_PC(d,b,pc);
159:   } else {
160:     dvd_static_precond_PC(d,b,0);
161:   }

163:   /* Setup the step */
164:   if (b->state >= DVD_STATE_CONF) {
165:     PetscMalloc(sizeof(dvdImprovex_jd),&data);
166:     data->dynamic = dynamic;
167:     data->size_real_KZ = size_P;
168:     data->real_KZ = b->free_vecs; b->free_vecs+= data->size_real_KZ;
169:     d->max_cX_in_impr = cX_impr;
170:     data->XKZ = b->free_scalars; b->free_scalars+= size_P*size_P;
171:     data->ldXKZ = size_P;
172:     data->size_X = b->max_size_X;
173:     data->old_improveX_data = d->improveX_data;
174:     d->improveX_data = data;
175:     data->old_improveX = d->improveX;
176:     data->ksp = useGD?PETSC_NULL:ksp;
177:     data->d = d;
178:     d->improveX = dvd_improvex_jd_gen;
179:     data->ksp_max_size = max_bs;

181:     DVD_FL_ADD(d->startList,dvd_improvex_jd_start);
182:     DVD_FL_ADD(d->endList,dvd_improvex_jd_end);
183:     DVD_FL_ADD(d->destroyList,dvd_improvex_jd_d);
184:   }

186:   return(0);
187: }


192: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d)
193: {
194:   PetscErrorCode  ierr;
195:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
196:   PetscInt        rA, cA, rlA, clA;
197:   Mat             A;
198:   PetscBool       t;
199:   PC              pc;


203:   data->KZ = data->real_KZ;
204:   data->size_KZ = data->size_cX = data->old_size_X = 0;
205:   data->lastTol = data->dynamic?0.5:0.0;

207:   /* Setup the ksp */
208:   if(data->ksp) {
209:     /* Create the reference vector */
210:     VecCreateCompWithVecs(d->V, data->ksp_max_size, PETSC_NULL,
211:                                  &data->friends);

213:     /* Save the current pc and set a PCNONE */
214:     KSPGetPC(data->ksp, &data->old_pc);
215:     PetscObjectTypeCompare((PetscObject)data->old_pc, PCNONE, &t);
216: 
217:     data->lastTol = 0.5;
218:     if (t) {
219:       data->old_pc = 0;
220:     } else {
221:       PetscObjectReference((PetscObject)data->old_pc);
222:       PCCreate(((PetscObject)d->eps)->comm, &pc);
223:       PCSetType(pc, PCSHELL);
224:       PCSetOperators(pc, d->A, d->A, SAME_PRECONDITIONER);
225:       PCShellSetApply(pc,dvd_pcapply);
226:       PCShellSetApplyBA(pc,dvd_pcapplyba);
227:       PCShellSetApplyTranspose(pc,dvd_pcapplytrans);
228:       KSPSetPC(data->ksp, pc);
229:       PCDestroy(&pc);
230:     }

232:     /* Create the (I-v*u')*K*(A-s*B) matrix */
233:     MatGetSize(d->A, &rA, &cA);
234:     MatGetLocalSize(d->A, &rlA, &clA);
235:     MatCreateShell(((PetscObject)d->A)->comm, rlA*data->ksp_max_size,
236:                           clA*data->ksp_max_size, rA*data->ksp_max_size,
237:                           cA*data->ksp_max_size, data, &A);
238:     MatShellSetOperation(A, MATOP_MULT,
239:                                 (void(*)(void))dvd_matmult_jd);
240:     MatShellSetOperation(A, MATOP_MULT_TRANSPOSE,
241:                                 (void(*)(void))dvd_matmulttrans_jd);
242:     MatShellSetOperation(A, MATOP_GET_VECS,
243:                                 (void(*)(void))dvd_matgetvecs_jd);
244: 

246:     /* Try to avoid KSPReset */
247:     KSPGetOperatorsSet(data->ksp,&t,PETSC_NULL);
248:     if (t) {
249:       Mat M;
250:       PetscInt rM;
251:       KSPGetOperators(data->ksp,&M,PETSC_NULL,PETSC_NULL);
252:       MatGetSize(M,&rM,PETSC_NULL);
253:       if (rM != rA*data->ksp_max_size) { KSPReset(data->ksp); }
254:     }
255:     KSPSetOperators(data->ksp, A, A, SAME_PRECONDITIONER);
256: 
257:     KSPSetUp(data->ksp);
258:     MatDestroy(&A);
259:   } else {
260:     data->old_pc = 0;
261:     data->friends = PETSC_NULL;
262:   }
263: 
264:   return(0);
265: }


270: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d)
271: {
272:   PetscErrorCode  ierr;
273:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;


277:   if (data->friends) { VecDestroy(&data->friends);  }

279:   /* Restore the pc of ksp */
280:   if (data->old_pc) {
281:     KSPSetPC(data->ksp, data->old_pc);
282:     PCDestroy(&data->old_pc);
283:   }

285:   return(0);
286: }


291: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d)
292: {
293:   PetscErrorCode  ierr;
294:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;

297: 
298:   /* Restore changes in dvdDashboard */
299:   d->improveX_data = data->old_improveX_data;

301:   /* Free local data and objects */
302:   PetscFree(data);

304:   return(0);
305: }


310: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d,Vec *D,PetscInt max_size_D,PetscInt r_s,PetscInt r_e,PetscInt *size_D)
311: {
312:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
313:   PetscErrorCode  ierr;
314:   PetscInt        i,j,n,maxits,maxits0,lits,s,ld,k;
315:   PetscScalar     *pX,*pY,*auxS = d->auxS,*auxS0;
316:   PetscReal       tol,tol0;
317:   Vec             *u,*v,*kr,kr_comp,D_comp;
318:   PetscBool       odd_situation = PETSC_FALSE;


322:   /* Quick exit */
323:   if ((max_size_D == 0) || r_e-r_s <= 0) {
324:    *size_D = 0;
325:    /* Callback old improveX */
326:     if (data->old_improveX) {
327:       d->improveX_data = data->old_improveX_data;
328:       data->old_improveX(d, PETSC_NULL, 0, 0, 0, PETSC_NULL);
329:       d->improveX_data = data;
330:     }
331:     return(0);
332:   }
333: 
334:   n = PetscMin(PetscMin(data->size_X, max_size_D), r_e-r_s);
335:   if (n == 0) SETERRQ(PETSC_COMM_SELF,1, "n == 0");
336:   if (data->size_X < r_e-r_s) SETERRQ(PETSC_COMM_SELF,1, "size_X < r_e-r_s");

338:   DSGetLeadingDimension(d->ps,&ld);

340:   /* Restart lastTol if a new pair converged */
341:   if (data->dynamic && data->size_cX < d->size_cX)
342:     data->lastTol = 0.5;

344:   for(i=0, s=0, auxS0=auxS; i<n; i+=s) {
345:     /* If the selected eigenvalue is complex, but the arithmetic is real... */
346: #if !defined(PETSC_USE_COMPLEX)
347:     if (d->eigi[i] != 0.0) {
348:       if (i+2 <= max_size_D) s=2; else break;
349:     } else
350: #endif
351:       s=1;

353:     data->auxV = d->auxV;
354:     data->r_s = r_s+i; data->r_e = r_s+i+s;
355:     auxS = auxS0;
356:     data->theta = auxS; auxS+= 2*s;
357:     data->thetai = auxS; auxS+= s;
358:     kr = data->auxV; data->auxV+= s;

360:     /* Compute theta, maximum iterations and tolerance */
361:     maxits = 0; tol = 1;
362:     for(j=0; j<s; j++) {
363:       d->improvex_jd_lit(d, r_s+i+j, &data->theta[2*j],
364:                                 &data->thetai[j], &maxits0, &tol0);
365: 
366:       maxits+= maxits0; tol*= tol0;
367:     }
368:     maxits/= s; tol = data->dynamic?data->lastTol:exp(log(tol)/s);

370:     /* Compute u, v and kr */
371:     k = r_s+i+d->cX_in_H;
372:     DSVectors(d->ps,DS_MAT_X,&k,PETSC_NULL);
373:     DSNormalize(d->ps,DS_MAT_X,r_s+i+d->cX_in_H);
374:     k = r_s+i+d->cX_in_H;
375:     DSVectors(d->ps,DS_MAT_Y,&k,PETSC_NULL);
376:     DSNormalize(d->ps,DS_MAT_Y,r_s+i+d->cX_in_H);
377:     DSGetArray(d->ps,DS_MAT_X,&pX);
378:     DSGetArray(d->ps,DS_MAT_Y,&pY);
379:     dvd_improvex_jd_proj_cuv(d,r_s+i,r_s+i+s,&u,&v,kr,&data->auxV,&auxS,data->theta,data->thetai,pX,pY,ld);
380:     DSRestoreArray(d->ps,DS_MAT_X,&pX);
381:     DSRestoreArray(d->ps,DS_MAT_Y,&pY);
382:     data->u = u;

384:     /* Check if the first eigenpairs are converged */
385:     if (i == 0) {
386:       PetscInt n_auxV = data->auxV-d->auxV+s, n_auxS = auxS - d->auxS;
387:       d->auxV+= n_auxV; d->size_auxV-= n_auxV;
388:       d->auxS+= n_auxS; d->size_auxS-= n_auxS;
389:       d->preTestConv(d,0,s,s,d->auxV-s,PETSC_NULL,&d->npreconv);
390:       d->auxV-= n_auxV; d->size_auxV+= n_auxV;
391:       d->auxS-= n_auxS; d->size_auxS+= n_auxS;
392:       if (d->npreconv > 0) break;
393:     }

395:     /* Test the odd situation of solving Ax=b with A=I */
396: #if !defined(PETSC_USE_COMPLEX)
397:     odd_situation = (data->ksp && data->theta[0] == 1. && data->theta[1] == 0. && data->thetai[0] == 0. && d->B == PETSC_NULL)?PETSC_TRUE:PETSC_FALSE;
398: #else
399:     odd_situation = (data->ksp && data->theta[0] == 1. && data->theta[1] == 0. && d->B == PETSC_NULL)?PETSC_TRUE:PETSC_FALSE;
400: #endif
401:     /* If JD */
402:     if (data->ksp && !odd_situation) {
403:       data->auxS = auxS;

405:       /* kr <- -kr */
406:       for(j=0; j<s; j++) {
407:         VecScale(kr[j], -1.0);
408:       }

410:       /* Compouse kr and D */
411:       VecCreateCompWithVecs(kr, data->ksp_max_size, data->friends,
412:                                    &kr_comp);
413:       VecCreateCompWithVecs(&D[i], data->ksp_max_size, data->friends,
414:                                    &D_comp);
415:       VecCompSetSubVecs(data->friends,s,PETSC_NULL);
416: 
417:       /* Solve the correction equation */
418:       KSPSetTolerances(data->ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT,
419:                               maxits);
420:       KSPSolve(data->ksp, kr_comp, D_comp);
421:       KSPGetIterationNumber(data->ksp, &lits);
422:       d->eps->OP->lineariterations+= lits;
423: 
424:       /* Destroy the composed ks and D */
425:       VecDestroy(&kr_comp);
426:       VecDestroy(&D_comp);

428:     /* If GD */
429:     } else {
430:       for(j=0; j<s; j++) {
431:         d->improvex_precond(d, r_s+i+j, kr[j], D[i+j]);
432:       }
433:       dvd_improvex_apply_proj(d, &D[i], s, auxS);
434:     }
435:     /* Prevent that short vectors are discarded in the orthogonalization */
436:     if (i == 0 && d->eps->errest[d->nconv+r_s] > PETSC_MACHINE_EPSILON && d->eps->errest[d->nconv+r_s] < PETSC_MAX_REAL) {
437:       for(j=0; j<s; j++) {
438:         VecScale(D[j],1.0/d->eps->errest[d->nconv+r_s]);
439:       }
440:     }
441:   }
442:   *size_D = i;
443:   if (data->dynamic) data->lastTol = PetscMax(data->lastTol/2.0,PETSC_MACHINE_EPSILON*10.0);
444: 
445:   /* Callback old improveX */
446:   if (data->old_improveX) {
447:     d->improveX_data = data->old_improveX_data;
448:     data->old_improveX(d, PETSC_NULL, 0, 0, 0, PETSC_NULL);
449:     d->improveX_data = data;
450:   }

452:   return(0);
453: }

455: /* y <- theta[1]A*x - theta[0]*B*x
456:    auxV, two auxiliary vectors */
459: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
460: {
461:   PetscErrorCode  ierr;
462:   PetscInt        n,i;
463:   const Vec       *Bx;

466:   n = data->r_e - data->r_s;
467:   for(i=0; i<n; i++) {
468:     MatMult(data->d->A,x[i],y[i]);
469:   }

471:   for(i=0; i<n; i++) {
472: #if !defined(PETSC_USE_COMPLEX)
473:     if(data->d->eigi[data->r_s+i] != 0.0) {
474:       if (data->d->B) {
475:         MatMult(data->d->B,x[i],auxV[0]);
476:         MatMult(data->d->B,x[i+1],auxV[1]);
477:         Bx = auxV;
478:       } else {
479:         Bx = &x[i];
480:       }

482:       /* y_i   <- [ t_2i+1*A*x_i   - t_2i*Bx_i + ti_i*Bx_i+1;
483:          y_i+1      t_2i+1*A*x_i+1 - ti_i*Bx_i - t_2i*Bx_i+1  ] */
484:       VecAXPBYPCZ(y[i],-data->theta[2*i],data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
485:       VecAXPBYPCZ(y[i+1],-data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
486:       i++;
487:     } else
488: #endif
489:     {
490:       if (data->d->B) {
491:         MatMult(data->d->B,x[i],auxV[0]);
492:         Bx = auxV;
493:       } else {
494:         Bx = &x[i];
495:       }
496:       VecAXPBY(y[i],-data->theta[i*2],data->theta[i*2+1],Bx[0]);
497:     }
498:   }
499:   return(0);
500: }



504: /* y <- theta[1]'*A'*x - theta[0]'*B'*x
505:    auxV, two auxiliary vectors */
508: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
509: {
510:   PetscErrorCode  ierr;
511:   PetscInt        n,i;
512:   const Vec       *Bx;

515:   n = data->r_e - data->r_s;
516:   for(i=0; i<n; i++) {
517:     MatMultTranspose(data->d->A,x[i],y[i]);
518:   }

520:   for(i=0; i<n; i++) {
521: #if !defined(PETSC_USE_COMPLEX)
522:     if(data->d->eigi[data->r_s+i] != 0.0) {
523:       if (data->d->B) {
524:         MatMultTranspose(data->d->B,x[i],auxV[0]);
525:         MatMultTranspose(data->d->B,x[i+1],auxV[1]);
526:         Bx = auxV;
527:       } else {
528:         Bx = &x[i];
529:       }

531:       /* y_i   <- [ t_2i+1*A*x_i   - t_2i*Bx_i - ti_i*Bx_i+1;
532:          y_i+1      t_2i+1*A*x_i+1 + ti_i*Bx_i - t_2i*Bx_i+1  ] */
533:       VecAXPBYPCZ(y[i],-data->theta[2*i],-data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
534:       VecAXPBYPCZ(y[i+1],data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
535:       i++;
536:     } else
537: #endif
538:     {
539:       if (data->d->B) {
540:         MatMultTranspose(data->d->B,x[i],auxV[0]);
541:         Bx = auxV;
542:       } else {
543:         Bx = &x[i];
544:       }
545:       VecAXPBY(y[i],PetscConj(-data->theta[i*2]),PetscConj(data->theta[i*2+1]),Bx[0]);
546:     }
547:   }
548:   return(0);
549: }


554: PetscErrorCode dvd_pcapplyba(PC pc,PCSide side,Vec in,Vec out,Vec w)
555: {
556:   PetscErrorCode  ierr;
557:   dvdImprovex_jd  *data;
558:   PetscInt        n,i;
559:   const Vec       *inx,*outx,*wx;
560:   Mat             A;


564:   PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
565:   MatShellGetContext(A,(void**)&data);
566:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
567:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
568:   VecCompGetSubVecs(w,PETSC_NULL,&wx);
569:   n = data->r_e - data->r_s;

571:   /* Check auxiliary vectors */
572:   if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

574:   switch(side) {
575:   case PC_LEFT:
576:     /* aux <- theta[1]A*in - theta[0]*B*in */
577:     dvd_aux_matmult(data,inx,data->auxV,outx);
578: 
579:     /* out <- K * aux */
580:     for(i=0; i<n; i++) {
581:       data->d->improvex_precond(data->d,data->r_s+i,data->auxV[i],outx[i]);
582:     }
583:     break;
584:   case PC_RIGHT:
585:     /* aux <- K * in */
586:     for(i=0; i<n; i++) {
587:       data->d->improvex_precond(data->d,data->r_s+i,inx[i],data->auxV[i]);
588:     }
589: 
590:     /* out <- theta[1]A*auxV - theta[0]*B*auxV */
591:     dvd_aux_matmult(data,data->auxV,outx,wx);
592:     break;
593:   case PC_SYMMETRIC:
594:     /* aux <- K^{1/2} * in */
595:     for(i=0; i<n; i++) {
596:       PCApplySymmetricRight(data->old_pc,inx[i],data->auxV[i]);
597:     }
598: 
599:     /* out <- theta[1]A*auxV - theta[0]*B*auxV */
600:     dvd_aux_matmult(data,data->auxV,wx,outx);
601: 
602:     /* aux <- K^{1/2} * in */
603:     for(i=0; i<n; i++) {
604:       PCApplySymmetricLeft(data->old_pc,wx[i],outx[i]);
605:     }
606:     break;
607:   default:
608:     SETERRQ(PETSC_COMM_SELF,1, "Unsupported KSP side");
609:   }

611:   /* out <- out - v*(u'*out) */
612:   dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);

614:   return(0);
615: }


620: PetscErrorCode dvd_pcapply(PC pc,Vec in,Vec out)
621: {
622:   PetscErrorCode  ierr;
623:   dvdImprovex_jd  *data;
624:   PetscInt        n,i;
625:   const Vec       *inx, *outx;
626:   Mat             A;


630:   PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
631:   MatShellGetContext(A,(void**)&data);
632:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
633:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
634:   n = data->r_e - data->r_s;

636:   /* out <- K * in */
637:   for(i=0; i<n; i++) {
638:     data->d->improvex_precond(data->d,data->r_s+i,inx[i],outx[i]);
639:   }

641:   /* out <- out - v*(u'*out) */
642:   dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);

644:   return(0);
645: }

649: PetscErrorCode dvd_pcapplytrans(PC pc,Vec in,Vec out)
650: {
651:   PetscErrorCode  ierr;
652:   dvdImprovex_jd  *data;
653:   PetscInt        n,i;
654:   const Vec       *inx, *outx;
655:   Mat             A;


659:   PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
660:   MatShellGetContext(A,(void**)&data);
661:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
662:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
663:   n = data->r_e - data->r_s;

665:   /* Check auxiliary vectors */
666:   if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

668:   /* auxV <- in */
669:   for(i=0; i<n; i++) {
670:     VecCopy(inx[i],data->auxV[i]);
671:   }
672: 
673:   /* auxV <- auxV - u*(v'*auxV) */
674:   dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);

676:   /* out <- K' * aux */
677:   for(i=0; i<n; i++) {
678:     PCApplyTranspose(data->old_pc,data->auxV[i],outx[i]);
679:   }

681:   return(0);
682: }


687: PetscErrorCode dvd_matmult_jd(Mat A,Vec in,Vec out)
688: {
689:   PetscErrorCode  ierr;
690:   dvdImprovex_jd  *data;
691:   PetscInt        n;
692:   const Vec       *inx, *outx;
693:   PCSide          side;


697:   MatShellGetContext(A,(void**)&data);
698:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
699:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
700:   n = data->r_e - data->r_s;

702:   /* Check auxiliary vectors */
703:   if (&data->auxV[2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

705:   /* out <- theta[1]A*in - theta[0]*B*in */
706:   dvd_aux_matmult(data,inx,outx,data->auxV);

708:   KSPGetPCSide(data->ksp,&side);
709:   if (side == PC_RIGHT) {
710:     /* out <- out - v*(u'*out) */
711:     dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
712:   }

714:   return(0);
715: }

719: PetscErrorCode dvd_matmulttrans_jd(Mat A,Vec in,Vec out)
720: {
721:   PetscErrorCode  ierr;
722:   dvdImprovex_jd  *data;
723:   PetscInt        n,i;
724:   const Vec       *inx,*outx,*r,*auxV;
725:   PCSide          side;


729:   MatShellGetContext(A,(void**)&data);
730:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
731:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
732:   n = data->r_e - data->r_s;

734:   /* Check auxiliary vectors */
735:   if (&data->auxV[n+2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

737:   KSPGetPCSide(data->ksp,&side);
738:   if (side == PC_RIGHT) {
739:     /* auxV <- in */
740:     for(i=0; i<n; i++) {
741:       VecCopy(inx[i],data->auxV[i]);
742:     }

744:     /* auxV <- auxV - v*(u'*auxV) */
745:     dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);
746:     r = data->auxV;
747:     auxV = data->auxV+n;
748:   } else {
749:     r = inx;
750:     auxV = data->auxV;
751:   }

753:   /* out <- theta[1]A*r - theta[0]*B*r */
754:   dvd_aux_matmulttrans(data,r,outx,auxV);

756:   return(0);
757: }


762: PetscErrorCode dvd_matgetvecs_jd(Mat A, Vec *right, Vec *left)
763: {
764:   PetscErrorCode  ierr;
765:   Vec             *r, *l;
766:   dvdImprovex_jd  *data;
767:   PetscInt        n, i;


771:   MatShellGetContext(A, (void**)&data);
772:   n = data->ksp_max_size;
773:   if (right) {
774:     PetscMalloc(sizeof(Vec)*n, &r);
775:   }
776:   if (left) {
777:     PetscMalloc(sizeof(Vec)*n, &l);
778:   }
779:   for (i=0; i<n; i++) {
780:     MatGetVecs(data->d->A, right?&r[i]:PETSC_NULL,
781:                       left?&l[i]:PETSC_NULL);
782:   }
783:   if(right) {
784:     VecCreateCompWithVecs(r, n, data->friends, right);
785:     for (i=0; i<n; i++) {
786:       VecDestroy(&r[i]);
787:     }
788:   }
789:   if(left) {
790:     VecCreateCompWithVecs(l, n, data->friends, left);
791:     for (i=0; i<n; i++) {
792:       VecDestroy(&l[i]);
793:     }
794:   }

796:   if (right) {
797:     PetscFree(r);
798:   }
799:   if (left) {
800:     PetscFree(l);
801:   }

803:   return(0);
804: }


809: PetscErrorCode dvd_improvex_jd_proj_uv(dvdDashboard *d, dvdBlackboard *b,
810:                                        ProjType_t p)
811: {

814:   /* Setup the step */
815:   if (b->state >= DVD_STATE_CONF) {
816:     switch(p) {
817:     case DVD_PROJ_KXX:
818:       d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KXX; break;
819:     case DVD_PROJ_KZX:
820:       d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KZX; break;
821:     }
822:   }

824:   return(0);
825: }

829: /* 
830:   Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
831:   kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
832:   where
833:   auxV, 4*(i_e-i_s) auxiliar global vectors
834:   pX,pY, the right and left eigenvectors of the projected system
835:   ld, the leading dimension of pX and pY
836:   auxS: max(8*bs*max_cX_in_proj,size_V*size_V)
837: */
838: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d, PetscInt i_s,
839:   PetscInt i_e, Vec **u, Vec **v, Vec *kr, Vec **auxV, PetscScalar **auxS,
840:   PetscScalar *theta, PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY,
841:   PetscInt ld)
842: {
843: #if defined(PETSC_MISSING_LAPACK_GETRF)
845:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable");
846: #else
847:   PetscErrorCode  ierr;
848:   PetscInt        n = i_e - i_s, size_KZ, V_new, rm, i, size_in;
849:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
850:   PetscBLASInt    s, ldXKZ, info;
851:   DvdReduction    r;
852:   DvdReductionChunk
853:                   ops[2];
854:   DvdMult_copy_func
855:                   sr[2];


859:   /* Check consistency */
860:   V_new = d->size_cX - data->size_cX;
861:   if (V_new > data->old_size_X) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
862:   data->old_size_X = n;

864:   /* KZ <- KZ(rm:rm+max_cX-1) */
865:   rm = PetscMax(V_new+data->size_KZ-d->max_cX_in_impr, 0);
866:   for (i=0; i<d->max_cX_in_impr; i++) {
867:     VecCopy(data->KZ[i+rm], data->KZ[i]);
868:   }
869:   data->size_cX = d->size_cX;

871:   /* XKZ <- XKZ(rm:rm+max_cX-1,rm:rm+max_cX-1) */
872:   for (i=0; i<d->max_cX_in_impr; i++) {
873:     SlepcDenseCopy(&data->XKZ[i*data->ldXKZ+i], data->ldXKZ, &data->XKZ[(i+rm)*data->ldXKZ+i+rm], data->ldXKZ, data->size_KZ, 1);
874:   }
875:   data->size_KZ = PetscMin(d->max_cX_in_impr, data->size_KZ+V_new);

877:   /* Compute X, KZ and KR */
878:   *u = *auxV; *auxV+= n;
879:   *v = &data->KZ[data->size_KZ];
880:   d->improvex_jd_proj_uv(d, i_s, i_e, *u, *v, kr, *auxV, theta, thetai,
881:                                 pX, pY, ld);

883:   /* XKZ <- X'*KZ */
884:   size_KZ = data->size_KZ+n;
885:   size_in = 2*n*data->size_KZ+n*n;
886:   SlepcAllReduceSumBegin(ops,2,*auxS,*auxS+size_in,size_in,&r,((PetscObject)d->V[0])->comm);
887:   VecsMultS(data->XKZ,0,data->ldXKZ,d->V-data->size_KZ,0,data->size_KZ,data->KZ,data->size_KZ,size_KZ,&r,&sr[0]);
888:   VecsMultS(&data->XKZ[data->size_KZ],0,data->ldXKZ,*u,0,n,data->KZ,0,size_KZ,&r,&sr[1]);
889:   SlepcAllReduceSumEnd(&r);

891:   /* iXKZ <- inv(XKZ) */
892:   s = PetscBLASIntCast(size_KZ);
893:   data->ldiXKZ = data->size_iXKZ = size_KZ;
894:   data->iXKZ = *auxS; *auxS+= size_KZ*size_KZ;
895:   data->iXKZPivots = (PetscBLASInt*)*auxS;
896:   *auxS += FromIntToScalar(size_KZ);
897:   SlepcDenseCopy(data->iXKZ,data->ldiXKZ,data->XKZ,data->ldXKZ,size_KZ,size_KZ);
898:   ldXKZ = PetscBLASIntCast(data->ldiXKZ);
899:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
900:   LAPACKgetrf_(&s, &s, data->iXKZ, &ldXKZ, data->iXKZPivots, &info);
901:   PetscFPTrapPop();
902:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRF %d", info);

904:   return(0);
905: #endif
906: }

908: #define DVD_COMPLEX_RAYLEIGH_QUOTIENT(ur,ui,Axr,Axi,Bxr,Bxi,eigr,eigi,b,ierr)\
909: { \
910:   VecDot((Axr), (ur), &(b)[0]);  /* r*A*r */ \
911:   VecDot((Axr), (ui), &(b)[1]);  /* i*A*r */ \
912:   VecDot((Axi), (ur), &(b)[2]);  /* r*A*i */ \
913:   VecDot((Axi), (ui), &(b)[3]);  /* i*A*i */ \
914:   VecDot((Bxr), (ur), &(b)[4]);  /* r*B*r */ \
915:   VecDot((Bxr), (ui), &(b)[5]);  /* i*B*r */ \
916:   VecDot((Bxi), (ur), &(b)[6]);  /* r*B*i */ \
917:   VecDot((Bxi), (ui), &(b)[7]);  /* i*B*i */ \
918:   (b)[0]  = (b)[0]+(b)[3]; /* rAr+iAi */ \
919:   (b)[2] =  (b)[2]-(b)[1]; /* rAi-iAr */ \
920:   (b)[4] = (b)[4]+(b)[7]; /* rBr+iBi */ \
921:   (b)[6] = (b)[6]-(b)[5]; /* rBi-iBr */ \
922:   (b)[7] = (b)[4]*(b)[4] + (b)[6]*(b)[6]; /* k */ \
923:   *(eigr) = ((b)[0]*(b)[4] + (b)[2]*(b)[6]) / (b)[7]; /* eig_r */ \
924:   *(eigi) = ((b)[2]*(b)[4] - (b)[0]*(b)[6]) / (b)[7]; /* eig_i */ \
925: }

927: #if !defined(PETSC_USE_COMPLEX)
928: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
929:   for((i)=0; (i)<(n); (i)++) { \
930:     if ((eigi)[(i_s)+(i)] != 0.0) { \
931:       /* eig_r = [(rAr+iAi)*(rBr+iBi) + (rAi-iAr)*(rBi-iBr)]/k \
932:          eig_i = [(rAi-iAr)*(rBr+iBi) - (rAr+iAi)*(rBi-iBr)]/k \
933:          k     =  (rBr+iBi)*(rBr+iBi) + (rBi-iBr)*(rBi-iBr)    */ \
934:       DVD_COMPLEX_RAYLEIGH_QUOTIENT((u)[(i)], (u)[(i)+1], (Ax)[(i)], \
935:         (Ax)[(i)+1], (Bx)[(i)], (Bx)[(i)+1], &(b)[8], &(b)[9], (b), (ierr)); \
936:       if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[8])/ \
937:             PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10    || \
938:           PetscAbsScalar((eigi)[(i_s)+(i)] - (b)[9])/ \
939:             PetscAbsScalar((eigi)[(i_s)+(i)]) > 1e-10         ) { \
940:         (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its "\
941:                             "Rayleigh quotient value %G+%G\n", \
942:                             (eigr)[(i_s)+(i)], \
943:                             (eigi)[(i_s)+(i)], (b)[8], (b)[9]); \
944:       } \
945:       (i)++; \
946:     } \
947:   }
948: #else
949: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
950:   for((i)=0; (i)<(n); (i)++) { \
951:       (ierr) = VecDot((Ax)[(i)], (u)[(i)], &(b)[0]);  \
952:       (ierr) = VecDot((Bx)[(i)], (u)[(i)], &(b)[1]);  \
953:       (b)[0] = (b)[0]/(b)[1]; \
954:       if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[0])/ \
955:             PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10     ) { \
956:         (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its " \
957:                "Rayleigh quotient value %G+%G\n", \
958:                PetscRealPart((eigr)[(i_s)+(i)]), \
959:                PetscImaginaryPart((eigr)[(i_s)+(i)]), PetscRealPart((b)[0]), \
960:                PetscImaginaryPart((b)[0])); \
961:       } \
962:     }
963: #endif


968: /* 
969:   Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
970:   kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
971:   where
972:   auxV, 4*(i_e-i_s) auxiliar global vectors
973:   pX,pY, the right and left eigenvectors of the projected system
974:   ld, the leading dimension of pX and pY
975: */
976: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d, PetscInt i_s,
977:   PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
978:   PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld)
979: {
980:   PetscErrorCode  ierr;
981:   PetscInt        n = i_e - i_s, i;
982:   PetscScalar     b[16];
983:   Vec             *Ax, *Bx, *r=auxV, X[4];
984:   /* The memory manager doen't allow to call a subroutines */
985:   PetscScalar     Z[size_Z];


989:   /* u <- X(i) */
990:   dvd_improvex_compute_X(d,i_s,i_e,u,pX,ld);

992:   /* v <- theta[0]A*u + theta[1]*B*u */

994:   /* Bx <- B*X(i) */
995:   Bx = kr;
996:   if (d->BV) {
997:     SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
998:   } else {
999:     for(i=0; i<n; i++) {
1000:       if (d->B) {
1001:         MatMult(d->B, u[i], Bx[i]);
1002:       } else {
1003:         VecCopy(u[i], Bx[i]);
1004:       }
1005:     }
1006:   }

1008:   /* Ax <- A*X(i) */
1009:   Ax = r;
1010:   SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);

1012:   /* v <- Y(i) */
1013:   SlepcUpdateVectorsZ(v, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);

1015:   /* Recompute the eigenvalue */
1016:   DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, v, Ax, Bx, b, ierr);

1018:   for(i=0; i<n; i++) {
1019: #if !defined(PETSC_USE_COMPLEX)
1020:     if(d->eigi[i_s+i] != 0.0) {
1021:       /* [r_i r_i+1 kr_i kr_i+1]*= [ theta_2i'    0            1        0   
1022:                                        0         theta_2i'     0        1   
1023:                                      theta_2i+1 -thetai_i   -eigr_i -eigi_i 
1024:                                      thetai_i    theta_2i+1  eigi_i -eigr_i ] */
1025:       b[0] = b[5] = PetscConj(theta[2*i]);
1026:       b[2] = b[7] = -theta[2*i+1];
1027:       b[6] = -(b[3] = thetai[i]);
1028:       b[1] = b[4] = 0.0;
1029:       b[8] = b[13] = 1.0/d->nX[i_s+i];
1030:       b[10] = b[15] = -d->eigr[i_s+i]/d->nX[i_s+i];
1031:       b[14] = -(b[11] = d->eigi[i_s+i]/d->nX[i_s+i]);
1032:       b[9] = b[12] = 0.0;
1033:       X[0] = Ax[i]; X[1] = Ax[i+1]; X[2] = Bx[i]; X[3] = Bx[i+1];
1034:       SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 4, Z, size_Z);
1035: 
1036:       i++;
1037:     } else
1038: #endif
1039:     {
1040:       /* [Ax_i Bx_i]*= [ theta_2i'    1/nX_i
1041:                         theta_2i+1  -eig_i/nX_i ] */
1042:       b[0] = PetscConj(theta[i*2]);
1043:       b[1] = theta[i*2+1];
1044:       b[2] = 1.0/d->nX[i_s+i];
1045:       b[3] = -d->eigr[i_s+i]/d->nX[i_s+i];
1046:       X[0] = Ax[i]; X[1] = Bx[i];
1047:       SlepcUpdateVectorsD(X, 2, 1.0, b, 2, 2, 2, Z, size_Z);
1048: 
1049:     }
1050:   }
1051:   for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;

1053:   /* v <- K^{-1} r = K^{-1}(theta_2i'*Ax + theta_2i+1*Bx) */
1054:   for(i=0; i<n; i++) {
1055:     d->improvex_precond(d, i_s+i, r[i], v[i]);
1056:   }

1058:   /* kr <- P*(Ax - eig_i*Bx) */
1059:   d->calcpairs_proj_res(d, i_s, i_e, kr);

1061:   return(0);
1062: }

1066: /* 
1067:   Compute: u <- K^{-1}*X, v <- X,
1068:   kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1]
1069:   where
1070:   auxV, 4*(i_e-i_s) auxiliar global vectors
1071:   pX,pY, the right and left eigenvectors of the projected system
1072:   ld, the leading dimension of pX and pY
1073: */
1074: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d, PetscInt i_s,
1075:   PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
1076:   PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld)
1077: {
1078:   PetscErrorCode  ierr;
1079:   PetscInt        n = i_e - i_s, i;
1080:   PetscScalar     b[16];
1081:   Vec             *Ax, *Bx, *r = auxV, X[4];
1082:   /* The memory manager doen't allow to call a subroutines */
1083:   PetscScalar     Z[size_Z];


1087:   /* [v u] <- X(i) Y(i) */
1088:   dvd_improvex_compute_X(d,i_s,i_e,v,pX,ld);
1089:   SlepcUpdateVectorsZ(u, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);

1091:   /* Bx <- B*X(i) */
1092:   Bx = r;
1093:   if (d->BV) {
1094:     SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
1095:   } else {
1096:     if (d->B) {
1097:       for(i=0; i<n; i++) {
1098:         MatMult(d->B, v[i], Bx[i]);
1099:       }
1100:     } else
1101:       Bx = v;
1102:   }

1104:   /* Ax <- A*X(i) */
1105:   Ax = kr;
1106:   SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);

1108:   /* Recompute the eigenvalue */
1109:   DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, u, Ax, Bx, b, ierr);

1111:   for(i=0; i<n; i++) {
1112:     if (d->eigi[i_s+i] == 0.0) {
1113:       /* kr <- Ax -eig*Bx */
1114:       VecAXPBY(kr[i], -d->eigr[i_s+i]/d->nX[i_s+i], 1.0/d->nX[i_s+i], Bx[i]);
1115:     } else {
1116:       /* [kr_i kr_i+1 r_i r_i+1]*= [   1        0 
1117:                                        0        1
1118:                                     -eigr_i -eigi_i
1119:                                      eigi_i -eigr_i] */
1120:       b[0] = b[5] = 1.0/d->nX[i_s+i];
1121:       b[2] = b[7] = -d->eigr[i_s+i]/d->nX[i_s+i];
1122:       b[6] = -(b[3] = d->eigi[i_s+i]/d->nX[i_s+i]);
1123:       b[1] = b[4] = 0.0;
1124:       X[0] = kr[i]; X[1] = kr[i+1]; X[2] = r[i]; X[3] = r[i+1];
1125:       SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 2, Z, size_Z);
1126: 
1127:       i++;
1128:     }
1129:   }
1130:   for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;

1132:   /* kr <- P*kr */
1133:   d->calcpairs_proj_res(d, i_s, i_e, r);

1135:   /* u <- K^{-1} X(i) */
1136:   for(i=0; i<n; i++) {
1137:     d->improvex_precond(d, i_s+i, v[i], u[i]);
1138:   }

1140:   return(0);
1141: }


1146: PetscErrorCode dvd_improvex_jd_lit_const(dvdDashboard *d, dvdBlackboard *b,
1147:                                          PetscInt maxits, PetscReal tol,
1148:                                          PetscReal fix)
1149: {
1150:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;

1153:   /* Setup the step */
1154:   if (b->state >= DVD_STATE_CONF) {
1155:     data->maxits = maxits;
1156:     data->tol = tol;
1157:     data->fix = fix;
1158:     d->improvex_jd_lit = dvd_improvex_jd_lit_const_0;
1159:   }
1160:   return(0);
1161: }


1166: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d, PetscInt i,
1167:   PetscScalar* theta, PetscScalar* thetai, PetscInt *maxits, PetscReal *tol)
1168: {
1169:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
1170:   PetscReal       a;

1173:   a = SlepcAbsEigenvalue(d->eigr[i],d->eigi[i]);

1175:   if (d->nR[i]/a < data->fix) {
1176:     theta[0] = d->eigr[i];
1177:     theta[1] = 1.0;
1178: #if !defined(PETSC_USE_COMPLEX)
1179:     *thetai = d->eigi[i];
1180: #endif
1181:   } else {
1182:     theta[0] = d->target[0];
1183:     theta[1] = d->target[1];
1184: #if !defined(PETSC_USE_COMPLEX)
1185:     *thetai = 0.0;
1186: #endif
1187: }

1189: #if defined(PETSC_USE_COMPLEX)
1190:   if(thetai) *thetai = 0.0;
1191: #endif
1192:   *maxits = data->maxits;
1193:   *tol = data->tol;
1194:   return(0);
1195: }


1198: /**** Patterns implementation *************************************************/

1200: typedef PetscInt (*funcV0_t)(dvdDashboard*, PetscInt, PetscInt, Vec*);
1201: typedef PetscInt (*funcV1_t)(dvdDashboard*, PetscInt, PetscInt, Vec*,
1202:                              PetscScalar*, Vec);

1206: /* Compute D <- K^{-1} * funcV[r_s..r_e] */
1207: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d, void *funcV, Vec *D,
1208:   PetscInt max_size_D, PetscInt r_s, PetscInt r_e, Vec *auxV,
1209:   PetscScalar *auxS)
1210: {
1211:   PetscErrorCode  ierr;
1212:   PetscInt        i;

1215:   if (max_size_D >= r_e-r_s+1) {
1216:     /* The optimized version needs one vector extra of D */
1217:     /* D(1:r.size) = R(r_s:r_e-1) */
1218:     if (auxS) ((funcV1_t)funcV)(d, r_s, r_e, D+1, auxS, auxV[0]);
1219:     else      ((funcV0_t)funcV)(d, r_s, r_e, D+1);

1221:     /* D = K^{-1} * R */
1222:     for (i=0; i<r_e-r_s; i++) {
1223:       d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1224:     }
1225:   } else if (max_size_D == r_e-r_s) {
1226:     /* Non-optimized version */
1227:     /* auxV <- R[r_e-1] */
1228:     if (auxS) ((funcV1_t)funcV)(d, r_e-1, r_e, auxV, auxS, auxV[1]);
1229:     else      ((funcV0_t)funcV)(d, r_e-1, r_e, auxV);

1231:     /* D(1:r.size-1) = R(r_s:r_e-2) */
1232:     if (auxS) ((funcV1_t)funcV)(d, r_s, r_e-1, D+1, auxS, auxV[1]);
1233:     else      ((funcV0_t)funcV)(d, r_s, r_e-1, D+1);

1235:     /* D = K^{-1} * R */
1236:     for (i=0; i<r_e-r_s-1; i++) {
1237:       d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1238:     }
1239:     d->improvex_precond(d, r_e-1, auxV[0], D[r_e-r_s-1]);
1240:   } else SETERRQ(PETSC_COMM_SELF,1, "Problem: r_e-r_s > max_size_D");
1241:   return(0);
1242: }


1247: /* Compute (I - KZ*iXKZ*X')*V where,
1248:    V, the vectors to apply the projector,
1249:    cV, the number of vectors in V,
1250:    auxS, auxiliar vector of size length 3*size_iXKZ*cV
1251: */
1252: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS)
1253: {
1254: #if defined(PETSC_MISSING_LAPACK_GETRS) 
1256:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1257: #else
1258:   PetscErrorCode  ierr;
1259:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
1260:   PetscInt        size_in = data->size_iXKZ*cV, i, ldh;
1261:   PetscScalar     *h, *in, *out;
1262:   PetscBLASInt    cV_, n, info, ld;
1263:   DvdReduction    r;
1264:   DvdReductionChunk
1265:                   ops[4];
1266:   DvdMult_copy_func
1267:                   sr[4];
1268: 
1270:   if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");

1272:   /* h <- X'*V */
1273:   h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1274:   SlepcAllReduceSumBegin(ops, 4, in, out, size_in, &r,
1275:                                 ((PetscObject)d->V[0])->comm);
1276:   for (i=0; i<cV; i++) {
1277:     VecsMultS(&h[i*ldh],0,ldh,d->V-data->size_KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i*2]);
1278:     VecsMultS(&h[i*ldh+data->size_KZ],0,ldh,data->u,0,data->size_iXKZ-data->size_KZ,V+i,0,1,&r,&sr[i*2+1]);
1279:   }
1280:   SlepcAllReduceSumEnd(&r);

1282:   /* h <- iXKZ\h */
1283:   cV_ = PetscBLASIntCast(cV);
1284:   n = PetscBLASIntCast(data->size_iXKZ);
1285:   ld = PetscBLASIntCast(data->ldiXKZ);
1287:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1288:   LAPACKgetrs_("N", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info);
1289:   PetscFPTrapPop();
1290:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);

1292:   /* V <- V - KZ*h */
1293:   for (i=0; i<cV; i++) {
1294:     SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->KZ,data->size_iXKZ,&h[ldh*i],ldh,data->size_iXKZ,1);
1295:   }
1296:   return(0);
1297: #endif
1298: }

1302: /* Compute (I - X*iXKZ*KZ')*V where,
1303:    V, the vectors to apply the projector,
1304:    cV, the number of vectors in V,
1305:    auxS, auxiliar vector of size length 3*size_iXKZ*cV
1306: */
1307: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS)
1308: {
1309: #if defined(PETSC_MISSING_LAPACK_GETRS) 
1311:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1312: #else
1313:   PetscErrorCode  ierr;
1314:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
1315:   PetscInt        size_in = data->size_iXKZ*cV, i, ldh;
1316:   PetscScalar     *h, *in, *out;
1317:   PetscBLASInt    cV_, n, info, ld;
1318:   DvdReduction    r;
1319:   DvdReductionChunk
1320:                   ops[2];
1321:   DvdMult_copy_func
1322:                   sr[2];
1323: 
1325:   if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");

1327:   /* h <- KZ'*V */
1328:   h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1329:   SlepcAllReduceSumBegin(ops, 2, in, out, size_in, &r,
1330:                                 ((PetscObject)d->V[0])->comm);
1331:   for (i=0; i<cV; i++) {
1332:     VecsMultS(&h[i*ldh],0,ldh,data->KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i]);
1333:   }
1334:   SlepcAllReduceSumEnd(&r);

1336:   /* h <- iXKZ\h */
1337:   cV_ = PetscBLASIntCast(cV);
1338:   n = PetscBLASIntCast(data->size_iXKZ);
1339:   ld = PetscBLASIntCast(data->ldiXKZ);
1341:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1342:   LAPACKgetrs_("C", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info);
1343:   PetscFPTrapPop();
1344:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);

1346:   /* V <- V - X*h */
1347:   for (i=0; i<cV; i++) {
1348:     SlepcUpdateVectorsZ(V+i,1.0,-1.0,d->V-data->size_KZ,data->size_KZ,&h[ldh*i],ldh,data->size_KZ,1);
1349:     SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->u,data->size_iXKZ-data->size_KZ,&h[ldh*i+data->size_KZ],ldh,data->size_iXKZ-data->size_KZ,1);
1350:   }
1351:   return(0);
1352: #endif
1353: }