Actual source code: dvd_improvex.c

  1: /*
  2:   SLEPc eigensolver: "davidson"

  4:   Step: improve the eigenvectors X

  6:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  7:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  8:    Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain

 10:    This file is part of SLEPc.
 11:       
 12:    SLEPc is free software: you can redistribute it and/or modify it under  the
 13:    terms of version 3 of the GNU Lesser General Public License as published by
 14:    the Free Software Foundation.

 16:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 17:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 18:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 19:    more details.

 21:    You  should have received a copy of the GNU Lesser General  Public  License
 22:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 23:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 24: */

 26:  #include davidson.h
 27: #include <slepcvec.h>
 28: #include <slepcblaslapack.h>

 30: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d, void *funcV, Vec *D,
 31:   PetscInt max_size_D, PetscInt r_s, PetscInt r_e, Vec *auxV,
 32:   PetscScalar *auxS);
 33: PetscErrorCode dvd_pcapplyba(PC pc,PCSide side,Vec in,Vec out,Vec w);
 34: PetscErrorCode dvd_pcapply(PC pc,Vec in,Vec out);
 35: PetscErrorCode dvd_pcapplytrans(PC pc,Vec in,Vec out);
 36: PetscErrorCode dvd_matmult_jd(Mat A,Vec in,Vec out);
 37: PetscErrorCode dvd_matmulttrans_jd(Mat A,Vec in,Vec out);
 38: PetscErrorCode dvd_matgetvecs_jd(Mat A, Vec *right, Vec *left);
 39: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d);
 40: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d);
 41: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d);
 42: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d, Vec *D,
 43:                                    PetscInt max_size_D, PetscInt r_s,
 44:                                    PetscInt r_e, PetscInt *size_D);
 45: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d, PetscInt i_s,
 46:   PetscInt i_e, Vec **u, Vec **v, Vec *kr, Vec **auxV, PetscScalar **auxS,
 47:   PetscScalar *theta, PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY,
 48:   PetscInt ld);
 49: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d, PetscInt i_s,
 50:   PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
 51:   PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld);
 52: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d, PetscInt i_s,
 53:   PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
 54:   PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld);
 55: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d, PetscInt i,
 56:   PetscScalar* theta, PetscScalar* thetai,
 57:   PetscInt *maxits, PetscReal *tol);
 58: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS);
 59: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS);

 61: #define size_Z (64*4)

 63: /**** JD update step (I - Kfg'/(g'Kf)) K(A - sB) (I - Kfg'/(g'Kf)) t = (I - Kfg'/(g'Kf))r  ****/

 65: typedef struct {
 66:   PetscInt size_X;
 67:   void
 68:     *old_improveX_data;   /* old improveX_data */
 69:   improveX_type
 70:     old_improveX;         /* old improveX */
 71:   KSP ksp;                /* correction equation solver */
 72:   Vec
 73:     friends,              /* reference vector for composite vectors */
 74:     *auxV;                /* auxiliar vectors */
 75:   PetscScalar *auxS,      /* auxiliar scalars */
 76:     *theta,
 77:     *thetai;              /* the shifts used in the correction eq. */
 78:   PetscInt maxits,        /* maximum number of iterations */
 79:     r_s, r_e,             /* the selected eigenpairs to improve */
 80:     ksp_max_size;         /* the ksp maximum subvectors size */
 81:   PetscReal tol,          /* the maximum solution tolerance */
 82:     lastTol,              /* last tol for dynamic stopping criterion */
 83:     fix;                  /* tolerance for using the approx. eigenvalue */
 84:   PetscBool
 85:     dynamic;              /* if the dynamic stopping criterion is applied */
 86:   dvdDashboard
 87:     *d;                   /* the currect dvdDashboard reference */
 88:   PC old_pc;              /* old pc in ksp */
 89:   Vec
 90:     *u,                   /* new X vectors */
 91:     *real_KZ,             /* original KZ */
 92:     *KZ;                  /* KZ vecs for the projector KZ*inv(X'*KZ)*X' */
 93:   PetscScalar
 94:    *XKZ,                  /* X'*KZ */
 95:    *iXKZ;                 /* inverse of XKZ */
 96:   PetscInt
 97:     ldXKZ,                /* leading dimension of XKZ */
 98:     size_iXKZ,            /* size of iXKZ */
 99:     ldiXKZ,               /* leading dimension of iXKZ */
100:     size_KZ,              /* size of converged KZ */
101:     size_real_KZ,         /* original size of KZ */
102:     size_cX,              /* last value of d->size_cX */
103:     old_size_X;           /* last number of improved vectors */
104:   PetscBLASInt
105:     *iXKZPivots;          /* array of pivots */
106: } dvdImprovex_jd;

108: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);
109: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);

113: PetscErrorCode dvd_improvex_jd(dvdDashboard *d,dvdBlackboard *b,KSP ksp,PetscInt max_bs,PetscInt cX_impr,PetscBool dynamic)
114: {
115:   PetscErrorCode  ierr;
116:   dvdImprovex_jd  *data;
117:   PetscBool       useGD,her_probl,std_probl;
118:   PC              pc;
119:   PetscInt        size_P,s=1;

122:   std_probl = DVD_IS(d->sEP,DVD_EP_STD)?PETSC_TRUE:PETSC_FALSE;
123:   her_probl = DVD_IS(d->sEP,DVD_EP_HERMITIAN)?PETSC_TRUE:PETSC_FALSE;

125:   /* Setting configuration constrains */
126:   PetscObjectTypeCompare((PetscObject)ksp,KSPPREONLY,&useGD);

128:   /* If the arithmetic is real and the problem is not Hermitian, then
129:      the block size is incremented in one */
130: #if !defined(PETSC_USE_COMPLEX)
131:   if (!her_probl) {
132:     max_bs++;
133:     b->max_size_P = PetscMax(b->max_size_P,2);
134:     s = 2;
135:   } else
136: #endif
137:     b->max_size_P = PetscMax(b->max_size_P,1);
138:   b->max_size_X = PetscMax(b->max_size_X,max_bs);
139:   size_P = b->max_size_P+cX_impr;
140:   b->max_size_auxV = PetscMax(b->max_size_auxV,
141:      b->max_size_X*3+(useGD?0:2)+ /* u, kr, auxV(max_size_X+2?) */
142:      ((her_probl || !d->eps->trueres)?1:PetscMax(s*2,b->max_size_cX_proj+b->max_size_X))); /* testConv */
143: 
144:   b->own_scalars+= size_P*size_P; /* XKZ */
145:   b->max_size_auxS = PetscMax(b->max_size_auxS,
146:     b->max_size_X*3 + /* theta, thetai */
147:     size_P*size_P + /* iXKZ */
148:     FromIntToScalar(size_P) + /* iXkZPivots */
149:     PetscMax(PetscMax(
150:       3*b->max_size_proj*b->max_size_X, /* dvd_improvex_apply_proj */
151:       8*cX_impr*b->max_size_X), /* dvd_improvex_jd_proj_cuv_KZX */
152:       (her_probl || !d->eps->trueres)?0:b->max_nev*b->max_nev+PetscMax(b->max_nev*6,(b->max_nev+b->max_size_proj)*s+b->max_nev*(b->max_size_X+b->max_size_cX_proj)*(std_probl?2:4)+64))); /* preTestConv */
153:   b->own_vecs+= size_P; /* KZ */

155:   /* Setup the preconditioner */
156:   if (ksp) {
157:     KSPGetPC(ksp,&pc);
158:     dvd_static_precond_PC(d,b,pc);
159:   } else {
160:     dvd_static_precond_PC(d,b,0);
161:   }

163:   /* Setup the step */
164:   if (b->state >= DVD_STATE_CONF) {
165:     PetscMalloc(sizeof(dvdImprovex_jd),&data);
166:     data->dynamic = dynamic;
167:     data->size_real_KZ = size_P;
168:     data->real_KZ = b->free_vecs; b->free_vecs+= data->size_real_KZ;
169:     d->max_cX_in_impr = cX_impr;
170:     data->XKZ = b->free_scalars; b->free_scalars+= size_P*size_P;
171:     data->ldXKZ = size_P;
172:     data->size_X = b->max_size_X;
173:     data->old_improveX_data = d->improveX_data;
174:     d->improveX_data = data;
175:     data->old_improveX = d->improveX;
176:     data->ksp = useGD?PETSC_NULL:ksp;
177:     data->d = d;
178:     d->improveX = dvd_improvex_jd_gen;
179:     data->ksp_max_size = max_bs;

181:     DVD_FL_ADD(d->startList,dvd_improvex_jd_start);
182:     DVD_FL_ADD(d->endList,dvd_improvex_jd_end);
183:     DVD_FL_ADD(d->destroyList,dvd_improvex_jd_d);
184:   }

186:   return(0);
187: }


192: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d)
193: {
194:   PetscErrorCode  ierr;
195:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
196:   PetscInt        rA, cA, rlA, clA;
197:   Mat             A;
198:   PetscBool       t;
199:   PC              pc;


203:   data->KZ = data->real_KZ;
204:   data->size_KZ = data->size_cX = data->old_size_X = 0;
205:   data->lastTol = data->dynamic?0.5:0.0;

207:   /* Setup the ksp */
208:   if(data->ksp) {
209:     /* Create the reference vector */
210:     VecCreateCompWithVecs(d->V, data->ksp_max_size, PETSC_NULL,
211:                                  &data->friends);

213:     /* Save the current pc and set a PCNONE */
214:     KSPGetPC(data->ksp, &data->old_pc);
215:     PetscObjectTypeCompare((PetscObject)data->old_pc, PCNONE, &t);
216: 
217:     data->lastTol = 0.5;
218:     if (t) {
219:       data->old_pc = 0;
220:     } else {
221:       PetscObjectReference((PetscObject)data->old_pc);
222:       PCCreate(((PetscObject)d->eps)->comm, &pc);
223:       PCSetType(pc, PCSHELL);
224:       PCSetOperators(pc, d->A, d->A, SAME_PRECONDITIONER);
225:       PCShellSetApply(pc,dvd_pcapply);
226:       PCShellSetApplyBA(pc,dvd_pcapplyba);
227:       PCShellSetApplyTranspose(pc,dvd_pcapplytrans);
228:       KSPSetPC(data->ksp, pc);
229:       PCDestroy(&pc);
230:     }

232:     /* Create the (I-v*u')*K*(A-s*B) matrix */
233:     MatGetSize(d->A, &rA, &cA);
234:     MatGetLocalSize(d->A, &rlA, &clA);
235:     MatCreateShell(((PetscObject)d->A)->comm, rlA*data->ksp_max_size,
236:                           clA*data->ksp_max_size, rA*data->ksp_max_size,
237:                           cA*data->ksp_max_size, data, &A);
238:     MatShellSetOperation(A, MATOP_MULT,
239:                                 (void(*)(void))dvd_matmult_jd);
240:     MatShellSetOperation(A, MATOP_MULT_TRANSPOSE,
241:                                 (void(*)(void))dvd_matmulttrans_jd);
242:     MatShellSetOperation(A, MATOP_GET_VECS,
243:                                 (void(*)(void))dvd_matgetvecs_jd);
244: 

246:     /* Try to avoid KSPReset */
247:     KSPGetOperatorsSet(data->ksp,&t,PETSC_NULL);
248:     if (t) {
249:       Mat M;
250:       PetscInt rM;
251:       KSPGetOperators(data->ksp,&M,PETSC_NULL,PETSC_NULL);
252:       MatGetSize(M,&rM,PETSC_NULL);
253:       if (rM != rA*data->ksp_max_size) { KSPReset(data->ksp); }
254:     }
255:     KSPSetOperators(data->ksp, A, A, SAME_PRECONDITIONER);
256: 
257:     KSPSetUp(data->ksp);
258:     MatDestroy(&A);
259:   } else {
260:     data->old_pc = 0;
261:     data->friends = PETSC_NULL;
262:   }
263: 
264:   return(0);
265: }


270: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d)
271: {
272:   PetscErrorCode  ierr;
273:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;


277:   if (data->friends) { VecDestroy(&data->friends);  }

279:   /* Restore the pc of ksp */
280:   if (data->old_pc) {
281:     KSPSetPC(data->ksp, data->old_pc);
282:     PCDestroy(&data->old_pc);
283:   }

285:   return(0);
286: }


291: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d)
292: {
293:   PetscErrorCode  ierr;
294:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;

297: 
298:   /* Restore changes in dvdDashboard */
299:   d->improveX_data = data->old_improveX_data;

301:   /* Free local data and objects */
302:   PetscFree(data);

304:   return(0);
305: }


310: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d,Vec *D,PetscInt max_size_D,PetscInt r_s,PetscInt r_e,PetscInt *size_D)
311: {
312:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
313:   PetscErrorCode  ierr;
314:   PetscInt        i,j,n,maxits,maxits0,lits,s,ld,k;
315:   PetscScalar     *pX,*pY,*auxS = d->auxS,*auxS0;
316:   PetscReal       tol,tol0;
317:   Vec             *u,*v,*kr,kr_comp,D_comp;


321:   /* Quick exit */
322:   if ((max_size_D == 0) || r_e-r_s <= 0) {
323:    /* Callback old improveX */
324:     if (data->old_improveX) {
325:       d->improveX_data = data->old_improveX_data;
326:       data->old_improveX(d, PETSC_NULL, 0, 0, 0, PETSC_NULL);
327:       d->improveX_data = data;
328:     }
329:     return(0);
330:   }
331: 
332:   n = PetscMin(PetscMin(data->size_X, max_size_D), r_e-r_s);
333:   if (n == 0) SETERRQ(PETSC_COMM_SELF,1, "n == 0");
334:   if (data->size_X < r_e-r_s) SETERRQ(PETSC_COMM_SELF,1, "size_X < r_e-r_s");

336:   DSGetLeadingDimension(d->ps,&ld);

338:   /* Restart lastTol if a new pair converged */
339:   if (data->dynamic && data->size_cX < d->size_cX)
340:     data->lastTol = 0.5;

342:   for(i=0, s=0, auxS0=auxS; i<n; i+=s) {
343:     /* If the selected eigenvalue is complex, but the arithmetic is real... */
344: #if !defined(PETSC_USE_COMPLEX)
345:     if (d->eigi[i] != 0.0) {
346:       if (i+2 <= max_size_D) s=2; else break;
347:     } else
348: #endif
349:       s=1;

351:     data->auxV = d->auxV;
352:     data->r_s = r_s+i; data->r_e = r_s+i+s;
353:     auxS = auxS0;
354:     data->theta = auxS; auxS+= 2*s;
355:     data->thetai = auxS; auxS+= s;
356:     kr = data->auxV; data->auxV+= s;

358:     /* Compute theta, maximum iterations and tolerance */
359:     maxits = 0; tol = 1;
360:     for(j=0; j<s; j++) {
361:       d->improvex_jd_lit(d, r_s+i+j, &data->theta[2*j],
362:                                 &data->thetai[j], &maxits0, &tol0);
363: 
364:       maxits+= maxits0; tol*= tol0;
365:     }
366:     maxits/= s; tol = data->dynamic?data->lastTol:exp(log(tol)/s);

368:     /* Compute u, v and kr */
369:     k = r_s+i+d->cX_in_H;
370:     DSVectors(d->ps,DS_MAT_X,&k,PETSC_NULL);
371:     DSNormalize(d->ps,DS_MAT_X,r_s+i+d->cX_in_H);
372:     k = r_s+i+d->cX_in_H;
373:     DSVectors(d->ps,DS_MAT_Y,&k,PETSC_NULL);
374:     DSNormalize(d->ps,DS_MAT_Y,r_s+i+d->cX_in_H);
375:     DSGetArray(d->ps,DS_MAT_X,&pX);
376:     DSGetArray(d->ps,DS_MAT_Y,&pY);
377:     dvd_improvex_jd_proj_cuv(d,r_s+i,r_s+i+s,&u,&v,kr,&data->auxV,&auxS,data->theta,data->thetai,pX,pY,ld);
378:     DSRestoreArray(d->ps,DS_MAT_X,&pX);
379:     DSRestoreArray(d->ps,DS_MAT_Y,&pY);
380:     data->u = u;

382:     /* Check if the first eigenpairs are converged */
383:     if (i == 0) {
384:       PetscInt n_auxV = data->auxV-d->auxV+s, n_auxS = auxS - d->auxS;
385:       d->auxV+= n_auxV; d->size_auxV-= n_auxV;
386:       d->auxS+= n_auxS; d->size_auxS-= n_auxS;
387:       d->preTestConv(d,0,s,s,d->auxV-s,PETSC_NULL,&d->npreconv);
388:       d->auxV-= n_auxV; d->size_auxV+= n_auxV;
389:       d->auxS-= n_auxS; d->size_auxS+= n_auxS;
390:       if (d->npreconv > 0) break;
391:     }

393:     /* If JD */
394:     if (data->ksp) {
395:       data->auxS = auxS;

397:       /* kr <- -kr */
398:       for(j=0; j<s; j++) {
399:         VecScale(kr[j], -1.0);
400:       }

402:       /* Compouse kr and D */
403:       VecCreateCompWithVecs(kr, data->ksp_max_size, data->friends,
404:                                    &kr_comp);
405:       VecCreateCompWithVecs(&D[i], data->ksp_max_size, data->friends,
406:                                    &D_comp);
407:       VecCompSetSubVecs(data->friends,s,PETSC_NULL);
408: 
409:       /* Solve the correction equation */
410:       KSPSetTolerances(data->ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT,
411:                               maxits);
412:       KSPSolve(data->ksp, kr_comp, D_comp);
413:       KSPGetIterationNumber(data->ksp, &lits);
414:       d->eps->OP->lineariterations+= lits;
415: 
416:       /* Destroy the composed ks and D */
417:       VecDestroy(&kr_comp);
418:       VecDestroy(&D_comp);

420:     /* If GD */
421:     } else {
422:       for(j=0; j<s; j++) {
423:         d->improvex_precond(d, r_s+i+j, kr[j], D[i+j]);
424:       }
425:       dvd_improvex_apply_proj(d, &D[i], s, auxS);
426:     }
427:   }
428:   *size_D = i;
429:   if (data->dynamic) data->lastTol = PetscMax(data->lastTol/2.0,PETSC_MACHINE_EPSILON*10.0);
430: 
431:   /* Callback old improveX */
432:   if (data->old_improveX) {
433:     d->improveX_data = data->old_improveX_data;
434:     data->old_improveX(d, PETSC_NULL, 0, 0, 0, PETSC_NULL);
435:     d->improveX_data = data;
436:   }

438:   return(0);
439: }

441: /* y <- theta[1]A*x - theta[0]*B*x
442:    auxV, two auxiliary vectors */
445: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
446: {
447:   PetscErrorCode  ierr;
448:   PetscInt        n,i;
449:   const Vec       *Bx;

452:   n = data->r_e - data->r_s;
453:   for(i=0; i<n; i++) {
454:     MatMult(data->d->A,x[i],y[i]);
455:   }

457:   for(i=0; i<n; i++) {
458: #if !defined(PETSC_USE_COMPLEX)
459:     if(data->d->eigi[data->r_s+i] != 0.0) {
460:       if (data->d->B) {
461:         MatMult(data->d->B,x[i],auxV[0]);
462:         MatMult(data->d->B,x[i+1],auxV[1]);
463:         Bx = auxV;
464:       } else {
465:         Bx = &x[i];
466:       }

468:       /* y_i   <- [ t_2i+1*A*x_i   - t_2i*Bx_i + ti_i*Bx_i+1;
469:          y_i+1      t_2i+1*A*x_i+1 - ti_i*Bx_i - t_2i*Bx_i+1  ] */
470:       VecAXPBYPCZ(y[i],-data->theta[2*i],data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
471:       VecAXPBYPCZ(y[i+1],-data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
472:       i++;
473:     } else
474: #endif
475:     {
476:       if (data->d->B) {
477:         MatMult(data->d->B,x[i],auxV[0]);
478:         Bx = auxV;
479:       } else {
480:         Bx = &x[i];
481:       }
482:       VecAXPBY(y[i],-data->theta[i*2],data->theta[i*2+1],Bx[0]);
483:     }
484:   }
485:   return(0);
486: }



490: /* y <- theta[1]'*A'*x - theta[0]'*B'*x
491:    auxV, two auxiliary vectors */
494: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
495: {
496:   PetscErrorCode  ierr;
497:   PetscInt        n,i;
498:   const Vec       *Bx;

501:   n = data->r_e - data->r_s;
502:   for(i=0; i<n; i++) {
503:     MatMultTranspose(data->d->A,x[i],y[i]);
504:   }

506:   for(i=0; i<n; i++) {
507: #if !defined(PETSC_USE_COMPLEX)
508:     if(data->d->eigi[data->r_s+i] != 0.0) {
509:       if (data->d->B) {
510:         MatMultTranspose(data->d->B,x[i],auxV[0]);
511:         MatMultTranspose(data->d->B,x[i+1],auxV[1]);
512:         Bx = auxV;
513:       } else {
514:         Bx = &x[i];
515:       }

517:       /* y_i   <- [ t_2i+1*A*x_i   - t_2i*Bx_i - ti_i*Bx_i+1;
518:          y_i+1      t_2i+1*A*x_i+1 + ti_i*Bx_i - t_2i*Bx_i+1  ] */
519:       VecAXPBYPCZ(y[i],-data->theta[2*i],-data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
520:       VecAXPBYPCZ(y[i+1],data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
521:       i++;
522:     } else
523: #endif
524:     {
525:       if (data->d->B) {
526:         MatMultTranspose(data->d->B,x[i],auxV[0]);
527:         Bx = auxV;
528:       } else {
529:         Bx = &x[i];
530:       }
531:       VecAXPBY(y[i],PetscConj(-data->theta[i*2]),PetscConj(data->theta[i*2+1]),Bx[0]);
532:     }
533:   }
534:   return(0);
535: }


540: PetscErrorCode dvd_pcapplyba(PC pc,PCSide side,Vec in,Vec out,Vec w)
541: {
542:   PetscErrorCode  ierr;
543:   dvdImprovex_jd  *data;
544:   PetscInt        n,i;
545:   const Vec       *inx,*outx,*wx;
546:   Mat             A;


550:   PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
551:   MatShellGetContext(A,(void**)&data);
552:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
553:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
554:   VecCompGetSubVecs(w,PETSC_NULL,&wx);
555:   n = data->r_e - data->r_s;

557:   /* Check auxiliary vectors */
558:   if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

560:   switch(side) {
561:   case PC_LEFT:
562:     /* aux <- theta[1]A*in - theta[0]*B*in */
563:     dvd_aux_matmult(data,inx,data->auxV,outx);
564: 
565:     /* out <- K * aux */
566:     for(i=0; i<n; i++) {
567:       data->d->improvex_precond(data->d,data->r_s+i,data->auxV[i],outx[i]);
568:     }
569:     break;
570:   case PC_RIGHT:
571:     /* aux <- K * in */
572:     for(i=0; i<n; i++) {
573:       data->d->improvex_precond(data->d,data->r_s+i,inx[i],data->auxV[i]);
574:     }
575: 
576:     /* out <- theta[1]A*auxV - theta[0]*B*auxV */
577:     dvd_aux_matmult(data,data->auxV,outx,wx);
578:     break;
579:   case PC_SYMMETRIC:
580:     /* aux <- K^{1/2} * in */
581:     for(i=0; i<n; i++) {
582:       PCApplySymmetricRight(data->old_pc,inx[i],data->auxV[i]);
583:     }
584: 
585:     /* out <- theta[1]A*auxV - theta[0]*B*auxV */
586:     dvd_aux_matmult(data,data->auxV,wx,outx);
587: 
588:     /* aux <- K^{1/2} * in */
589:     for(i=0; i<n; i++) {
590:       PCApplySymmetricLeft(data->old_pc,wx[i],outx[i]);
591:     }
592:     break;
593:   default:
594:     SETERRQ(PETSC_COMM_SELF,1, "Unsupported KSP side");
595:   }

597:   /* out <- out - v*(u'*out) */
598:   dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);

600:   return(0);
601: }


606: PetscErrorCode dvd_pcapply(PC pc,Vec in,Vec out)
607: {
608:   PetscErrorCode  ierr;
609:   dvdImprovex_jd  *data;
610:   PetscInt        n,i;
611:   const Vec       *inx, *outx;
612:   Mat             A;


616:   PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
617:   MatShellGetContext(A,(void**)&data);
618:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
619:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
620:   n = data->r_e - data->r_s;

622:   /* out <- K * in */
623:   for(i=0; i<n; i++) {
624:     data->d->improvex_precond(data->d,data->r_s+i,inx[i],outx[i]);
625:   }

627:   /* out <- out - v*(u'*out) */
628:   dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);

630:   return(0);
631: }

635: PetscErrorCode dvd_pcapplytrans(PC pc,Vec in,Vec out)
636: {
637:   PetscErrorCode  ierr;
638:   dvdImprovex_jd  *data;
639:   PetscInt        n,i;
640:   const Vec       *inx, *outx;
641:   Mat             A;


645:   PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
646:   MatShellGetContext(A,(void**)&data);
647:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
648:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
649:   n = data->r_e - data->r_s;

651:   /* Check auxiliary vectors */
652:   if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

654:   /* auxV <- in */
655:   for(i=0; i<n; i++) {
656:     VecCopy(inx[i],data->auxV[i]);
657:   }
658: 
659:   /* auxV <- auxV - u*(v'*auxV) */
660:   dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);

662:   /* out <- K' * aux */
663:   for(i=0; i<n; i++) {
664:     PCApplyTranspose(data->old_pc,data->auxV[i],outx[i]);
665:   }

667:   return(0);
668: }


673: PetscErrorCode dvd_matmult_jd(Mat A,Vec in,Vec out)
674: {
675:   PetscErrorCode  ierr;
676:   dvdImprovex_jd  *data;
677:   PetscInt        n;
678:   const Vec       *inx, *outx;
679:   PCSide          side;


683:   MatShellGetContext(A,(void**)&data);
684:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
685:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
686:   n = data->r_e - data->r_s;

688:   /* Check auxiliary vectors */
689:   if (&data->auxV[2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

691:   /* out <- theta[1]A*in - theta[0]*B*in */
692:   dvd_aux_matmult(data,inx,outx,data->auxV);

694:   KSPGetPCSide(data->ksp,&side);
695:   if (side == PC_RIGHT) {
696:     /* out <- out - v*(u'*out) */
697:     dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
698:   }

700:   return(0);
701: }

705: PetscErrorCode dvd_matmulttrans_jd(Mat A,Vec in,Vec out)
706: {
707:   PetscErrorCode  ierr;
708:   dvdImprovex_jd  *data;
709:   PetscInt        n,i;
710:   const Vec       *inx,*outx,*r,*auxV;
711:   PCSide          side;


715:   MatShellGetContext(A,(void**)&data);
716:   VecCompGetSubVecs(in,PETSC_NULL,&inx);
717:   VecCompGetSubVecs(out,PETSC_NULL,&outx);
718:   n = data->r_e - data->r_s;

720:   /* Check auxiliary vectors */
721:   if (&data->auxV[n+2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

723:   KSPGetPCSide(data->ksp,&side);
724:   if (side == PC_RIGHT) {
725:     /* auxV <- in */
726:     for(i=0; i<n; i++) {
727:       VecCopy(inx[i],data->auxV[i]);
728:     }

730:     /* auxV <- auxV - v*(u'*auxV) */
731:     dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);
732:     r = data->auxV;
733:     auxV = data->auxV+n;
734:   } else {
735:     r = inx;
736:     auxV = data->auxV;
737:   }

739:   /* out <- theta[1]A*r - theta[0]*B*r */
740:   dvd_aux_matmulttrans(data,r,outx,auxV);

742:   return(0);
743: }


748: PetscErrorCode dvd_matgetvecs_jd(Mat A, Vec *right, Vec *left)
749: {
750:   PetscErrorCode  ierr;
751:   Vec             *r, *l;
752:   dvdImprovex_jd  *data;
753:   PetscInt        n, i;


757:   MatShellGetContext(A, (void**)&data);
758:   n = data->ksp_max_size;
759:   if (right) {
760:     PetscMalloc(sizeof(Vec)*n, &r);
761:   }
762:   if (left) {
763:     PetscMalloc(sizeof(Vec)*n, &l);
764:   }
765:   for (i=0; i<n; i++) {
766:     MatGetVecs(data->d->A, right?&r[i]:PETSC_NULL,
767:                       left?&l[i]:PETSC_NULL);
768:   }
769:   if(right) {
770:     VecCreateCompWithVecs(r, n, data->friends, right);
771:     for (i=0; i<n; i++) {
772:       VecDestroy(&r[i]);
773:     }
774:   }
775:   if(left) {
776:     VecCreateCompWithVecs(l, n, data->friends, left);
777:     for (i=0; i<n; i++) {
778:       VecDestroy(&l[i]);
779:     }
780:   }

782:   if (right) {
783:     PetscFree(r);
784:   }
785:   if (left) {
786:     PetscFree(l);
787:   }

789:   return(0);
790: }


795: PetscErrorCode dvd_improvex_jd_proj_uv(dvdDashboard *d, dvdBlackboard *b,
796:                                        ProjType_t p)
797: {

800:   /* Setup the step */
801:   if (b->state >= DVD_STATE_CONF) {
802:     switch(p) {
803:     case DVD_PROJ_KXX:
804:       d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KXX; break;
805:     case DVD_PROJ_KZX:
806:       d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KZX; break;
807:     }
808:   }

810:   return(0);
811: }

815: /* 
816:   Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
817:   kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
818:   where
819:   auxV, 4*(i_e-i_s) auxiliar global vectors
820:   pX,pY, the right and left eigenvectors of the projected system
821:   ld, the leading dimension of pX and pY
822:   auxS: max(8*bs*max_cX_in_proj,size_V*size_V)
823: */
824: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d, PetscInt i_s,
825:   PetscInt i_e, Vec **u, Vec **v, Vec *kr, Vec **auxV, PetscScalar **auxS,
826:   PetscScalar *theta, PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY,
827:   PetscInt ld)
828: {
829: #if defined(PETSC_MISSING_LAPACK_GETRF)
831:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable");
832: #else
833:   PetscErrorCode  ierr;
834:   PetscInt        n = i_e - i_s, size_KZ, V_new, rm, i, size_in;
835:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
836:   PetscBLASInt    s, ldXKZ, info;
837:   DvdReduction    r;
838:   DvdReductionChunk
839:                   ops[2];
840:   DvdMult_copy_func
841:                   sr[2];


845:   /* Check consistency */
846:   V_new = d->size_cX - data->size_cX;
847:   if (V_new > data->old_size_X) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
848:   data->old_size_X = n;

850:   /* KZ <- KZ(rm:rm+max_cX-1) */
851:   rm = PetscMax(V_new+data->size_KZ-d->max_cX_in_impr, 0);
852:   for (i=0; i<d->max_cX_in_impr; i++) {
853:     VecCopy(data->KZ[i+rm], data->KZ[i]);
854:   }
855:   data->size_cX = d->size_cX;

857:   /* XKZ <- XKZ(rm:rm+max_cX-1,rm:rm+max_cX-1) */
858:   for (i=0; i<d->max_cX_in_impr; i++) {
859:     SlepcDenseCopy(&data->XKZ[i*data->ldXKZ+i], data->ldXKZ, &data->XKZ[(i+rm)*data->ldXKZ+i+rm], data->ldXKZ, data->size_KZ, 1);
860:   }
861:   data->size_KZ = PetscMin(d->max_cX_in_impr, data->size_KZ+V_new);

863:   /* Compute X, KZ and KR */
864:   *u = *auxV; *auxV+= n;
865:   *v = &data->KZ[data->size_KZ];
866:   d->improvex_jd_proj_uv(d, i_s, i_e, *u, *v, kr, *auxV, theta, thetai,
867:                                 pX, pY, ld);

869:   /* XKZ <- X'*KZ */
870:   size_KZ = data->size_KZ+n;
871:   size_in = 2*n*data->size_KZ+n*n;
872:   SlepcAllReduceSumBegin(ops,2,*auxS,*auxS+size_in,size_in,&r,((PetscObject)d->V[0])->comm);
873:   VecsMultS(data->XKZ,0,data->ldXKZ,d->V-data->size_KZ,0,data->size_KZ,data->KZ,data->size_KZ,size_KZ,&r,&sr[0]);
874:   VecsMultS(&data->XKZ[data->size_KZ],0,data->ldXKZ,*u,0,n,data->KZ,0,size_KZ,&r,&sr[1]);
875:   SlepcAllReduceSumEnd(&r);

877:   /* iXKZ <- inv(XKZ) */
878:   s = PetscBLASIntCast(size_KZ);
879:   data->ldiXKZ = data->size_iXKZ = size_KZ;
880:   data->iXKZ = *auxS; *auxS+= size_KZ*size_KZ;
881:   data->iXKZPivots = (PetscBLASInt*)*auxS;
882:   *auxS += FromIntToScalar(size_KZ);
883:   SlepcDenseCopy(data->iXKZ,data->ldiXKZ,data->XKZ,data->ldXKZ,size_KZ,size_KZ);
884:   ldXKZ = PetscBLASIntCast(data->ldiXKZ);
885:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
886:   LAPACKgetrf_(&s, &s, data->iXKZ, &ldXKZ, data->iXKZPivots, &info);
887:   PetscFPTrapPop();
888:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRF %d", info);

890:   return(0);
891: #endif
892: }

894: #define DVD_COMPLEX_RAYLEIGH_QUOTIENT(ur,ui,Axr,Axi,Bxr,Bxi,eigr,eigi,b,ierr)\
895: { \
896:   VecDot((Axr), (ur), &(b)[0]);  /* r*A*r */ \
897:   VecDot((Axr), (ui), &(b)[1]);  /* i*A*r */ \
898:   VecDot((Axi), (ur), &(b)[2]);  /* r*A*i */ \
899:   VecDot((Axi), (ui), &(b)[3]);  /* i*A*i */ \
900:   VecDot((Bxr), (ur), &(b)[4]);  /* r*B*r */ \
901:   VecDot((Bxr), (ui), &(b)[5]);  /* i*B*r */ \
902:   VecDot((Bxi), (ur), &(b)[6]);  /* r*B*i */ \
903:   VecDot((Bxi), (ui), &(b)[7]);  /* i*B*i */ \
904:   (b)[0]  = (b)[0]+(b)[3]; /* rAr+iAi */ \
905:   (b)[2] =  (b)[2]-(b)[1]; /* rAi-iAr */ \
906:   (b)[4] = (b)[4]+(b)[7]; /* rBr+iBi */ \
907:   (b)[6] = (b)[6]-(b)[5]; /* rBi-iBr */ \
908:   (b)[7] = (b)[4]*(b)[4] + (b)[6]*(b)[6]; /* k */ \
909:   *(eigr) = ((b)[0]*(b)[4] + (b)[2]*(b)[6]) / (b)[7]; /* eig_r */ \
910:   *(eigi) = ((b)[2]*(b)[4] - (b)[0]*(b)[6]) / (b)[7]; /* eig_i */ \
911: }

913: #if !defined(PETSC_USE_COMPLEX)
914: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
915:   for((i)=0; (i)<(n); (i)++) { \
916:     if ((eigi)[(i_s)+(i)] != 0.0) { \
917:       /* eig_r = [(rAr+iAi)*(rBr+iBi) + (rAi-iAr)*(rBi-iBr)]/k \
918:          eig_i = [(rAi-iAr)*(rBr+iBi) - (rAr+iAi)*(rBi-iBr)]/k \
919:          k     =  (rBr+iBi)*(rBr+iBi) + (rBi-iBr)*(rBi-iBr)    */ \
920:       DVD_COMPLEX_RAYLEIGH_QUOTIENT((u)[(i)], (u)[(i)+1], (Ax)[(i)], \
921:         (Ax)[(i)+1], (Bx)[(i)], (Bx)[(i)+1], &(b)[8], &(b)[9], (b), (ierr)); \
922:       if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[8])/ \
923:             PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10    || \
924:           PetscAbsScalar((eigi)[(i_s)+(i)] - (b)[9])/ \
925:             PetscAbsScalar((eigi)[(i_s)+(i)]) > 1e-10         ) { \
926:         (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its "\
927:                             "Rayleigh quotient value %G+%G\n", \
928:                             (eigr)[(i_s)+(i)], \
929:                             (eigi)[(i_s)+1], (b)[8], (b)[9]); \
930:       } \
931:       (i)++; \
932:     } \
933:   }
934: #else
935: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
936:   for((i)=0; (i)<(n); (i)++) { \
937:       (ierr) = VecDot((Ax)[(i)], (u)[(i)], &(b)[0]);  \
938:       (ierr) = VecDot((Bx)[(i)], (u)[(i)], &(b)[1]);  \
939:       (b)[0] = (b)[0]/(b)[1]; \
940:       if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[0])/ \
941:             PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10     ) { \
942:         (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its " \
943:                "Rayleigh quotient value %G+%G\n", \
944:                PetscRealPart((eigr)[(i_s)+(i)]), \
945:                PetscImaginaryPart((eigr)[(i_s)+(i)]), PetscRealPart((b)[0]), \
946:                PetscImaginaryPart((b)[0])); \
947:       } \
948:     }
949: #endif


954: /* 
955:   Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
956:   kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
957:   where
958:   auxV, 4*(i_e-i_s) auxiliar global vectors
959:   pX,pY, the right and left eigenvectors of the projected system
960:   ld, the leading dimension of pX and pY
961: */
962: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d, PetscInt i_s,
963:   PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
964:   PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld)
965: {
966:   PetscErrorCode  ierr;
967:   PetscInt        n = i_e - i_s, i;
968:   PetscScalar     b[16];
969:   Vec             *Ax, *Bx, *r=auxV, X[4];
970:   /* The memory manager doen't allow to call a subroutines */
971:   PetscScalar     Z[size_Z];


975:   /* u <- X(i) */
976:   dvd_improvex_compute_X(d,i_s,i_e,u,pX,ld);

978:   /* v <- theta[0]A*u + theta[1]*B*u */

980:   /* Bx <- B*X(i) */
981:   Bx = kr;
982:   if (d->BV) {
983:     SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
984:   } else {
985:     for(i=0; i<n; i++) {
986:       if (d->B) {
987:         MatMult(d->B, u[i], Bx[i]);
988:       } else {
989:         VecCopy(u[i], Bx[i]);
990:       }
991:     }
992:   }

994:   /* Ax <- A*X(i) */
995:   Ax = r;
996:   SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);

998:   /* v <- Y(i) */
999:   SlepcUpdateVectorsZ(v, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);

1001:   /* Recompute the eigenvalue */
1002:   DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, v, Ax, Bx, b, ierr);

1004:   for(i=0; i<n; i++) {
1005: #if !defined(PETSC_USE_COMPLEX)
1006:     if(d->eigi[i_s+i] != 0.0) {
1007:       /* [r_i r_i+1 kr_i kr_i+1]*= [ theta_2i'    0            1        0   
1008:                                        0         theta_2i'     0        1   
1009:                                      theta_2i+1 -thetai_i   -eigr_i -eigi_i 
1010:                                      thetai_i    theta_2i+1  eigi_i -eigr_i ] */
1011:       b[0] = b[5] = PetscConj(theta[2*i]);
1012:       b[2] = b[7] = -theta[2*i+1];
1013:       b[6] = -(b[3] = thetai[i]);
1014:       b[1] = b[4] = 0.0;
1015:       b[8] = b[13] = 1.0/d->nX[i_s+i];
1016:       b[10] = b[15] = -d->eigr[i_s+i]/d->nX[i_s+i];
1017:       b[14] = -(b[11] = d->eigi[i_s+i]/d->nX[i_s+i]);
1018:       b[9] = b[12] = 0.0;
1019:       X[0] = Ax[i]; X[1] = Ax[i+1]; X[2] = Bx[i]; X[3] = Bx[i+1];
1020:       SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 4, Z, size_Z);
1021: 
1022:       i++;
1023:     } else
1024: #endif
1025:     {
1026:       /* [Ax_i Bx_i]*= [ theta_2i'    1/nX_i
1027:                         theta_2i+1  -eig_i/nX_i ] */
1028:       b[0] = PetscConj(theta[i*2]);
1029:       b[1] = theta[i*2+1];
1030:       b[2] = 1.0/d->nX[i_s+i];
1031:       b[3] = -d->eigr[i_s+i]/d->nX[i_s+i];
1032:       X[0] = Ax[i]; X[1] = Bx[i];
1033:       SlepcUpdateVectorsD(X, 2, 1.0, b, 2, 2, 2, Z, size_Z);
1034: 
1035:     }
1036:   }
1037:   for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;

1039:   /* v <- K^{-1} r = K^{-1}(theta_2i'*Ax + theta_2i+1*Bx) */
1040:   for(i=0; i<n; i++) {
1041:     d->improvex_precond(d, i_s+i, r[i], v[i]);
1042:   }

1044:   /* kr <- P*(Ax - eig_i*Bx) */
1045:   d->calcpairs_proj_res(d, i_s, i_e, kr);

1047:   return(0);
1048: }

1052: /* 
1053:   Compute: u <- K^{-1}*X, v <- X,
1054:   kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1]
1055:   where
1056:   auxV, 4*(i_e-i_s) auxiliar global vectors
1057:   pX,pY, the right and left eigenvectors of the projected system
1058:   ld, the leading dimension of pX and pY
1059: */
1060: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d, PetscInt i_s,
1061:   PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
1062:   PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld)
1063: {
1064:   PetscErrorCode  ierr;
1065:   PetscInt        n = i_e - i_s, i;
1066:   PetscScalar     b[16];
1067:   Vec             *Ax, *Bx, *r = auxV, X[4];
1068:   /* The memory manager doen't allow to call a subroutines */
1069:   PetscScalar     Z[size_Z];


1073:   /* [v u] <- X(i) Y(i) */
1074:   dvd_improvex_compute_X(d,i_s,i_e,v,pX,ld);
1075:   SlepcUpdateVectorsZ(u, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);

1077:   /* Bx <- B*X(i) */
1078:   Bx = r;
1079:   if (d->BV) {
1080:     SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
1081:   } else {
1082:     if (d->B) {
1083:       for(i=0; i<n; i++) {
1084:         MatMult(d->B, v[i], Bx[i]);
1085:       }
1086:     } else
1087:       Bx = v;
1088:   }

1090:   /* Ax <- A*X(i) */
1091:   Ax = kr;
1092:   SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);

1094:   /* Recompute the eigenvalue */
1095:   DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, u, Ax, Bx, b, ierr);

1097:   for(i=0; i<n; i++) {
1098:     if (d->eigi[i_s+i] == 0.0) {
1099:       /* kr <- Ax -eig*Bx */
1100:       VecAXPBY(kr[i], -d->eigr[i_s+i]/d->nX[i_s+i], 1.0/d->nX[i_s+i], Bx[i]);
1101:     } else {
1102:       /* [kr_i kr_i+1 r_i r_i+1]*= [   1        0 
1103:                                        0        1
1104:                                     -eigr_i -eigi_i
1105:                                      eigi_i -eigr_i] */
1106:       b[0] = b[5] = 1.0/d->nX[i_s+i];
1107:       b[2] = b[7] = -d->eigr[i_s+i]/d->nX[i_s+i];
1108:       b[6] = -(b[3] = d->eigi[i_s+i]/d->nX[i_s+i]);
1109:       b[1] = b[4] = 0.0;
1110:       X[0] = kr[i]; X[1] = kr[i+1]; X[2] = r[i]; X[3] = r[i+1];
1111:       SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 2, Z, size_Z);
1112: 
1113:       i++;
1114:     }
1115:   }
1116:   for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;

1118:   /* kr <- P*kr */
1119:   d->calcpairs_proj_res(d, i_s, i_e, r);

1121:   /* u <- K^{-1} X(i) */
1122:   for(i=0; i<n; i++) {
1123:     d->improvex_precond(d, i_s+i, v[i], u[i]);
1124:   }

1126:   return(0);
1127: }


1132: PetscErrorCode dvd_improvex_jd_lit_const(dvdDashboard *d, dvdBlackboard *b,
1133:                                          PetscInt maxits, PetscReal tol,
1134:                                          PetscReal fix)
1135: {
1136:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;

1139:   /* Setup the step */
1140:   if (b->state >= DVD_STATE_CONF) {
1141:     data->maxits = maxits;
1142:     data->tol = tol;
1143:     data->fix = fix;
1144:     d->improvex_jd_lit = dvd_improvex_jd_lit_const_0;
1145:   }
1146:   return(0);
1147: }


1152: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d, PetscInt i,
1153:   PetscScalar* theta, PetscScalar* thetai, PetscInt *maxits, PetscReal *tol)
1154: {
1155:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
1156:   PetscReal       a;

1159:   a = SlepcAbsEigenvalue(d->eigr[i],d->eigi[i]);

1161:   if (d->nR[i]/a < data->fix) {
1162:     theta[0] = d->eigr[i];
1163:     theta[1] = 1.0;
1164: #if !defined(PETSC_USE_COMPLEX)
1165:     *thetai = d->eigi[i];
1166: #endif
1167:   } else {
1168:     theta[0] = d->target[0];
1169:     theta[1] = d->target[1];
1170: #if !defined(PETSC_USE_COMPLEX)
1171:     *thetai = 0.0;
1172: #endif
1173: }

1175: #if defined(PETSC_USE_COMPLEX)
1176:   if(thetai) *thetai = 0.0;
1177: #endif
1178:   *maxits = data->maxits;
1179:   *tol = data->tol;
1180:   return(0);
1181: }


1184: /**** Patterns implementation *************************************************/

1186: typedef PetscInt (*funcV0_t)(dvdDashboard*, PetscInt, PetscInt, Vec*);
1187: typedef PetscInt (*funcV1_t)(dvdDashboard*, PetscInt, PetscInt, Vec*,
1188:                              PetscScalar*, Vec);

1192: /* Compute D <- K^{-1} * funcV[r_s..r_e] */
1193: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d, void *funcV, Vec *D,
1194:   PetscInt max_size_D, PetscInt r_s, PetscInt r_e, Vec *auxV,
1195:   PetscScalar *auxS)
1196: {
1197:   PetscErrorCode  ierr;
1198:   PetscInt        i;

1201:   if (max_size_D >= r_e-r_s+1) {
1202:     /* The optimized version needs one vector extra of D */
1203:     /* D(1:r.size) = R(r_s:r_e-1) */
1204:     if (auxS) ((funcV1_t)funcV)(d, r_s, r_e, D+1, auxS, auxV[0]);
1205:     else      ((funcV0_t)funcV)(d, r_s, r_e, D+1);

1207:     /* D = K^{-1} * R */
1208:     for (i=0; i<r_e-r_s; i++) {
1209:       d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1210:     }
1211:   } else if (max_size_D == r_e-r_s) {
1212:     /* Non-optimized version */
1213:     /* auxV <- R[r_e-1] */
1214:     if (auxS) ((funcV1_t)funcV)(d, r_e-1, r_e, auxV, auxS, auxV[1]);
1215:     else      ((funcV0_t)funcV)(d, r_e-1, r_e, auxV);

1217:     /* D(1:r.size-1) = R(r_s:r_e-2) */
1218:     if (auxS) ((funcV1_t)funcV)(d, r_s, r_e-1, D+1, auxS, auxV[1]);
1219:     else      ((funcV0_t)funcV)(d, r_s, r_e-1, D+1);

1221:     /* D = K^{-1} * R */
1222:     for (i=0; i<r_e-r_s-1; i++) {
1223:       d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1224:     }
1225:     d->improvex_precond(d, r_e-1, auxV[0], D[r_e-r_s-1]);
1226:   } else SETERRQ(PETSC_COMM_SELF,1, "Problem: r_e-r_s > max_size_D");
1227:   return(0);
1228: }


1233: /* Compute (I - KZ*iXKZ*X')*V where,
1234:    V, the vectors to apply the projector,
1235:    cV, the number of vectors in V,
1236:    auxS, auxiliar vector of size length 3*size_iXKZ*cV
1237: */
1238: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS)
1239: {
1240: #if defined(PETSC_MISSING_LAPACK_GETRS) 
1242:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1243: #else
1244:   PetscErrorCode  ierr;
1245:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
1246:   PetscInt        size_in = data->size_iXKZ*cV, i, ldh;
1247:   PetscScalar     *h, *in, *out;
1248:   PetscBLASInt    cV_, n, info, ld;
1249:   DvdReduction    r;
1250:   DvdReductionChunk
1251:                   ops[4];
1252:   DvdMult_copy_func
1253:                   sr[4];
1254: 
1256:   if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");

1258:   /* h <- X'*V */
1259:   h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1260:   SlepcAllReduceSumBegin(ops, 4, in, out, size_in, &r,
1261:                                 ((PetscObject)d->V[0])->comm);
1262:   for (i=0; i<cV; i++) {
1263:     VecsMultS(&h[i*ldh],0,ldh,d->V-data->size_KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i*2]);
1264:     VecsMultS(&h[i*ldh+data->size_KZ],0,ldh,data->u,0,data->size_iXKZ-data->size_KZ,V+i,0,1,&r,&sr[i*2+1]);
1265:   }
1266:   SlepcAllReduceSumEnd(&r);

1268:   /* h <- iXKZ\h */
1269:   cV_ = PetscBLASIntCast(cV);
1270:   n = PetscBLASIntCast(data->size_iXKZ);
1271:   ld = PetscBLASIntCast(data->ldiXKZ);
1273:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1274:   LAPACKgetrs_("N", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info);
1275:   PetscFPTrapPop();
1276:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);

1278:   /* V <- V - KZ*h */
1279:   for (i=0; i<cV; i++) {
1280:     SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->KZ,data->size_iXKZ,&h[ldh*i],ldh,data->size_iXKZ,1);
1281:   }
1282:   return(0);
1283: #endif
1284: }

1288: /* Compute (I - X*iXKZ*KZ')*V where,
1289:    V, the vectors to apply the projector,
1290:    cV, the number of vectors in V,
1291:    auxS, auxiliar vector of size length 3*size_iXKZ*cV
1292: */
1293: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS)
1294: {
1295: #if defined(PETSC_MISSING_LAPACK_GETRS) 
1297:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1298: #else
1299:   PetscErrorCode  ierr;
1300:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
1301:   PetscInt        size_in = data->size_iXKZ*cV, i, ldh;
1302:   PetscScalar     *h, *in, *out;
1303:   PetscBLASInt    cV_, n, info, ld;
1304:   DvdReduction    r;
1305:   DvdReductionChunk
1306:                   ops[2];
1307:   DvdMult_copy_func
1308:                   sr[2];
1309: 
1311:   if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");

1313:   /* h <- KZ'*V */
1314:   h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1315:   SlepcAllReduceSumBegin(ops, 2, in, out, size_in, &r,
1316:                                 ((PetscObject)d->V[0])->comm);
1317:   for (i=0; i<cV; i++) {
1318:     VecsMultS(&h[i*ldh],0,ldh,data->KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i]);
1319:   }
1320:   SlepcAllReduceSumEnd(&r);

1322:   /* h <- iXKZ\h */
1323:   cV_ = PetscBLASIntCast(cV);
1324:   n = PetscBLASIntCast(data->size_iXKZ);
1325:   ld = PetscBLASIntCast(data->ldiXKZ);
1327:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1328:   LAPACKgetrs_("C", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info);
1329:   PetscFPTrapPop();
1330:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);

1332:   /* V <- V - X*h */
1333:   for (i=0; i<cV; i++) {
1334:     SlepcUpdateVectorsZ(V+i,1.0,-1.0,d->V-data->size_KZ,data->size_KZ,&h[ldh*i],ldh,data->size_KZ,1);
1335:     SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->u,data->size_iXKZ-data->size_KZ,&h[ldh*i+data->size_KZ],ldh,data->size_iXKZ-data->size_KZ,1);
1336:   }
1337:   return(0);
1338: #endif
1339: }