Actual source code: dvd_improvex.c
1: /*
2: SLEPc eigensolver: "davidson"
4: Step: improve the eigenvectors X
6: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7: SLEPc - Scalable Library for Eigenvalue Problem Computations
8: Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain
10: This file is part of SLEPc.
11:
12: SLEPc is free software: you can redistribute it and/or modify it under the
13: terms of version 3 of the GNU Lesser General Public License as published by
14: the Free Software Foundation.
16: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
17: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
19: more details.
21: You should have received a copy of the GNU Lesser General Public License
22: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
23: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
24: */
26: #include davidson.h
27: #include <slepcvec.h>
28: #include <slepcblaslapack.h>
30: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d, void *funcV, Vec *D,
31: PetscInt max_size_D, PetscInt r_s, PetscInt r_e, Vec *auxV,
32: PetscScalar *auxS);
33: PetscErrorCode dvd_pcapplyba(PC pc,PCSide side,Vec in,Vec out,Vec w);
34: PetscErrorCode dvd_pcapply(PC pc,Vec in,Vec out);
35: PetscErrorCode dvd_pcapplytrans(PC pc,Vec in,Vec out);
36: PetscErrorCode dvd_matmult_jd(Mat A,Vec in,Vec out);
37: PetscErrorCode dvd_matmulttrans_jd(Mat A,Vec in,Vec out);
38: PetscErrorCode dvd_matgetvecs_jd(Mat A, Vec *right, Vec *left);
39: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d);
40: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d);
41: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d);
42: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d, Vec *D,
43: PetscInt max_size_D, PetscInt r_s,
44: PetscInt r_e, PetscInt *size_D);
45: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d, PetscInt i_s,
46: PetscInt i_e, Vec **u, Vec **v, Vec *kr, Vec **auxV, PetscScalar **auxS,
47: PetscScalar *theta, PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY,
48: PetscInt ld);
49: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d, PetscInt i_s,
50: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
51: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld);
52: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d, PetscInt i_s,
53: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
54: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld);
55: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d, PetscInt i,
56: PetscScalar* theta, PetscScalar* thetai,
57: PetscInt *maxits, PetscReal *tol);
58: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS);
59: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS);
61: #define size_Z (64*4)
63: /**** JD update step (I - Kfg'/(g'Kf)) K(A - sB) (I - Kfg'/(g'Kf)) t = (I - Kfg'/(g'Kf))r ****/
65: typedef struct {
66: PetscInt size_X;
67: void
68: *old_improveX_data; /* old improveX_data */
69: improveX_type
70: old_improveX; /* old improveX */
71: KSP ksp; /* correction equation solver */
72: Vec
73: friends, /* reference vector for composite vectors */
74: *auxV; /* auxiliar vectors */
75: PetscScalar *auxS, /* auxiliar scalars */
76: *theta,
77: *thetai; /* the shifts used in the correction eq. */
78: PetscInt maxits, /* maximum number of iterations */
79: r_s, r_e, /* the selected eigenpairs to improve */
80: ksp_max_size; /* the ksp maximum subvectors size */
81: PetscReal tol, /* the maximum solution tolerance */
82: lastTol, /* last tol for dynamic stopping criterion */
83: fix; /* tolerance for using the approx. eigenvalue */
84: PetscBool
85: dynamic; /* if the dynamic stopping criterion is applied */
86: dvdDashboard
87: *d; /* the currect dvdDashboard reference */
88: PC old_pc; /* old pc in ksp */
89: Vec
90: *u, /* new X vectors */
91: *real_KZ, /* original KZ */
92: *KZ; /* KZ vecs for the projector KZ*inv(X'*KZ)*X' */
93: PetscScalar
94: *XKZ, /* X'*KZ */
95: *iXKZ; /* inverse of XKZ */
96: PetscInt
97: ldXKZ, /* leading dimension of XKZ */
98: size_iXKZ, /* size of iXKZ */
99: ldiXKZ, /* leading dimension of iXKZ */
100: size_KZ, /* size of converged KZ */
101: size_real_KZ, /* original size of KZ */
102: size_cX, /* last value of d->size_cX */
103: old_size_X; /* last number of improved vectors */
104: PetscBLASInt
105: *iXKZPivots; /* array of pivots */
106: } dvdImprovex_jd;
108: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);
109: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);
113: PetscErrorCode dvd_improvex_jd(dvdDashboard *d,dvdBlackboard *b,KSP ksp,PetscInt max_bs,PetscInt cX_impr,PetscBool dynamic)
114: {
115: PetscErrorCode ierr;
116: dvdImprovex_jd *data;
117: PetscBool useGD,her_probl,std_probl;
118: PC pc;
119: PetscInt size_P,s=1;
122: std_probl = DVD_IS(d->sEP,DVD_EP_STD)?PETSC_TRUE:PETSC_FALSE;
123: her_probl = DVD_IS(d->sEP,DVD_EP_HERMITIAN)?PETSC_TRUE:PETSC_FALSE;
125: /* Setting configuration constrains */
126: PetscObjectTypeCompare((PetscObject)ksp,KSPPREONLY,&useGD);
128: /* If the arithmetic is real and the problem is not Hermitian, then
129: the block size is incremented in one */
130: #if !defined(PETSC_USE_COMPLEX)
131: if (!her_probl) {
132: max_bs++;
133: b->max_size_P = PetscMax(b->max_size_P,2);
134: s = 2;
135: } else
136: #endif
137: b->max_size_P = PetscMax(b->max_size_P,1);
138: b->max_size_X = PetscMax(b->max_size_X,max_bs);
139: size_P = b->max_size_P+cX_impr;
140: b->max_size_auxV = PetscMax(b->max_size_auxV,
141: b->max_size_X*3+(useGD?0:2)+ /* u, kr, auxV(max_size_X+2?) */
142: ((her_probl || !d->eps->trueres)?1:PetscMax(s*2,b->max_size_cX_proj+b->max_size_X))); /* testConv */
143:
144: b->own_scalars+= size_P*size_P; /* XKZ */
145: b->max_size_auxS = PetscMax(b->max_size_auxS,
146: b->max_size_X*3 + /* theta, thetai */
147: size_P*size_P + /* iXKZ */
148: FromIntToScalar(size_P) + /* iXkZPivots */
149: PetscMax(PetscMax(
150: 3*b->max_size_proj*b->max_size_X, /* dvd_improvex_apply_proj */
151: 8*cX_impr*b->max_size_X), /* dvd_improvex_jd_proj_cuv_KZX */
152: (her_probl || !d->eps->trueres)?0:b->max_nev*b->max_nev+PetscMax(b->max_nev*6,(b->max_nev+b->max_size_proj)*s+b->max_nev*(b->max_size_X+b->max_size_cX_proj)*(std_probl?2:4)+64))); /* preTestConv */
153: b->own_vecs+= size_P; /* KZ */
155: /* Setup the preconditioner */
156: if (ksp) {
157: KSPGetPC(ksp,&pc);
158: dvd_static_precond_PC(d,b,pc);
159: } else {
160: dvd_static_precond_PC(d,b,0);
161: }
163: /* Setup the step */
164: if (b->state >= DVD_STATE_CONF) {
165: PetscMalloc(sizeof(dvdImprovex_jd),&data);
166: data->dynamic = dynamic;
167: data->size_real_KZ = size_P;
168: data->real_KZ = b->free_vecs; b->free_vecs+= data->size_real_KZ;
169: d->max_cX_in_impr = cX_impr;
170: data->XKZ = b->free_scalars; b->free_scalars+= size_P*size_P;
171: data->ldXKZ = size_P;
172: data->size_X = b->max_size_X;
173: data->old_improveX_data = d->improveX_data;
174: d->improveX_data = data;
175: data->old_improveX = d->improveX;
176: data->ksp = useGD?PETSC_NULL:ksp;
177: data->d = d;
178: d->improveX = dvd_improvex_jd_gen;
179: data->ksp_max_size = max_bs;
181: DVD_FL_ADD(d->startList,dvd_improvex_jd_start);
182: DVD_FL_ADD(d->endList,dvd_improvex_jd_end);
183: DVD_FL_ADD(d->destroyList,dvd_improvex_jd_d);
184: }
186: return(0);
187: }
192: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d)
193: {
194: PetscErrorCode ierr;
195: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
196: PetscInt rA, cA, rlA, clA;
197: Mat A;
198: PetscBool t;
199: PC pc;
203: data->KZ = data->real_KZ;
204: data->size_KZ = data->size_cX = data->old_size_X = 0;
205: data->lastTol = data->dynamic?0.5:0.0;
207: /* Setup the ksp */
208: if(data->ksp) {
209: /* Create the reference vector */
210: VecCreateCompWithVecs(d->V, data->ksp_max_size, PETSC_NULL,
211: &data->friends);
213: /* Save the current pc and set a PCNONE */
214: KSPGetPC(data->ksp, &data->old_pc);
215: PetscObjectTypeCompare((PetscObject)data->old_pc, PCNONE, &t);
216:
217: data->lastTol = 0.5;
218: if (t) {
219: data->old_pc = 0;
220: } else {
221: PetscObjectReference((PetscObject)data->old_pc);
222: PCCreate(((PetscObject)d->eps)->comm, &pc);
223: PCSetType(pc, PCSHELL);
224: PCSetOperators(pc, d->A, d->A, SAME_PRECONDITIONER);
225: PCShellSetApply(pc,dvd_pcapply);
226: PCShellSetApplyBA(pc,dvd_pcapplyba);
227: PCShellSetApplyTranspose(pc,dvd_pcapplytrans);
228: KSPSetPC(data->ksp, pc);
229: PCDestroy(&pc);
230: }
232: /* Create the (I-v*u')*K*(A-s*B) matrix */
233: MatGetSize(d->A, &rA, &cA);
234: MatGetLocalSize(d->A, &rlA, &clA);
235: MatCreateShell(((PetscObject)d->A)->comm, rlA*data->ksp_max_size,
236: clA*data->ksp_max_size, rA*data->ksp_max_size,
237: cA*data->ksp_max_size, data, &A);
238: MatShellSetOperation(A, MATOP_MULT,
239: (void(*)(void))dvd_matmult_jd);
240: MatShellSetOperation(A, MATOP_MULT_TRANSPOSE,
241: (void(*)(void))dvd_matmulttrans_jd);
242: MatShellSetOperation(A, MATOP_GET_VECS,
243: (void(*)(void))dvd_matgetvecs_jd);
244:
246: /* Try to avoid KSPReset */
247: KSPGetOperatorsSet(data->ksp,&t,PETSC_NULL);
248: if (t) {
249: Mat M;
250: PetscInt rM;
251: KSPGetOperators(data->ksp,&M,PETSC_NULL,PETSC_NULL);
252: MatGetSize(M,&rM,PETSC_NULL);
253: if (rM != rA*data->ksp_max_size) { KSPReset(data->ksp); }
254: }
255: KSPSetOperators(data->ksp, A, A, SAME_PRECONDITIONER);
256:
257: KSPSetUp(data->ksp);
258: MatDestroy(&A);
259: } else {
260: data->old_pc = 0;
261: data->friends = PETSC_NULL;
262: }
263:
264: return(0);
265: }
270: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d)
271: {
272: PetscErrorCode ierr;
273: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
277: if (data->friends) { VecDestroy(&data->friends); }
279: /* Restore the pc of ksp */
280: if (data->old_pc) {
281: KSPSetPC(data->ksp, data->old_pc);
282: PCDestroy(&data->old_pc);
283: }
285: return(0);
286: }
291: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d)
292: {
293: PetscErrorCode ierr;
294: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
297:
298: /* Restore changes in dvdDashboard */
299: d->improveX_data = data->old_improveX_data;
301: /* Free local data and objects */
302: PetscFree(data);
304: return(0);
305: }
310: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d,Vec *D,PetscInt max_size_D,PetscInt r_s,PetscInt r_e,PetscInt *size_D)
311: {
312: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
313: PetscErrorCode ierr;
314: PetscInt i,j,n,maxits,maxits0,lits,s,ld,k;
315: PetscScalar *pX,*pY,*auxS = d->auxS,*auxS0;
316: PetscReal tol,tol0;
317: Vec *u,*v,*kr,kr_comp,D_comp;
318: PetscBool odd_situation = PETSC_FALSE;
322: /* Quick exit */
323: if ((max_size_D == 0) || r_e-r_s <= 0) {
324: *size_D = 0;
325: /* Callback old improveX */
326: if (data->old_improveX) {
327: d->improveX_data = data->old_improveX_data;
328: data->old_improveX(d, PETSC_NULL, 0, 0, 0, PETSC_NULL);
329: d->improveX_data = data;
330: }
331: return(0);
332: }
333:
334: n = PetscMin(PetscMin(data->size_X, max_size_D), r_e-r_s);
335: if (n == 0) SETERRQ(PETSC_COMM_SELF,1, "n == 0");
336: if (data->size_X < r_e-r_s) SETERRQ(PETSC_COMM_SELF,1, "size_X < r_e-r_s");
338: DSGetLeadingDimension(d->ps,&ld);
340: /* Restart lastTol if a new pair converged */
341: if (data->dynamic && data->size_cX < d->size_cX)
342: data->lastTol = 0.5;
344: for(i=0, s=0, auxS0=auxS; i<n; i+=s) {
345: /* If the selected eigenvalue is complex, but the arithmetic is real... */
346: #if !defined(PETSC_USE_COMPLEX)
347: if (d->eigi[i] != 0.0) {
348: if (i+2 <= max_size_D) s=2; else break;
349: } else
350: #endif
351: s=1;
353: data->auxV = d->auxV;
354: data->r_s = r_s+i; data->r_e = r_s+i+s;
355: auxS = auxS0;
356: data->theta = auxS; auxS+= 2*s;
357: data->thetai = auxS; auxS+= s;
358: kr = data->auxV; data->auxV+= s;
360: /* Compute theta, maximum iterations and tolerance */
361: maxits = 0; tol = 1;
362: for(j=0; j<s; j++) {
363: d->improvex_jd_lit(d, r_s+i+j, &data->theta[2*j],
364: &data->thetai[j], &maxits0, &tol0);
365:
366: maxits+= maxits0; tol*= tol0;
367: }
368: maxits/= s; tol = data->dynamic?data->lastTol:exp(log(tol)/s);
370: /* Compute u, v and kr */
371: k = r_s+i+d->cX_in_H;
372: DSVectors(d->ps,DS_MAT_X,&k,PETSC_NULL);
373: DSNormalize(d->ps,DS_MAT_X,r_s+i+d->cX_in_H);
374: k = r_s+i+d->cX_in_H;
375: DSVectors(d->ps,DS_MAT_Y,&k,PETSC_NULL);
376: DSNormalize(d->ps,DS_MAT_Y,r_s+i+d->cX_in_H);
377: DSGetArray(d->ps,DS_MAT_X,&pX);
378: DSGetArray(d->ps,DS_MAT_Y,&pY);
379: dvd_improvex_jd_proj_cuv(d,r_s+i,r_s+i+s,&u,&v,kr,&data->auxV,&auxS,data->theta,data->thetai,pX,pY,ld);
380: DSRestoreArray(d->ps,DS_MAT_X,&pX);
381: DSRestoreArray(d->ps,DS_MAT_Y,&pY);
382: data->u = u;
384: /* Check if the first eigenpairs are converged */
385: if (i == 0) {
386: PetscInt n_auxV = data->auxV-d->auxV+s, n_auxS = auxS - d->auxS;
387: d->auxV+= n_auxV; d->size_auxV-= n_auxV;
388: d->auxS+= n_auxS; d->size_auxS-= n_auxS;
389: d->preTestConv(d,0,s,s,d->auxV-s,PETSC_NULL,&d->npreconv);
390: d->auxV-= n_auxV; d->size_auxV+= n_auxV;
391: d->auxS-= n_auxS; d->size_auxS+= n_auxS;
392: if (d->npreconv > 0) break;
393: }
395: /* Test the odd situation of solving Ax=b with A=I */
396: #if !defined(PETSC_USE_COMPLEX)
397: odd_situation = (data->ksp && data->theta[0] == 1. && data->theta[1] == 0. && data->thetai[0] == 0. && d->B == PETSC_NULL)?PETSC_TRUE:PETSC_FALSE;
398: #else
399: odd_situation = (data->ksp && data->theta[0] == 1. && data->theta[1] == 0. && d->B == PETSC_NULL)?PETSC_TRUE:PETSC_FALSE;
400: #endif
401: /* If JD */
402: if (data->ksp && !odd_situation) {
403: data->auxS = auxS;
405: /* kr <- -kr */
406: for(j=0; j<s; j++) {
407: VecScale(kr[j], -1.0);
408: }
410: /* Compouse kr and D */
411: VecCreateCompWithVecs(kr, data->ksp_max_size, data->friends,
412: &kr_comp);
413: VecCreateCompWithVecs(&D[i], data->ksp_max_size, data->friends,
414: &D_comp);
415: VecCompSetSubVecs(data->friends,s,PETSC_NULL);
416:
417: /* Solve the correction equation */
418: KSPSetTolerances(data->ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT,
419: maxits);
420: KSPSolve(data->ksp, kr_comp, D_comp);
421: KSPGetIterationNumber(data->ksp, &lits);
422: d->eps->OP->lineariterations+= lits;
423:
424: /* Destroy the composed ks and D */
425: VecDestroy(&kr_comp);
426: VecDestroy(&D_comp);
428: /* If GD */
429: } else {
430: for(j=0; j<s; j++) {
431: d->improvex_precond(d, r_s+i+j, kr[j], D[i+j]);
432: }
433: dvd_improvex_apply_proj(d, &D[i], s, auxS);
434: }
435: /* Prevent that short vectors are discarded in the orthogonalization */
436: if (i == 0 && d->eps->errest[d->nconv+r_s] > PETSC_MACHINE_EPSILON && d->eps->errest[d->nconv+r_s] < PETSC_MAX_REAL) {
437: for(j=0; j<s; j++) {
438: VecScale(D[j],1.0/d->eps->errest[d->nconv+r_s]);
439: }
440: }
441: }
442: *size_D = i;
443: if (data->dynamic) data->lastTol = PetscMax(data->lastTol/2.0,PETSC_MACHINE_EPSILON*10.0);
444:
445: /* Callback old improveX */
446: if (data->old_improveX) {
447: d->improveX_data = data->old_improveX_data;
448: data->old_improveX(d, PETSC_NULL, 0, 0, 0, PETSC_NULL);
449: d->improveX_data = data;
450: }
452: return(0);
453: }
455: /* y <- theta[1]A*x - theta[0]*B*x
456: auxV, two auxiliary vectors */
459: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
460: {
461: PetscErrorCode ierr;
462: PetscInt n,i;
463: const Vec *Bx;
466: n = data->r_e - data->r_s;
467: for(i=0; i<n; i++) {
468: MatMult(data->d->A,x[i],y[i]);
469: }
471: for(i=0; i<n; i++) {
472: #if !defined(PETSC_USE_COMPLEX)
473: if(data->d->eigi[data->r_s+i] != 0.0) {
474: if (data->d->B) {
475: MatMult(data->d->B,x[i],auxV[0]);
476: MatMult(data->d->B,x[i+1],auxV[1]);
477: Bx = auxV;
478: } else {
479: Bx = &x[i];
480: }
482: /* y_i <- [ t_2i+1*A*x_i - t_2i*Bx_i + ti_i*Bx_i+1;
483: y_i+1 t_2i+1*A*x_i+1 - ti_i*Bx_i - t_2i*Bx_i+1 ] */
484: VecAXPBYPCZ(y[i],-data->theta[2*i],data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
485: VecAXPBYPCZ(y[i+1],-data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
486: i++;
487: } else
488: #endif
489: {
490: if (data->d->B) {
491: MatMult(data->d->B,x[i],auxV[0]);
492: Bx = auxV;
493: } else {
494: Bx = &x[i];
495: }
496: VecAXPBY(y[i],-data->theta[i*2],data->theta[i*2+1],Bx[0]);
497: }
498: }
499: return(0);
500: }
504: /* y <- theta[1]'*A'*x - theta[0]'*B'*x
505: auxV, two auxiliary vectors */
508: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
509: {
510: PetscErrorCode ierr;
511: PetscInt n,i;
512: const Vec *Bx;
515: n = data->r_e - data->r_s;
516: for(i=0; i<n; i++) {
517: MatMultTranspose(data->d->A,x[i],y[i]);
518: }
520: for(i=0; i<n; i++) {
521: #if !defined(PETSC_USE_COMPLEX)
522: if(data->d->eigi[data->r_s+i] != 0.0) {
523: if (data->d->B) {
524: MatMultTranspose(data->d->B,x[i],auxV[0]);
525: MatMultTranspose(data->d->B,x[i+1],auxV[1]);
526: Bx = auxV;
527: } else {
528: Bx = &x[i];
529: }
531: /* y_i <- [ t_2i+1*A*x_i - t_2i*Bx_i - ti_i*Bx_i+1;
532: y_i+1 t_2i+1*A*x_i+1 + ti_i*Bx_i - t_2i*Bx_i+1 ] */
533: VecAXPBYPCZ(y[i],-data->theta[2*i],-data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
534: VecAXPBYPCZ(y[i+1],data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
535: i++;
536: } else
537: #endif
538: {
539: if (data->d->B) {
540: MatMultTranspose(data->d->B,x[i],auxV[0]);
541: Bx = auxV;
542: } else {
543: Bx = &x[i];
544: }
545: VecAXPBY(y[i],PetscConj(-data->theta[i*2]),PetscConj(data->theta[i*2+1]),Bx[0]);
546: }
547: }
548: return(0);
549: }
554: PetscErrorCode dvd_pcapplyba(PC pc,PCSide side,Vec in,Vec out,Vec w)
555: {
556: PetscErrorCode ierr;
557: dvdImprovex_jd *data;
558: PetscInt n,i;
559: const Vec *inx,*outx,*wx;
560: Mat A;
564: PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
565: MatShellGetContext(A,(void**)&data);
566: VecCompGetSubVecs(in,PETSC_NULL,&inx);
567: VecCompGetSubVecs(out,PETSC_NULL,&outx);
568: VecCompGetSubVecs(w,PETSC_NULL,&wx);
569: n = data->r_e - data->r_s;
571: /* Check auxiliary vectors */
572: if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
574: switch(side) {
575: case PC_LEFT:
576: /* aux <- theta[1]A*in - theta[0]*B*in */
577: dvd_aux_matmult(data,inx,data->auxV,outx);
578:
579: /* out <- K * aux */
580: for(i=0; i<n; i++) {
581: data->d->improvex_precond(data->d,data->r_s+i,data->auxV[i],outx[i]);
582: }
583: break;
584: case PC_RIGHT:
585: /* aux <- K * in */
586: for(i=0; i<n; i++) {
587: data->d->improvex_precond(data->d,data->r_s+i,inx[i],data->auxV[i]);
588: }
589:
590: /* out <- theta[1]A*auxV - theta[0]*B*auxV */
591: dvd_aux_matmult(data,data->auxV,outx,wx);
592: break;
593: case PC_SYMMETRIC:
594: /* aux <- K^{1/2} * in */
595: for(i=0; i<n; i++) {
596: PCApplySymmetricRight(data->old_pc,inx[i],data->auxV[i]);
597: }
598:
599: /* out <- theta[1]A*auxV - theta[0]*B*auxV */
600: dvd_aux_matmult(data,data->auxV,wx,outx);
601:
602: /* aux <- K^{1/2} * in */
603: for(i=0; i<n; i++) {
604: PCApplySymmetricLeft(data->old_pc,wx[i],outx[i]);
605: }
606: break;
607: default:
608: SETERRQ(PETSC_COMM_SELF,1, "Unsupported KSP side");
609: }
611: /* out <- out - v*(u'*out) */
612: dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
614: return(0);
615: }
620: PetscErrorCode dvd_pcapply(PC pc,Vec in,Vec out)
621: {
622: PetscErrorCode ierr;
623: dvdImprovex_jd *data;
624: PetscInt n,i;
625: const Vec *inx, *outx;
626: Mat A;
630: PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
631: MatShellGetContext(A,(void**)&data);
632: VecCompGetSubVecs(in,PETSC_NULL,&inx);
633: VecCompGetSubVecs(out,PETSC_NULL,&outx);
634: n = data->r_e - data->r_s;
636: /* out <- K * in */
637: for(i=0; i<n; i++) {
638: data->d->improvex_precond(data->d,data->r_s+i,inx[i],outx[i]);
639: }
641: /* out <- out - v*(u'*out) */
642: dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
644: return(0);
645: }
649: PetscErrorCode dvd_pcapplytrans(PC pc,Vec in,Vec out)
650: {
651: PetscErrorCode ierr;
652: dvdImprovex_jd *data;
653: PetscInt n,i;
654: const Vec *inx, *outx;
655: Mat A;
659: PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
660: MatShellGetContext(A,(void**)&data);
661: VecCompGetSubVecs(in,PETSC_NULL,&inx);
662: VecCompGetSubVecs(out,PETSC_NULL,&outx);
663: n = data->r_e - data->r_s;
665: /* Check auxiliary vectors */
666: if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
668: /* auxV <- in */
669: for(i=0; i<n; i++) {
670: VecCopy(inx[i],data->auxV[i]);
671: }
672:
673: /* auxV <- auxV - u*(v'*auxV) */
674: dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);
676: /* out <- K' * aux */
677: for(i=0; i<n; i++) {
678: PCApplyTranspose(data->old_pc,data->auxV[i],outx[i]);
679: }
681: return(0);
682: }
687: PetscErrorCode dvd_matmult_jd(Mat A,Vec in,Vec out)
688: {
689: PetscErrorCode ierr;
690: dvdImprovex_jd *data;
691: PetscInt n;
692: const Vec *inx, *outx;
693: PCSide side;
697: MatShellGetContext(A,(void**)&data);
698: VecCompGetSubVecs(in,PETSC_NULL,&inx);
699: VecCompGetSubVecs(out,PETSC_NULL,&outx);
700: n = data->r_e - data->r_s;
702: /* Check auxiliary vectors */
703: if (&data->auxV[2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
705: /* out <- theta[1]A*in - theta[0]*B*in */
706: dvd_aux_matmult(data,inx,outx,data->auxV);
708: KSPGetPCSide(data->ksp,&side);
709: if (side == PC_RIGHT) {
710: /* out <- out - v*(u'*out) */
711: dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
712: }
714: return(0);
715: }
719: PetscErrorCode dvd_matmulttrans_jd(Mat A,Vec in,Vec out)
720: {
721: PetscErrorCode ierr;
722: dvdImprovex_jd *data;
723: PetscInt n,i;
724: const Vec *inx,*outx,*r,*auxV;
725: PCSide side;
729: MatShellGetContext(A,(void**)&data);
730: VecCompGetSubVecs(in,PETSC_NULL,&inx);
731: VecCompGetSubVecs(out,PETSC_NULL,&outx);
732: n = data->r_e - data->r_s;
734: /* Check auxiliary vectors */
735: if (&data->auxV[n+2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
737: KSPGetPCSide(data->ksp,&side);
738: if (side == PC_RIGHT) {
739: /* auxV <- in */
740: for(i=0; i<n; i++) {
741: VecCopy(inx[i],data->auxV[i]);
742: }
744: /* auxV <- auxV - v*(u'*auxV) */
745: dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);
746: r = data->auxV;
747: auxV = data->auxV+n;
748: } else {
749: r = inx;
750: auxV = data->auxV;
751: }
753: /* out <- theta[1]A*r - theta[0]*B*r */
754: dvd_aux_matmulttrans(data,r,outx,auxV);
756: return(0);
757: }
762: PetscErrorCode dvd_matgetvecs_jd(Mat A, Vec *right, Vec *left)
763: {
764: PetscErrorCode ierr;
765: Vec *r, *l;
766: dvdImprovex_jd *data;
767: PetscInt n, i;
771: MatShellGetContext(A, (void**)&data);
772: n = data->ksp_max_size;
773: if (right) {
774: PetscMalloc(sizeof(Vec)*n, &r);
775: }
776: if (left) {
777: PetscMalloc(sizeof(Vec)*n, &l);
778: }
779: for (i=0; i<n; i++) {
780: MatGetVecs(data->d->A, right?&r[i]:PETSC_NULL,
781: left?&l[i]:PETSC_NULL);
782: }
783: if(right) {
784: VecCreateCompWithVecs(r, n, data->friends, right);
785: for (i=0; i<n; i++) {
786: VecDestroy(&r[i]);
787: }
788: }
789: if(left) {
790: VecCreateCompWithVecs(l, n, data->friends, left);
791: for (i=0; i<n; i++) {
792: VecDestroy(&l[i]);
793: }
794: }
796: if (right) {
797: PetscFree(r);
798: }
799: if (left) {
800: PetscFree(l);
801: }
803: return(0);
804: }
809: PetscErrorCode dvd_improvex_jd_proj_uv(dvdDashboard *d, dvdBlackboard *b,
810: ProjType_t p)
811: {
814: /* Setup the step */
815: if (b->state >= DVD_STATE_CONF) {
816: switch(p) {
817: case DVD_PROJ_KXX:
818: d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KXX; break;
819: case DVD_PROJ_KZX:
820: d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KZX; break;
821: }
822: }
824: return(0);
825: }
829: /*
830: Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
831: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
832: where
833: auxV, 4*(i_e-i_s) auxiliar global vectors
834: pX,pY, the right and left eigenvectors of the projected system
835: ld, the leading dimension of pX and pY
836: auxS: max(8*bs*max_cX_in_proj,size_V*size_V)
837: */
838: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d, PetscInt i_s,
839: PetscInt i_e, Vec **u, Vec **v, Vec *kr, Vec **auxV, PetscScalar **auxS,
840: PetscScalar *theta, PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY,
841: PetscInt ld)
842: {
843: #if defined(PETSC_MISSING_LAPACK_GETRF)
845: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable");
846: #else
847: PetscErrorCode ierr;
848: PetscInt n = i_e - i_s, size_KZ, V_new, rm, i, size_in;
849: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
850: PetscBLASInt s, ldXKZ, info;
851: DvdReduction r;
852: DvdReductionChunk
853: ops[2];
854: DvdMult_copy_func
855: sr[2];
859: /* Check consistency */
860: V_new = d->size_cX - data->size_cX;
861: if (V_new > data->old_size_X) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
862: data->old_size_X = n;
864: /* KZ <- KZ(rm:rm+max_cX-1) */
865: rm = PetscMax(V_new+data->size_KZ-d->max_cX_in_impr, 0);
866: for (i=0; i<d->max_cX_in_impr; i++) {
867: VecCopy(data->KZ[i+rm], data->KZ[i]);
868: }
869: data->size_cX = d->size_cX;
871: /* XKZ <- XKZ(rm:rm+max_cX-1,rm:rm+max_cX-1) */
872: for (i=0; i<d->max_cX_in_impr; i++) {
873: SlepcDenseCopy(&data->XKZ[i*data->ldXKZ+i], data->ldXKZ, &data->XKZ[(i+rm)*data->ldXKZ+i+rm], data->ldXKZ, data->size_KZ, 1);
874: }
875: data->size_KZ = PetscMin(d->max_cX_in_impr, data->size_KZ+V_new);
877: /* Compute X, KZ and KR */
878: *u = *auxV; *auxV+= n;
879: *v = &data->KZ[data->size_KZ];
880: d->improvex_jd_proj_uv(d, i_s, i_e, *u, *v, kr, *auxV, theta, thetai,
881: pX, pY, ld);
883: /* XKZ <- X'*KZ */
884: size_KZ = data->size_KZ+n;
885: size_in = 2*n*data->size_KZ+n*n;
886: SlepcAllReduceSumBegin(ops,2,*auxS,*auxS+size_in,size_in,&r,((PetscObject)d->V[0])->comm);
887: VecsMultS(data->XKZ,0,data->ldXKZ,d->V-data->size_KZ,0,data->size_KZ,data->KZ,data->size_KZ,size_KZ,&r,&sr[0]);
888: VecsMultS(&data->XKZ[data->size_KZ],0,data->ldXKZ,*u,0,n,data->KZ,0,size_KZ,&r,&sr[1]);
889: SlepcAllReduceSumEnd(&r);
891: /* iXKZ <- inv(XKZ) */
892: s = PetscBLASIntCast(size_KZ);
893: data->ldiXKZ = data->size_iXKZ = size_KZ;
894: data->iXKZ = *auxS; *auxS+= size_KZ*size_KZ;
895: data->iXKZPivots = (PetscBLASInt*)*auxS;
896: *auxS += FromIntToScalar(size_KZ);
897: SlepcDenseCopy(data->iXKZ,data->ldiXKZ,data->XKZ,data->ldXKZ,size_KZ,size_KZ);
898: ldXKZ = PetscBLASIntCast(data->ldiXKZ);
899: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
900: LAPACKgetrf_(&s, &s, data->iXKZ, &ldXKZ, data->iXKZPivots, &info);
901: PetscFPTrapPop();
902: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRF %d", info);
904: return(0);
905: #endif
906: }
908: #define DVD_COMPLEX_RAYLEIGH_QUOTIENT(ur,ui,Axr,Axi,Bxr,Bxi,eigr,eigi,b,ierr)\
909: { \
910: VecDot((Axr), (ur), &(b)[0]); /* r*A*r */ \
911: VecDot((Axr), (ui), &(b)[1]); /* i*A*r */ \
912: VecDot((Axi), (ur), &(b)[2]); /* r*A*i */ \
913: VecDot((Axi), (ui), &(b)[3]); /* i*A*i */ \
914: VecDot((Bxr), (ur), &(b)[4]); /* r*B*r */ \
915: VecDot((Bxr), (ui), &(b)[5]); /* i*B*r */ \
916: VecDot((Bxi), (ur), &(b)[6]); /* r*B*i */ \
917: VecDot((Bxi), (ui), &(b)[7]); /* i*B*i */ \
918: (b)[0] = (b)[0]+(b)[3]; /* rAr+iAi */ \
919: (b)[2] = (b)[2]-(b)[1]; /* rAi-iAr */ \
920: (b)[4] = (b)[4]+(b)[7]; /* rBr+iBi */ \
921: (b)[6] = (b)[6]-(b)[5]; /* rBi-iBr */ \
922: (b)[7] = (b)[4]*(b)[4] + (b)[6]*(b)[6]; /* k */ \
923: *(eigr) = ((b)[0]*(b)[4] + (b)[2]*(b)[6]) / (b)[7]; /* eig_r */ \
924: *(eigi) = ((b)[2]*(b)[4] - (b)[0]*(b)[6]) / (b)[7]; /* eig_i */ \
925: }
927: #if !defined(PETSC_USE_COMPLEX)
928: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
929: for((i)=0; (i)<(n); (i)++) { \
930: if ((eigi)[(i_s)+(i)] != 0.0) { \
931: /* eig_r = [(rAr+iAi)*(rBr+iBi) + (rAi-iAr)*(rBi-iBr)]/k \
932: eig_i = [(rAi-iAr)*(rBr+iBi) - (rAr+iAi)*(rBi-iBr)]/k \
933: k = (rBr+iBi)*(rBr+iBi) + (rBi-iBr)*(rBi-iBr) */ \
934: DVD_COMPLEX_RAYLEIGH_QUOTIENT((u)[(i)], (u)[(i)+1], (Ax)[(i)], \
935: (Ax)[(i)+1], (Bx)[(i)], (Bx)[(i)+1], &(b)[8], &(b)[9], (b), (ierr)); \
936: if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[8])/ \
937: PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10 || \
938: PetscAbsScalar((eigi)[(i_s)+(i)] - (b)[9])/ \
939: PetscAbsScalar((eigi)[(i_s)+(i)]) > 1e-10 ) { \
940: (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its "\
941: "Rayleigh quotient value %G+%G\n", \
942: (eigr)[(i_s)+(i)], \
943: (eigi)[(i_s)+(i)], (b)[8], (b)[9]); \
944: } \
945: (i)++; \
946: } \
947: }
948: #else
949: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
950: for((i)=0; (i)<(n); (i)++) { \
951: (ierr) = VecDot((Ax)[(i)], (u)[(i)], &(b)[0]); \
952: (ierr) = VecDot((Bx)[(i)], (u)[(i)], &(b)[1]); \
953: (b)[0] = (b)[0]/(b)[1]; \
954: if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[0])/ \
955: PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10 ) { \
956: (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its " \
957: "Rayleigh quotient value %G+%G\n", \
958: PetscRealPart((eigr)[(i_s)+(i)]), \
959: PetscImaginaryPart((eigr)[(i_s)+(i)]), PetscRealPart((b)[0]), \
960: PetscImaginaryPart((b)[0])); \
961: } \
962: }
963: #endif
968: /*
969: Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
970: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
971: where
972: auxV, 4*(i_e-i_s) auxiliar global vectors
973: pX,pY, the right and left eigenvectors of the projected system
974: ld, the leading dimension of pX and pY
975: */
976: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d, PetscInt i_s,
977: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
978: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld)
979: {
980: PetscErrorCode ierr;
981: PetscInt n = i_e - i_s, i;
982: PetscScalar b[16];
983: Vec *Ax, *Bx, *r=auxV, X[4];
984: /* The memory manager doen't allow to call a subroutines */
985: PetscScalar Z[size_Z];
989: /* u <- X(i) */
990: dvd_improvex_compute_X(d,i_s,i_e,u,pX,ld);
992: /* v <- theta[0]A*u + theta[1]*B*u */
994: /* Bx <- B*X(i) */
995: Bx = kr;
996: if (d->BV) {
997: SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
998: } else {
999: for(i=0; i<n; i++) {
1000: if (d->B) {
1001: MatMult(d->B, u[i], Bx[i]);
1002: } else {
1003: VecCopy(u[i], Bx[i]);
1004: }
1005: }
1006: }
1008: /* Ax <- A*X(i) */
1009: Ax = r;
1010: SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
1012: /* v <- Y(i) */
1013: SlepcUpdateVectorsZ(v, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);
1015: /* Recompute the eigenvalue */
1016: DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, v, Ax, Bx, b, ierr);
1018: for(i=0; i<n; i++) {
1019: #if !defined(PETSC_USE_COMPLEX)
1020: if(d->eigi[i_s+i] != 0.0) {
1021: /* [r_i r_i+1 kr_i kr_i+1]*= [ theta_2i' 0 1 0
1022: 0 theta_2i' 0 1
1023: theta_2i+1 -thetai_i -eigr_i -eigi_i
1024: thetai_i theta_2i+1 eigi_i -eigr_i ] */
1025: b[0] = b[5] = PetscConj(theta[2*i]);
1026: b[2] = b[7] = -theta[2*i+1];
1027: b[6] = -(b[3] = thetai[i]);
1028: b[1] = b[4] = 0.0;
1029: b[8] = b[13] = 1.0/d->nX[i_s+i];
1030: b[10] = b[15] = -d->eigr[i_s+i]/d->nX[i_s+i];
1031: b[14] = -(b[11] = d->eigi[i_s+i]/d->nX[i_s+i]);
1032: b[9] = b[12] = 0.0;
1033: X[0] = Ax[i]; X[1] = Ax[i+1]; X[2] = Bx[i]; X[3] = Bx[i+1];
1034: SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 4, Z, size_Z);
1035:
1036: i++;
1037: } else
1038: #endif
1039: {
1040: /* [Ax_i Bx_i]*= [ theta_2i' 1/nX_i
1041: theta_2i+1 -eig_i/nX_i ] */
1042: b[0] = PetscConj(theta[i*2]);
1043: b[1] = theta[i*2+1];
1044: b[2] = 1.0/d->nX[i_s+i];
1045: b[3] = -d->eigr[i_s+i]/d->nX[i_s+i];
1046: X[0] = Ax[i]; X[1] = Bx[i];
1047: SlepcUpdateVectorsD(X, 2, 1.0, b, 2, 2, 2, Z, size_Z);
1048:
1049: }
1050: }
1051: for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;
1053: /* v <- K^{-1} r = K^{-1}(theta_2i'*Ax + theta_2i+1*Bx) */
1054: for(i=0; i<n; i++) {
1055: d->improvex_precond(d, i_s+i, r[i], v[i]);
1056: }
1058: /* kr <- P*(Ax - eig_i*Bx) */
1059: d->calcpairs_proj_res(d, i_s, i_e, kr);
1061: return(0);
1062: }
1066: /*
1067: Compute: u <- K^{-1}*X, v <- X,
1068: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1]
1069: where
1070: auxV, 4*(i_e-i_s) auxiliar global vectors
1071: pX,pY, the right and left eigenvectors of the projected system
1072: ld, the leading dimension of pX and pY
1073: */
1074: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d, PetscInt i_s,
1075: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
1076: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld)
1077: {
1078: PetscErrorCode ierr;
1079: PetscInt n = i_e - i_s, i;
1080: PetscScalar b[16];
1081: Vec *Ax, *Bx, *r = auxV, X[4];
1082: /* The memory manager doen't allow to call a subroutines */
1083: PetscScalar Z[size_Z];
1087: /* [v u] <- X(i) Y(i) */
1088: dvd_improvex_compute_X(d,i_s,i_e,v,pX,ld);
1089: SlepcUpdateVectorsZ(u, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);
1091: /* Bx <- B*X(i) */
1092: Bx = r;
1093: if (d->BV) {
1094: SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
1095: } else {
1096: if (d->B) {
1097: for(i=0; i<n; i++) {
1098: MatMult(d->B, v[i], Bx[i]);
1099: }
1100: } else
1101: Bx = v;
1102: }
1104: /* Ax <- A*X(i) */
1105: Ax = kr;
1106: SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
1108: /* Recompute the eigenvalue */
1109: DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, u, Ax, Bx, b, ierr);
1111: for(i=0; i<n; i++) {
1112: if (d->eigi[i_s+i] == 0.0) {
1113: /* kr <- Ax -eig*Bx */
1114: VecAXPBY(kr[i], -d->eigr[i_s+i]/d->nX[i_s+i], 1.0/d->nX[i_s+i], Bx[i]);
1115: } else {
1116: /* [kr_i kr_i+1 r_i r_i+1]*= [ 1 0
1117: 0 1
1118: -eigr_i -eigi_i
1119: eigi_i -eigr_i] */
1120: b[0] = b[5] = 1.0/d->nX[i_s+i];
1121: b[2] = b[7] = -d->eigr[i_s+i]/d->nX[i_s+i];
1122: b[6] = -(b[3] = d->eigi[i_s+i]/d->nX[i_s+i]);
1123: b[1] = b[4] = 0.0;
1124: X[0] = kr[i]; X[1] = kr[i+1]; X[2] = r[i]; X[3] = r[i+1];
1125: SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 2, Z, size_Z);
1126:
1127: i++;
1128: }
1129: }
1130: for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;
1132: /* kr <- P*kr */
1133: d->calcpairs_proj_res(d, i_s, i_e, r);
1135: /* u <- K^{-1} X(i) */
1136: for(i=0; i<n; i++) {
1137: d->improvex_precond(d, i_s+i, v[i], u[i]);
1138: }
1140: return(0);
1141: }
1146: PetscErrorCode dvd_improvex_jd_lit_const(dvdDashboard *d, dvdBlackboard *b,
1147: PetscInt maxits, PetscReal tol,
1148: PetscReal fix)
1149: {
1150: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1153: /* Setup the step */
1154: if (b->state >= DVD_STATE_CONF) {
1155: data->maxits = maxits;
1156: data->tol = tol;
1157: data->fix = fix;
1158: d->improvex_jd_lit = dvd_improvex_jd_lit_const_0;
1159: }
1160: return(0);
1161: }
1166: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d, PetscInt i,
1167: PetscScalar* theta, PetscScalar* thetai, PetscInt *maxits, PetscReal *tol)
1168: {
1169: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1170: PetscReal a;
1173: a = SlepcAbsEigenvalue(d->eigr[i],d->eigi[i]);
1175: if (d->nR[i]/a < data->fix) {
1176: theta[0] = d->eigr[i];
1177: theta[1] = 1.0;
1178: #if !defined(PETSC_USE_COMPLEX)
1179: *thetai = d->eigi[i];
1180: #endif
1181: } else {
1182: theta[0] = d->target[0];
1183: theta[1] = d->target[1];
1184: #if !defined(PETSC_USE_COMPLEX)
1185: *thetai = 0.0;
1186: #endif
1187: }
1189: #if defined(PETSC_USE_COMPLEX)
1190: if(thetai) *thetai = 0.0;
1191: #endif
1192: *maxits = data->maxits;
1193: *tol = data->tol;
1194: return(0);
1195: }
1198: /**** Patterns implementation *************************************************/
1200: typedef PetscInt (*funcV0_t)(dvdDashboard*, PetscInt, PetscInt, Vec*);
1201: typedef PetscInt (*funcV1_t)(dvdDashboard*, PetscInt, PetscInt, Vec*,
1202: PetscScalar*, Vec);
1206: /* Compute D <- K^{-1} * funcV[r_s..r_e] */
1207: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d, void *funcV, Vec *D,
1208: PetscInt max_size_D, PetscInt r_s, PetscInt r_e, Vec *auxV,
1209: PetscScalar *auxS)
1210: {
1211: PetscErrorCode ierr;
1212: PetscInt i;
1215: if (max_size_D >= r_e-r_s+1) {
1216: /* The optimized version needs one vector extra of D */
1217: /* D(1:r.size) = R(r_s:r_e-1) */
1218: if (auxS) ((funcV1_t)funcV)(d, r_s, r_e, D+1, auxS, auxV[0]);
1219: else ((funcV0_t)funcV)(d, r_s, r_e, D+1);
1221: /* D = K^{-1} * R */
1222: for (i=0; i<r_e-r_s; i++) {
1223: d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1224: }
1225: } else if (max_size_D == r_e-r_s) {
1226: /* Non-optimized version */
1227: /* auxV <- R[r_e-1] */
1228: if (auxS) ((funcV1_t)funcV)(d, r_e-1, r_e, auxV, auxS, auxV[1]);
1229: else ((funcV0_t)funcV)(d, r_e-1, r_e, auxV);
1231: /* D(1:r.size-1) = R(r_s:r_e-2) */
1232: if (auxS) ((funcV1_t)funcV)(d, r_s, r_e-1, D+1, auxS, auxV[1]);
1233: else ((funcV0_t)funcV)(d, r_s, r_e-1, D+1);
1235: /* D = K^{-1} * R */
1236: for (i=0; i<r_e-r_s-1; i++) {
1237: d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1238: }
1239: d->improvex_precond(d, r_e-1, auxV[0], D[r_e-r_s-1]);
1240: } else SETERRQ(PETSC_COMM_SELF,1, "Problem: r_e-r_s > max_size_D");
1241: return(0);
1242: }
1247: /* Compute (I - KZ*iXKZ*X')*V where,
1248: V, the vectors to apply the projector,
1249: cV, the number of vectors in V,
1250: auxS, auxiliar vector of size length 3*size_iXKZ*cV
1251: */
1252: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS)
1253: {
1254: #if defined(PETSC_MISSING_LAPACK_GETRS)
1256: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1257: #else
1258: PetscErrorCode ierr;
1259: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1260: PetscInt size_in = data->size_iXKZ*cV, i, ldh;
1261: PetscScalar *h, *in, *out;
1262: PetscBLASInt cV_, n, info, ld;
1263: DvdReduction r;
1264: DvdReductionChunk
1265: ops[4];
1266: DvdMult_copy_func
1267: sr[4];
1268:
1270: if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
1272: /* h <- X'*V */
1273: h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1274: SlepcAllReduceSumBegin(ops, 4, in, out, size_in, &r,
1275: ((PetscObject)d->V[0])->comm);
1276: for (i=0; i<cV; i++) {
1277: VecsMultS(&h[i*ldh],0,ldh,d->V-data->size_KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i*2]);
1278: VecsMultS(&h[i*ldh+data->size_KZ],0,ldh,data->u,0,data->size_iXKZ-data->size_KZ,V+i,0,1,&r,&sr[i*2+1]);
1279: }
1280: SlepcAllReduceSumEnd(&r);
1282: /* h <- iXKZ\h */
1283: cV_ = PetscBLASIntCast(cV);
1284: n = PetscBLASIntCast(data->size_iXKZ);
1285: ld = PetscBLASIntCast(data->ldiXKZ);
1287: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1288: LAPACKgetrs_("N", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info);
1289: PetscFPTrapPop();
1290: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);
1292: /* V <- V - KZ*h */
1293: for (i=0; i<cV; i++) {
1294: SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->KZ,data->size_iXKZ,&h[ldh*i],ldh,data->size_iXKZ,1);
1295: }
1296: return(0);
1297: #endif
1298: }
1302: /* Compute (I - X*iXKZ*KZ')*V where,
1303: V, the vectors to apply the projector,
1304: cV, the number of vectors in V,
1305: auxS, auxiliar vector of size length 3*size_iXKZ*cV
1306: */
1307: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS)
1308: {
1309: #if defined(PETSC_MISSING_LAPACK_GETRS)
1311: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1312: #else
1313: PetscErrorCode ierr;
1314: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1315: PetscInt size_in = data->size_iXKZ*cV, i, ldh;
1316: PetscScalar *h, *in, *out;
1317: PetscBLASInt cV_, n, info, ld;
1318: DvdReduction r;
1319: DvdReductionChunk
1320: ops[2];
1321: DvdMult_copy_func
1322: sr[2];
1323:
1325: if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
1327: /* h <- KZ'*V */
1328: h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1329: SlepcAllReduceSumBegin(ops, 2, in, out, size_in, &r,
1330: ((PetscObject)d->V[0])->comm);
1331: for (i=0; i<cV; i++) {
1332: VecsMultS(&h[i*ldh],0,ldh,data->KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i]);
1333: }
1334: SlepcAllReduceSumEnd(&r);
1336: /* h <- iXKZ\h */
1337: cV_ = PetscBLASIntCast(cV);
1338: n = PetscBLASIntCast(data->size_iXKZ);
1339: ld = PetscBLASIntCast(data->ldiXKZ);
1341: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1342: LAPACKgetrs_("C", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info);
1343: PetscFPTrapPop();
1344: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);
1346: /* V <- V - X*h */
1347: for (i=0; i<cV; i++) {
1348: SlepcUpdateVectorsZ(V+i,1.0,-1.0,d->V-data->size_KZ,data->size_KZ,&h[ldh*i],ldh,data->size_KZ,1);
1349: SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->u,data->size_iXKZ-data->size_KZ,&h[ldh*i+data->size_KZ],ldh,data->size_iXKZ-data->size_KZ,1);
1350: }
1351: return(0);
1352: #endif
1353: }