Actual source code: dvd_improvex.c
1: /*
2: SLEPc eigensolver: "davidson"
4: Step: improve the eigenvectors X
6: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7: SLEPc - Scalable Library for Eigenvalue Problem Computations
8: Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain
10: This file is part of SLEPc.
11:
12: SLEPc is free software: you can redistribute it and/or modify it under the
13: terms of version 3 of the GNU Lesser General Public License as published by
14: the Free Software Foundation.
16: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
17: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
19: more details.
21: You should have received a copy of the GNU Lesser General Public License
22: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
23: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
24: */
26: #include davidson.h
27: #include <slepcvec.h>
28: #include <slepcblaslapack.h>
30: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d, void *funcV, Vec *D,
31: PetscInt max_size_D, PetscInt r_s, PetscInt r_e, Vec *auxV,
32: PetscScalar *auxS);
33: PetscErrorCode dvd_pcapplyba(PC pc,PCSide side,Vec in,Vec out,Vec w);
34: PetscErrorCode dvd_pcapply(PC pc,Vec in,Vec out);
35: PetscErrorCode dvd_pcapplytrans(PC pc,Vec in,Vec out);
36: PetscErrorCode dvd_matmult_jd(Mat A,Vec in,Vec out);
37: PetscErrorCode dvd_matmulttrans_jd(Mat A,Vec in,Vec out);
38: PetscErrorCode dvd_matgetvecs_jd(Mat A, Vec *right, Vec *left);
39: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d);
40: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d);
41: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d);
42: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d, Vec *D,
43: PetscInt max_size_D, PetscInt r_s,
44: PetscInt r_e, PetscInt *size_D);
45: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d, PetscInt i_s,
46: PetscInt i_e, Vec **u, Vec **v, Vec *kr, Vec **auxV, PetscScalar **auxS,
47: PetscScalar *theta, PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY,
48: PetscInt ld);
49: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d, PetscInt i_s,
50: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
51: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld);
52: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d, PetscInt i_s,
53: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
54: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld);
55: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d, PetscInt i,
56: PetscScalar* theta, PetscScalar* thetai,
57: PetscInt *maxits, PetscReal *tol);
58: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS);
59: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS);
61: #define size_Z (64*4)
63: /**** JD update step (I - Kfg'/(g'Kf)) K(A - sB) (I - Kfg'/(g'Kf)) t = (I - Kfg'/(g'Kf))r ****/
65: typedef struct {
66: PetscInt size_X;
67: void
68: *old_improveX_data; /* old improveX_data */
69: improveX_type
70: old_improveX; /* old improveX */
71: KSP ksp; /* correction equation solver */
72: Vec
73: friends, /* reference vector for composite vectors */
74: *auxV; /* auxiliar vectors */
75: PetscScalar *auxS, /* auxiliar scalars */
76: *theta,
77: *thetai; /* the shifts used in the correction eq. */
78: PetscInt maxits, /* maximum number of iterations */
79: r_s, r_e, /* the selected eigenpairs to improve */
80: ksp_max_size; /* the ksp maximum subvectors size */
81: PetscReal tol, /* the maximum solution tolerance */
82: lastTol, /* last tol for dynamic stopping criterion */
83: fix; /* tolerance for using the approx. eigenvalue */
84: PetscBool
85: dynamic; /* if the dynamic stopping criterion is applied */
86: dvdDashboard
87: *d; /* the currect dvdDashboard reference */
88: PC old_pc; /* old pc in ksp */
89: Vec
90: *u, /* new X vectors */
91: *real_KZ, /* original KZ */
92: *KZ; /* KZ vecs for the projector KZ*inv(X'*KZ)*X' */
93: PetscScalar
94: *XKZ, /* X'*KZ */
95: *iXKZ; /* inverse of XKZ */
96: PetscInt
97: ldXKZ, /* leading dimension of XKZ */
98: size_iXKZ, /* size of iXKZ */
99: ldiXKZ, /* leading dimension of iXKZ */
100: size_KZ, /* size of converged KZ */
101: size_real_KZ, /* original size of KZ */
102: size_cX, /* last value of d->size_cX */
103: old_size_X; /* last number of improved vectors */
104: PetscBLASInt
105: *iXKZPivots; /* array of pivots */
106: } dvdImprovex_jd;
108: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);
109: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);
113: PetscErrorCode dvd_improvex_jd(dvdDashboard *d,dvdBlackboard *b,KSP ksp,PetscInt max_bs,PetscInt cX_impr,PetscBool dynamic)
114: {
115: PetscErrorCode ierr;
116: dvdImprovex_jd *data;
117: PetscBool useGD,her_probl,std_probl;
118: PC pc;
119: PetscInt size_P,s=1;
122: std_probl = DVD_IS(d->sEP,DVD_EP_STD)?PETSC_TRUE:PETSC_FALSE;
123: her_probl = DVD_IS(d->sEP,DVD_EP_HERMITIAN)?PETSC_TRUE:PETSC_FALSE;
125: /* Setting configuration constrains */
126: PetscObjectTypeCompare((PetscObject)ksp,KSPPREONLY,&useGD);
128: /* If the arithmetic is real and the problem is not Hermitian, then
129: the block size is incremented in one */
130: #if !defined(PETSC_USE_COMPLEX)
131: if (!her_probl) {
132: max_bs++;
133: b->max_size_P = PetscMax(b->max_size_P,2);
134: s = 2;
135: } else
136: #endif
137: b->max_size_P = PetscMax(b->max_size_P,1);
138: b->max_size_X = PetscMax(b->max_size_X,max_bs);
139: size_P = b->max_size_P+cX_impr;
140: b->max_size_auxV = PetscMax(b->max_size_auxV,
141: b->max_size_X*3+(useGD?0:2)+ /* u, kr, auxV(max_size_X+2?) */
142: ((her_probl || !d->eps->trueres)?1:PetscMax(s*2,b->max_size_cX_proj+b->max_size_X))); /* testConv */
143:
144: b->own_scalars+= size_P*size_P; /* XKZ */
145: b->max_size_auxS = PetscMax(b->max_size_auxS,
146: b->max_size_X*3 + /* theta, thetai */
147: size_P*size_P + /* iXKZ */
148: FromIntToScalar(size_P) + /* iXkZPivots */
149: PetscMax(PetscMax(
150: 3*b->max_size_proj*b->max_size_X, /* dvd_improvex_apply_proj */
151: 8*cX_impr*b->max_size_X), /* dvd_improvex_jd_proj_cuv_KZX */
152: (her_probl || !d->eps->trueres)?0:b->max_nev*b->max_nev+PetscMax(b->max_nev*6,(b->max_nev+b->max_size_proj)*s+b->max_nev*(b->max_size_X+b->max_size_cX_proj)*(std_probl?2:4)+64))); /* preTestConv */
153: b->own_vecs+= size_P; /* KZ */
155: /* Setup the preconditioner */
156: if (ksp) {
157: KSPGetPC(ksp,&pc);
158: dvd_static_precond_PC(d,b,pc);
159: } else {
160: dvd_static_precond_PC(d,b,0);
161: }
163: /* Setup the step */
164: if (b->state >= DVD_STATE_CONF) {
165: PetscMalloc(sizeof(dvdImprovex_jd),&data);
166: data->dynamic = dynamic;
167: data->size_real_KZ = size_P;
168: data->real_KZ = b->free_vecs; b->free_vecs+= data->size_real_KZ;
169: d->max_cX_in_impr = cX_impr;
170: data->XKZ = b->free_scalars; b->free_scalars+= size_P*size_P;
171: data->ldXKZ = size_P;
172: data->size_X = b->max_size_X;
173: data->old_improveX_data = d->improveX_data;
174: d->improveX_data = data;
175: data->old_improveX = d->improveX;
176: data->ksp = useGD?PETSC_NULL:ksp;
177: data->d = d;
178: d->improveX = dvd_improvex_jd_gen;
179: data->ksp_max_size = max_bs;
181: DVD_FL_ADD(d->startList,dvd_improvex_jd_start);
182: DVD_FL_ADD(d->endList,dvd_improvex_jd_end);
183: DVD_FL_ADD(d->destroyList,dvd_improvex_jd_d);
184: }
186: return(0);
187: }
192: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d)
193: {
194: PetscErrorCode ierr;
195: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
196: PetscInt rA, cA, rlA, clA;
197: Mat A;
198: PetscBool t;
199: PC pc;
203: data->KZ = data->real_KZ;
204: data->size_KZ = data->size_cX = data->old_size_X = 0;
205: data->lastTol = data->dynamic?0.5:0.0;
207: /* Setup the ksp */
208: if(data->ksp) {
209: /* Create the reference vector */
210: VecCreateCompWithVecs(d->V, data->ksp_max_size, PETSC_NULL,
211: &data->friends);
213: /* Save the current pc and set a PCNONE */
214: KSPGetPC(data->ksp, &data->old_pc);
215: PetscObjectTypeCompare((PetscObject)data->old_pc, PCNONE, &t);
216:
217: data->lastTol = 0.5;
218: if (t) {
219: data->old_pc = 0;
220: } else {
221: PetscObjectReference((PetscObject)data->old_pc);
222: PCCreate(((PetscObject)d->eps)->comm, &pc);
223: PCSetType(pc, PCSHELL);
224: PCSetOperators(pc, d->A, d->A, SAME_PRECONDITIONER);
225: PCShellSetApply(pc,dvd_pcapply);
226: PCShellSetApplyBA(pc,dvd_pcapplyba);
227: PCShellSetApplyTranspose(pc,dvd_pcapplytrans);
228: KSPSetPC(data->ksp, pc);
229: PCDestroy(&pc);
230: }
232: /* Create the (I-v*u')*K*(A-s*B) matrix */
233: MatGetSize(d->A, &rA, &cA);
234: MatGetLocalSize(d->A, &rlA, &clA);
235: MatCreateShell(((PetscObject)d->A)->comm, rlA*data->ksp_max_size,
236: clA*data->ksp_max_size, rA*data->ksp_max_size,
237: cA*data->ksp_max_size, data, &A);
238: MatShellSetOperation(A, MATOP_MULT,
239: (void(*)(void))dvd_matmult_jd);
240: MatShellSetOperation(A, MATOP_MULT_TRANSPOSE,
241: (void(*)(void))dvd_matmulttrans_jd);
242: MatShellSetOperation(A, MATOP_GET_VECS,
243: (void(*)(void))dvd_matgetvecs_jd);
244:
246: /* Try to avoid KSPReset */
247: KSPGetOperatorsSet(data->ksp,&t,PETSC_NULL);
248: if (t) {
249: Mat M;
250: PetscInt rM;
251: KSPGetOperators(data->ksp,&M,PETSC_NULL,PETSC_NULL);
252: MatGetSize(M,&rM,PETSC_NULL);
253: if (rM != rA*data->ksp_max_size) { KSPReset(data->ksp); }
254: }
255: KSPSetOperators(data->ksp, A, A, SAME_PRECONDITIONER);
256:
257: KSPSetUp(data->ksp);
258: MatDestroy(&A);
259: } else {
260: data->old_pc = 0;
261: data->friends = PETSC_NULL;
262: }
263:
264: return(0);
265: }
270: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d)
271: {
272: PetscErrorCode ierr;
273: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
277: if (data->friends) { VecDestroy(&data->friends); }
279: /* Restore the pc of ksp */
280: if (data->old_pc) {
281: KSPSetPC(data->ksp, data->old_pc);
282: PCDestroy(&data->old_pc);
283: }
285: return(0);
286: }
291: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d)
292: {
293: PetscErrorCode ierr;
294: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
297:
298: /* Restore changes in dvdDashboard */
299: d->improveX_data = data->old_improveX_data;
301: /* Free local data and objects */
302: PetscFree(data);
304: return(0);
305: }
310: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d,Vec *D,PetscInt max_size_D,PetscInt r_s,PetscInt r_e,PetscInt *size_D)
311: {
312: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
313: PetscErrorCode ierr;
314: PetscInt i,j,n,maxits,maxits0,lits,s,ld,k;
315: PetscScalar *pX,*pY,*auxS = d->auxS,*auxS0;
316: PetscReal tol,tol0;
317: Vec *u,*v,*kr,kr_comp,D_comp;
321: /* Quick exit */
322: if ((max_size_D == 0) || r_e-r_s <= 0) {
323: *size_D = 0;
324: /* Callback old improveX */
325: if (data->old_improveX) {
326: d->improveX_data = data->old_improveX_data;
327: data->old_improveX(d, PETSC_NULL, 0, 0, 0, PETSC_NULL);
328: d->improveX_data = data;
329: }
330: return(0);
331: }
332:
333: n = PetscMin(PetscMin(data->size_X, max_size_D), r_e-r_s);
334: if (n == 0) SETERRQ(PETSC_COMM_SELF,1, "n == 0");
335: if (data->size_X < r_e-r_s) SETERRQ(PETSC_COMM_SELF,1, "size_X < r_e-r_s");
337: DSGetLeadingDimension(d->ps,&ld);
339: /* Restart lastTol if a new pair converged */
340: if (data->dynamic && data->size_cX < d->size_cX)
341: data->lastTol = 0.5;
343: for(i=0, s=0, auxS0=auxS; i<n; i+=s) {
344: /* If the selected eigenvalue is complex, but the arithmetic is real... */
345: #if !defined(PETSC_USE_COMPLEX)
346: if (d->eigi[i] != 0.0) {
347: if (i+2 <= max_size_D) s=2; else break;
348: } else
349: #endif
350: s=1;
352: data->auxV = d->auxV;
353: data->r_s = r_s+i; data->r_e = r_s+i+s;
354: auxS = auxS0;
355: data->theta = auxS; auxS+= 2*s;
356: data->thetai = auxS; auxS+= s;
357: kr = data->auxV; data->auxV+= s;
359: /* Compute theta, maximum iterations and tolerance */
360: maxits = 0; tol = 1;
361: for(j=0; j<s; j++) {
362: d->improvex_jd_lit(d, r_s+i+j, &data->theta[2*j],
363: &data->thetai[j], &maxits0, &tol0);
364:
365: maxits+= maxits0; tol*= tol0;
366: }
367: maxits/= s; tol = data->dynamic?data->lastTol:exp(log(tol)/s);
369: /* Compute u, v and kr */
370: k = r_s+i+d->cX_in_H;
371: DSVectors(d->ps,DS_MAT_X,&k,PETSC_NULL);
372: DSNormalize(d->ps,DS_MAT_X,r_s+i+d->cX_in_H);
373: k = r_s+i+d->cX_in_H;
374: DSVectors(d->ps,DS_MAT_Y,&k,PETSC_NULL);
375: DSNormalize(d->ps,DS_MAT_Y,r_s+i+d->cX_in_H);
376: DSGetArray(d->ps,DS_MAT_X,&pX);
377: DSGetArray(d->ps,DS_MAT_Y,&pY);
378: dvd_improvex_jd_proj_cuv(d,r_s+i,r_s+i+s,&u,&v,kr,&data->auxV,&auxS,data->theta,data->thetai,pX,pY,ld);
379: DSRestoreArray(d->ps,DS_MAT_X,&pX);
380: DSRestoreArray(d->ps,DS_MAT_Y,&pY);
381: data->u = u;
383: /* Check if the first eigenpairs are converged */
384: if (i == 0) {
385: PetscInt n_auxV = data->auxV-d->auxV+s, n_auxS = auxS - d->auxS;
386: d->auxV+= n_auxV; d->size_auxV-= n_auxV;
387: d->auxS+= n_auxS; d->size_auxS-= n_auxS;
388: d->preTestConv(d,0,s,s,d->auxV-s,PETSC_NULL,&d->npreconv);
389: d->auxV-= n_auxV; d->size_auxV+= n_auxV;
390: d->auxS-= n_auxS; d->size_auxS+= n_auxS;
391: if (d->npreconv > 0) break;
392: }
394: /* If JD */
395: if (data->ksp) {
396: data->auxS = auxS;
398: /* kr <- -kr */
399: for(j=0; j<s; j++) {
400: VecScale(kr[j], -1.0);
401: }
403: /* Compouse kr and D */
404: VecCreateCompWithVecs(kr, data->ksp_max_size, data->friends,
405: &kr_comp);
406: VecCreateCompWithVecs(&D[i], data->ksp_max_size, data->friends,
407: &D_comp);
408: VecCompSetSubVecs(data->friends,s,PETSC_NULL);
409:
410: /* Solve the correction equation */
411: KSPSetTolerances(data->ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT,
412: maxits);
413: KSPSolve(data->ksp, kr_comp, D_comp);
414: KSPGetIterationNumber(data->ksp, &lits);
415: d->eps->OP->lineariterations+= lits;
416:
417: /* Destroy the composed ks and D */
418: VecDestroy(&kr_comp);
419: VecDestroy(&D_comp);
421: /* If GD */
422: } else {
423: for(j=0; j<s; j++) {
424: d->improvex_precond(d, r_s+i+j, kr[j], D[i+j]);
425: }
426: dvd_improvex_apply_proj(d, &D[i], s, auxS);
427: }
428: /* Prevent that short vectors are discarded in the orthogonalization */
429: if (i == 0 && d->eps->errest[d->nconv+r_s] > PETSC_MACHINE_EPSILON && d->eps->errest[d->nconv+r_s] < PETSC_MAX_REAL) {
430: for(j=0; j<s; j++) {
431: VecScale(D[j],1.0/d->eps->errest[d->nconv+r_s]);
432: }
433: }
434: }
435: *size_D = i;
436: if (data->dynamic) data->lastTol = PetscMax(data->lastTol/2.0,PETSC_MACHINE_EPSILON*10.0);
437:
438: /* Callback old improveX */
439: if (data->old_improveX) {
440: d->improveX_data = data->old_improveX_data;
441: data->old_improveX(d, PETSC_NULL, 0, 0, 0, PETSC_NULL);
442: d->improveX_data = data;
443: }
445: return(0);
446: }
448: /* y <- theta[1]A*x - theta[0]*B*x
449: auxV, two auxiliary vectors */
452: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
453: {
454: PetscErrorCode ierr;
455: PetscInt n,i;
456: const Vec *Bx;
459: n = data->r_e - data->r_s;
460: for(i=0; i<n; i++) {
461: MatMult(data->d->A,x[i],y[i]);
462: }
464: for(i=0; i<n; i++) {
465: #if !defined(PETSC_USE_COMPLEX)
466: if(data->d->eigi[data->r_s+i] != 0.0) {
467: if (data->d->B) {
468: MatMult(data->d->B,x[i],auxV[0]);
469: MatMult(data->d->B,x[i+1],auxV[1]);
470: Bx = auxV;
471: } else {
472: Bx = &x[i];
473: }
475: /* y_i <- [ t_2i+1*A*x_i - t_2i*Bx_i + ti_i*Bx_i+1;
476: y_i+1 t_2i+1*A*x_i+1 - ti_i*Bx_i - t_2i*Bx_i+1 ] */
477: VecAXPBYPCZ(y[i],-data->theta[2*i],data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
478: VecAXPBYPCZ(y[i+1],-data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
479: i++;
480: } else
481: #endif
482: {
483: if (data->d->B) {
484: MatMult(data->d->B,x[i],auxV[0]);
485: Bx = auxV;
486: } else {
487: Bx = &x[i];
488: }
489: VecAXPBY(y[i],-data->theta[i*2],data->theta[i*2+1],Bx[0]);
490: }
491: }
492: return(0);
493: }
497: /* y <- theta[1]'*A'*x - theta[0]'*B'*x
498: auxV, two auxiliary vectors */
501: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
502: {
503: PetscErrorCode ierr;
504: PetscInt n,i;
505: const Vec *Bx;
508: n = data->r_e - data->r_s;
509: for(i=0; i<n; i++) {
510: MatMultTranspose(data->d->A,x[i],y[i]);
511: }
513: for(i=0; i<n; i++) {
514: #if !defined(PETSC_USE_COMPLEX)
515: if(data->d->eigi[data->r_s+i] != 0.0) {
516: if (data->d->B) {
517: MatMultTranspose(data->d->B,x[i],auxV[0]);
518: MatMultTranspose(data->d->B,x[i+1],auxV[1]);
519: Bx = auxV;
520: } else {
521: Bx = &x[i];
522: }
524: /* y_i <- [ t_2i+1*A*x_i - t_2i*Bx_i - ti_i*Bx_i+1;
525: y_i+1 t_2i+1*A*x_i+1 + ti_i*Bx_i - t_2i*Bx_i+1 ] */
526: VecAXPBYPCZ(y[i],-data->theta[2*i],-data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
527: VecAXPBYPCZ(y[i+1],data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
528: i++;
529: } else
530: #endif
531: {
532: if (data->d->B) {
533: MatMultTranspose(data->d->B,x[i],auxV[0]);
534: Bx = auxV;
535: } else {
536: Bx = &x[i];
537: }
538: VecAXPBY(y[i],PetscConj(-data->theta[i*2]),PetscConj(data->theta[i*2+1]),Bx[0]);
539: }
540: }
541: return(0);
542: }
547: PetscErrorCode dvd_pcapplyba(PC pc,PCSide side,Vec in,Vec out,Vec w)
548: {
549: PetscErrorCode ierr;
550: dvdImprovex_jd *data;
551: PetscInt n,i;
552: const Vec *inx,*outx,*wx;
553: Mat A;
557: PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
558: MatShellGetContext(A,(void**)&data);
559: VecCompGetSubVecs(in,PETSC_NULL,&inx);
560: VecCompGetSubVecs(out,PETSC_NULL,&outx);
561: VecCompGetSubVecs(w,PETSC_NULL,&wx);
562: n = data->r_e - data->r_s;
564: /* Check auxiliary vectors */
565: if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
567: switch(side) {
568: case PC_LEFT:
569: /* aux <- theta[1]A*in - theta[0]*B*in */
570: dvd_aux_matmult(data,inx,data->auxV,outx);
571:
572: /* out <- K * aux */
573: for(i=0; i<n; i++) {
574: data->d->improvex_precond(data->d,data->r_s+i,data->auxV[i],outx[i]);
575: }
576: break;
577: case PC_RIGHT:
578: /* aux <- K * in */
579: for(i=0; i<n; i++) {
580: data->d->improvex_precond(data->d,data->r_s+i,inx[i],data->auxV[i]);
581: }
582:
583: /* out <- theta[1]A*auxV - theta[0]*B*auxV */
584: dvd_aux_matmult(data,data->auxV,outx,wx);
585: break;
586: case PC_SYMMETRIC:
587: /* aux <- K^{1/2} * in */
588: for(i=0; i<n; i++) {
589: PCApplySymmetricRight(data->old_pc,inx[i],data->auxV[i]);
590: }
591:
592: /* out <- theta[1]A*auxV - theta[0]*B*auxV */
593: dvd_aux_matmult(data,data->auxV,wx,outx);
594:
595: /* aux <- K^{1/2} * in */
596: for(i=0; i<n; i++) {
597: PCApplySymmetricLeft(data->old_pc,wx[i],outx[i]);
598: }
599: break;
600: default:
601: SETERRQ(PETSC_COMM_SELF,1, "Unsupported KSP side");
602: }
604: /* out <- out - v*(u'*out) */
605: dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
607: return(0);
608: }
613: PetscErrorCode dvd_pcapply(PC pc,Vec in,Vec out)
614: {
615: PetscErrorCode ierr;
616: dvdImprovex_jd *data;
617: PetscInt n,i;
618: const Vec *inx, *outx;
619: Mat A;
623: PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
624: MatShellGetContext(A,(void**)&data);
625: VecCompGetSubVecs(in,PETSC_NULL,&inx);
626: VecCompGetSubVecs(out,PETSC_NULL,&outx);
627: n = data->r_e - data->r_s;
629: /* out <- K * in */
630: for(i=0; i<n; i++) {
631: data->d->improvex_precond(data->d,data->r_s+i,inx[i],outx[i]);
632: }
634: /* out <- out - v*(u'*out) */
635: dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
637: return(0);
638: }
642: PetscErrorCode dvd_pcapplytrans(PC pc,Vec in,Vec out)
643: {
644: PetscErrorCode ierr;
645: dvdImprovex_jd *data;
646: PetscInt n,i;
647: const Vec *inx, *outx;
648: Mat A;
652: PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
653: MatShellGetContext(A,(void**)&data);
654: VecCompGetSubVecs(in,PETSC_NULL,&inx);
655: VecCompGetSubVecs(out,PETSC_NULL,&outx);
656: n = data->r_e - data->r_s;
658: /* Check auxiliary vectors */
659: if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
661: /* auxV <- in */
662: for(i=0; i<n; i++) {
663: VecCopy(inx[i],data->auxV[i]);
664: }
665:
666: /* auxV <- auxV - u*(v'*auxV) */
667: dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);
669: /* out <- K' * aux */
670: for(i=0; i<n; i++) {
671: PCApplyTranspose(data->old_pc,data->auxV[i],outx[i]);
672: }
674: return(0);
675: }
680: PetscErrorCode dvd_matmult_jd(Mat A,Vec in,Vec out)
681: {
682: PetscErrorCode ierr;
683: dvdImprovex_jd *data;
684: PetscInt n;
685: const Vec *inx, *outx;
686: PCSide side;
690: MatShellGetContext(A,(void**)&data);
691: VecCompGetSubVecs(in,PETSC_NULL,&inx);
692: VecCompGetSubVecs(out,PETSC_NULL,&outx);
693: n = data->r_e - data->r_s;
695: /* Check auxiliary vectors */
696: if (&data->auxV[2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
698: /* out <- theta[1]A*in - theta[0]*B*in */
699: dvd_aux_matmult(data,inx,outx,data->auxV);
701: KSPGetPCSide(data->ksp,&side);
702: if (side == PC_RIGHT) {
703: /* out <- out - v*(u'*out) */
704: dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
705: }
707: return(0);
708: }
712: PetscErrorCode dvd_matmulttrans_jd(Mat A,Vec in,Vec out)
713: {
714: PetscErrorCode ierr;
715: dvdImprovex_jd *data;
716: PetscInt n,i;
717: const Vec *inx,*outx,*r,*auxV;
718: PCSide side;
722: MatShellGetContext(A,(void**)&data);
723: VecCompGetSubVecs(in,PETSC_NULL,&inx);
724: VecCompGetSubVecs(out,PETSC_NULL,&outx);
725: n = data->r_e - data->r_s;
727: /* Check auxiliary vectors */
728: if (&data->auxV[n+2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
730: KSPGetPCSide(data->ksp,&side);
731: if (side == PC_RIGHT) {
732: /* auxV <- in */
733: for(i=0; i<n; i++) {
734: VecCopy(inx[i],data->auxV[i]);
735: }
737: /* auxV <- auxV - v*(u'*auxV) */
738: dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);
739: r = data->auxV;
740: auxV = data->auxV+n;
741: } else {
742: r = inx;
743: auxV = data->auxV;
744: }
746: /* out <- theta[1]A*r - theta[0]*B*r */
747: dvd_aux_matmulttrans(data,r,outx,auxV);
749: return(0);
750: }
755: PetscErrorCode dvd_matgetvecs_jd(Mat A, Vec *right, Vec *left)
756: {
757: PetscErrorCode ierr;
758: Vec *r, *l;
759: dvdImprovex_jd *data;
760: PetscInt n, i;
764: MatShellGetContext(A, (void**)&data);
765: n = data->ksp_max_size;
766: if (right) {
767: PetscMalloc(sizeof(Vec)*n, &r);
768: }
769: if (left) {
770: PetscMalloc(sizeof(Vec)*n, &l);
771: }
772: for (i=0; i<n; i++) {
773: MatGetVecs(data->d->A, right?&r[i]:PETSC_NULL,
774: left?&l[i]:PETSC_NULL);
775: }
776: if(right) {
777: VecCreateCompWithVecs(r, n, data->friends, right);
778: for (i=0; i<n; i++) {
779: VecDestroy(&r[i]);
780: }
781: }
782: if(left) {
783: VecCreateCompWithVecs(l, n, data->friends, left);
784: for (i=0; i<n; i++) {
785: VecDestroy(&l[i]);
786: }
787: }
789: if (right) {
790: PetscFree(r);
791: }
792: if (left) {
793: PetscFree(l);
794: }
796: return(0);
797: }
802: PetscErrorCode dvd_improvex_jd_proj_uv(dvdDashboard *d, dvdBlackboard *b,
803: ProjType_t p)
804: {
807: /* Setup the step */
808: if (b->state >= DVD_STATE_CONF) {
809: switch(p) {
810: case DVD_PROJ_KXX:
811: d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KXX; break;
812: case DVD_PROJ_KZX:
813: d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KZX; break;
814: }
815: }
817: return(0);
818: }
822: /*
823: Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
824: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
825: where
826: auxV, 4*(i_e-i_s) auxiliar global vectors
827: pX,pY, the right and left eigenvectors of the projected system
828: ld, the leading dimension of pX and pY
829: auxS: max(8*bs*max_cX_in_proj,size_V*size_V)
830: */
831: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d, PetscInt i_s,
832: PetscInt i_e, Vec **u, Vec **v, Vec *kr, Vec **auxV, PetscScalar **auxS,
833: PetscScalar *theta, PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY,
834: PetscInt ld)
835: {
836: #if defined(PETSC_MISSING_LAPACK_GETRF)
838: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable");
839: #else
840: PetscErrorCode ierr;
841: PetscInt n = i_e - i_s, size_KZ, V_new, rm, i, size_in;
842: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
843: PetscBLASInt s, ldXKZ, info;
844: DvdReduction r;
845: DvdReductionChunk
846: ops[2];
847: DvdMult_copy_func
848: sr[2];
852: /* Check consistency */
853: V_new = d->size_cX - data->size_cX;
854: if (V_new > data->old_size_X) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
855: data->old_size_X = n;
857: /* KZ <- KZ(rm:rm+max_cX-1) */
858: rm = PetscMax(V_new+data->size_KZ-d->max_cX_in_impr, 0);
859: for (i=0; i<d->max_cX_in_impr; i++) {
860: VecCopy(data->KZ[i+rm], data->KZ[i]);
861: }
862: data->size_cX = d->size_cX;
864: /* XKZ <- XKZ(rm:rm+max_cX-1,rm:rm+max_cX-1) */
865: for (i=0; i<d->max_cX_in_impr; i++) {
866: SlepcDenseCopy(&data->XKZ[i*data->ldXKZ+i], data->ldXKZ, &data->XKZ[(i+rm)*data->ldXKZ+i+rm], data->ldXKZ, data->size_KZ, 1);
867: }
868: data->size_KZ = PetscMin(d->max_cX_in_impr, data->size_KZ+V_new);
870: /* Compute X, KZ and KR */
871: *u = *auxV; *auxV+= n;
872: *v = &data->KZ[data->size_KZ];
873: d->improvex_jd_proj_uv(d, i_s, i_e, *u, *v, kr, *auxV, theta, thetai,
874: pX, pY, ld);
876: /* XKZ <- X'*KZ */
877: size_KZ = data->size_KZ+n;
878: size_in = 2*n*data->size_KZ+n*n;
879: SlepcAllReduceSumBegin(ops,2,*auxS,*auxS+size_in,size_in,&r,((PetscObject)d->V[0])->comm);
880: VecsMultS(data->XKZ,0,data->ldXKZ,d->V-data->size_KZ,0,data->size_KZ,data->KZ,data->size_KZ,size_KZ,&r,&sr[0]);
881: VecsMultS(&data->XKZ[data->size_KZ],0,data->ldXKZ,*u,0,n,data->KZ,0,size_KZ,&r,&sr[1]);
882: SlepcAllReduceSumEnd(&r);
884: /* iXKZ <- inv(XKZ) */
885: s = PetscBLASIntCast(size_KZ);
886: data->ldiXKZ = data->size_iXKZ = size_KZ;
887: data->iXKZ = *auxS; *auxS+= size_KZ*size_KZ;
888: data->iXKZPivots = (PetscBLASInt*)*auxS;
889: *auxS += FromIntToScalar(size_KZ);
890: SlepcDenseCopy(data->iXKZ,data->ldiXKZ,data->XKZ,data->ldXKZ,size_KZ,size_KZ);
891: ldXKZ = PetscBLASIntCast(data->ldiXKZ);
892: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
893: LAPACKgetrf_(&s, &s, data->iXKZ, &ldXKZ, data->iXKZPivots, &info);
894: PetscFPTrapPop();
895: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRF %d", info);
897: return(0);
898: #endif
899: }
901: #define DVD_COMPLEX_RAYLEIGH_QUOTIENT(ur,ui,Axr,Axi,Bxr,Bxi,eigr,eigi,b,ierr)\
902: { \
903: VecDot((Axr), (ur), &(b)[0]); /* r*A*r */ \
904: VecDot((Axr), (ui), &(b)[1]); /* i*A*r */ \
905: VecDot((Axi), (ur), &(b)[2]); /* r*A*i */ \
906: VecDot((Axi), (ui), &(b)[3]); /* i*A*i */ \
907: VecDot((Bxr), (ur), &(b)[4]); /* r*B*r */ \
908: VecDot((Bxr), (ui), &(b)[5]); /* i*B*r */ \
909: VecDot((Bxi), (ur), &(b)[6]); /* r*B*i */ \
910: VecDot((Bxi), (ui), &(b)[7]); /* i*B*i */ \
911: (b)[0] = (b)[0]+(b)[3]; /* rAr+iAi */ \
912: (b)[2] = (b)[2]-(b)[1]; /* rAi-iAr */ \
913: (b)[4] = (b)[4]+(b)[7]; /* rBr+iBi */ \
914: (b)[6] = (b)[6]-(b)[5]; /* rBi-iBr */ \
915: (b)[7] = (b)[4]*(b)[4] + (b)[6]*(b)[6]; /* k */ \
916: *(eigr) = ((b)[0]*(b)[4] + (b)[2]*(b)[6]) / (b)[7]; /* eig_r */ \
917: *(eigi) = ((b)[2]*(b)[4] - (b)[0]*(b)[6]) / (b)[7]; /* eig_i */ \
918: }
920: #if !defined(PETSC_USE_COMPLEX)
921: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
922: for((i)=0; (i)<(n); (i)++) { \
923: if ((eigi)[(i_s)+(i)] != 0.0) { \
924: /* eig_r = [(rAr+iAi)*(rBr+iBi) + (rAi-iAr)*(rBi-iBr)]/k \
925: eig_i = [(rAi-iAr)*(rBr+iBi) - (rAr+iAi)*(rBi-iBr)]/k \
926: k = (rBr+iBi)*(rBr+iBi) + (rBi-iBr)*(rBi-iBr) */ \
927: DVD_COMPLEX_RAYLEIGH_QUOTIENT((u)[(i)], (u)[(i)+1], (Ax)[(i)], \
928: (Ax)[(i)+1], (Bx)[(i)], (Bx)[(i)+1], &(b)[8], &(b)[9], (b), (ierr)); \
929: if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[8])/ \
930: PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10 || \
931: PetscAbsScalar((eigi)[(i_s)+(i)] - (b)[9])/ \
932: PetscAbsScalar((eigi)[(i_s)+(i)]) > 1e-10 ) { \
933: (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its "\
934: "Rayleigh quotient value %G+%G\n", \
935: (eigr)[(i_s)+(i)], \
936: (eigi)[(i_s)+(i)], (b)[8], (b)[9]); \
937: } \
938: (i)++; \
939: } \
940: }
941: #else
942: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
943: for((i)=0; (i)<(n); (i)++) { \
944: (ierr) = VecDot((Ax)[(i)], (u)[(i)], &(b)[0]); \
945: (ierr) = VecDot((Bx)[(i)], (u)[(i)], &(b)[1]); \
946: (b)[0] = (b)[0]/(b)[1]; \
947: if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[0])/ \
948: PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10 ) { \
949: (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its " \
950: "Rayleigh quotient value %G+%G\n", \
951: PetscRealPart((eigr)[(i_s)+(i)]), \
952: PetscImaginaryPart((eigr)[(i_s)+(i)]), PetscRealPart((b)[0]), \
953: PetscImaginaryPart((b)[0])); \
954: } \
955: }
956: #endif
961: /*
962: Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
963: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
964: where
965: auxV, 4*(i_e-i_s) auxiliar global vectors
966: pX,pY, the right and left eigenvectors of the projected system
967: ld, the leading dimension of pX and pY
968: */
969: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d, PetscInt i_s,
970: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
971: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld)
972: {
973: PetscErrorCode ierr;
974: PetscInt n = i_e - i_s, i;
975: PetscScalar b[16];
976: Vec *Ax, *Bx, *r=auxV, X[4];
977: /* The memory manager doen't allow to call a subroutines */
978: PetscScalar Z[size_Z];
982: /* u <- X(i) */
983: dvd_improvex_compute_X(d,i_s,i_e,u,pX,ld);
985: /* v <- theta[0]A*u + theta[1]*B*u */
987: /* Bx <- B*X(i) */
988: Bx = kr;
989: if (d->BV) {
990: SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
991: } else {
992: for(i=0; i<n; i++) {
993: if (d->B) {
994: MatMult(d->B, u[i], Bx[i]);
995: } else {
996: VecCopy(u[i], Bx[i]);
997: }
998: }
999: }
1001: /* Ax <- A*X(i) */
1002: Ax = r;
1003: SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
1005: /* v <- Y(i) */
1006: SlepcUpdateVectorsZ(v, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);
1008: /* Recompute the eigenvalue */
1009: DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, v, Ax, Bx, b, ierr);
1011: for(i=0; i<n; i++) {
1012: #if !defined(PETSC_USE_COMPLEX)
1013: if(d->eigi[i_s+i] != 0.0) {
1014: /* [r_i r_i+1 kr_i kr_i+1]*= [ theta_2i' 0 1 0
1015: 0 theta_2i' 0 1
1016: theta_2i+1 -thetai_i -eigr_i -eigi_i
1017: thetai_i theta_2i+1 eigi_i -eigr_i ] */
1018: b[0] = b[5] = PetscConj(theta[2*i]);
1019: b[2] = b[7] = -theta[2*i+1];
1020: b[6] = -(b[3] = thetai[i]);
1021: b[1] = b[4] = 0.0;
1022: b[8] = b[13] = 1.0/d->nX[i_s+i];
1023: b[10] = b[15] = -d->eigr[i_s+i]/d->nX[i_s+i];
1024: b[14] = -(b[11] = d->eigi[i_s+i]/d->nX[i_s+i]);
1025: b[9] = b[12] = 0.0;
1026: X[0] = Ax[i]; X[1] = Ax[i+1]; X[2] = Bx[i]; X[3] = Bx[i+1];
1027: SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 4, Z, size_Z);
1028:
1029: i++;
1030: } else
1031: #endif
1032: {
1033: /* [Ax_i Bx_i]*= [ theta_2i' 1/nX_i
1034: theta_2i+1 -eig_i/nX_i ] */
1035: b[0] = PetscConj(theta[i*2]);
1036: b[1] = theta[i*2+1];
1037: b[2] = 1.0/d->nX[i_s+i];
1038: b[3] = -d->eigr[i_s+i]/d->nX[i_s+i];
1039: X[0] = Ax[i]; X[1] = Bx[i];
1040: SlepcUpdateVectorsD(X, 2, 1.0, b, 2, 2, 2, Z, size_Z);
1041:
1042: }
1043: }
1044: for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;
1046: /* v <- K^{-1} r = K^{-1}(theta_2i'*Ax + theta_2i+1*Bx) */
1047: for(i=0; i<n; i++) {
1048: d->improvex_precond(d, i_s+i, r[i], v[i]);
1049: }
1051: /* kr <- P*(Ax - eig_i*Bx) */
1052: d->calcpairs_proj_res(d, i_s, i_e, kr);
1054: return(0);
1055: }
1059: /*
1060: Compute: u <- K^{-1}*X, v <- X,
1061: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1]
1062: where
1063: auxV, 4*(i_e-i_s) auxiliar global vectors
1064: pX,pY, the right and left eigenvectors of the projected system
1065: ld, the leading dimension of pX and pY
1066: */
1067: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d, PetscInt i_s,
1068: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
1069: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld)
1070: {
1071: PetscErrorCode ierr;
1072: PetscInt n = i_e - i_s, i;
1073: PetscScalar b[16];
1074: Vec *Ax, *Bx, *r = auxV, X[4];
1075: /* The memory manager doen't allow to call a subroutines */
1076: PetscScalar Z[size_Z];
1080: /* [v u] <- X(i) Y(i) */
1081: dvd_improvex_compute_X(d,i_s,i_e,v,pX,ld);
1082: SlepcUpdateVectorsZ(u, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);
1084: /* Bx <- B*X(i) */
1085: Bx = r;
1086: if (d->BV) {
1087: SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
1088: } else {
1089: if (d->B) {
1090: for(i=0; i<n; i++) {
1091: MatMult(d->B, v[i], Bx[i]);
1092: }
1093: } else
1094: Bx = v;
1095: }
1097: /* Ax <- A*X(i) */
1098: Ax = kr;
1099: SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
1101: /* Recompute the eigenvalue */
1102: DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, u, Ax, Bx, b, ierr);
1104: for(i=0; i<n; i++) {
1105: if (d->eigi[i_s+i] == 0.0) {
1106: /* kr <- Ax -eig*Bx */
1107: VecAXPBY(kr[i], -d->eigr[i_s+i]/d->nX[i_s+i], 1.0/d->nX[i_s+i], Bx[i]);
1108: } else {
1109: /* [kr_i kr_i+1 r_i r_i+1]*= [ 1 0
1110: 0 1
1111: -eigr_i -eigi_i
1112: eigi_i -eigr_i] */
1113: b[0] = b[5] = 1.0/d->nX[i_s+i];
1114: b[2] = b[7] = -d->eigr[i_s+i]/d->nX[i_s+i];
1115: b[6] = -(b[3] = d->eigi[i_s+i]/d->nX[i_s+i]);
1116: b[1] = b[4] = 0.0;
1117: X[0] = kr[i]; X[1] = kr[i+1]; X[2] = r[i]; X[3] = r[i+1];
1118: SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 2, Z, size_Z);
1119:
1120: i++;
1121: }
1122: }
1123: for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;
1125: /* kr <- P*kr */
1126: d->calcpairs_proj_res(d, i_s, i_e, r);
1128: /* u <- K^{-1} X(i) */
1129: for(i=0; i<n; i++) {
1130: d->improvex_precond(d, i_s+i, v[i], u[i]);
1131: }
1133: return(0);
1134: }
1139: PetscErrorCode dvd_improvex_jd_lit_const(dvdDashboard *d, dvdBlackboard *b,
1140: PetscInt maxits, PetscReal tol,
1141: PetscReal fix)
1142: {
1143: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1146: /* Setup the step */
1147: if (b->state >= DVD_STATE_CONF) {
1148: data->maxits = maxits;
1149: data->tol = tol;
1150: data->fix = fix;
1151: d->improvex_jd_lit = dvd_improvex_jd_lit_const_0;
1152: }
1153: return(0);
1154: }
1159: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d, PetscInt i,
1160: PetscScalar* theta, PetscScalar* thetai, PetscInt *maxits, PetscReal *tol)
1161: {
1162: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1163: PetscReal a;
1166: a = SlepcAbsEigenvalue(d->eigr[i],d->eigi[i]);
1168: if (d->nR[i]/a < data->fix) {
1169: theta[0] = d->eigr[i];
1170: theta[1] = 1.0;
1171: #if !defined(PETSC_USE_COMPLEX)
1172: *thetai = d->eigi[i];
1173: #endif
1174: } else {
1175: theta[0] = d->target[0];
1176: theta[1] = d->target[1];
1177: #if !defined(PETSC_USE_COMPLEX)
1178: *thetai = 0.0;
1179: #endif
1180: }
1182: #if defined(PETSC_USE_COMPLEX)
1183: if(thetai) *thetai = 0.0;
1184: #endif
1185: *maxits = data->maxits;
1186: *tol = data->tol;
1187: return(0);
1188: }
1191: /**** Patterns implementation *************************************************/
1193: typedef PetscInt (*funcV0_t)(dvdDashboard*, PetscInt, PetscInt, Vec*);
1194: typedef PetscInt (*funcV1_t)(dvdDashboard*, PetscInt, PetscInt, Vec*,
1195: PetscScalar*, Vec);
1199: /* Compute D <- K^{-1} * funcV[r_s..r_e] */
1200: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d, void *funcV, Vec *D,
1201: PetscInt max_size_D, PetscInt r_s, PetscInt r_e, Vec *auxV,
1202: PetscScalar *auxS)
1203: {
1204: PetscErrorCode ierr;
1205: PetscInt i;
1208: if (max_size_D >= r_e-r_s+1) {
1209: /* The optimized version needs one vector extra of D */
1210: /* D(1:r.size) = R(r_s:r_e-1) */
1211: if (auxS) ((funcV1_t)funcV)(d, r_s, r_e, D+1, auxS, auxV[0]);
1212: else ((funcV0_t)funcV)(d, r_s, r_e, D+1);
1214: /* D = K^{-1} * R */
1215: for (i=0; i<r_e-r_s; i++) {
1216: d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1217: }
1218: } else if (max_size_D == r_e-r_s) {
1219: /* Non-optimized version */
1220: /* auxV <- R[r_e-1] */
1221: if (auxS) ((funcV1_t)funcV)(d, r_e-1, r_e, auxV, auxS, auxV[1]);
1222: else ((funcV0_t)funcV)(d, r_e-1, r_e, auxV);
1224: /* D(1:r.size-1) = R(r_s:r_e-2) */
1225: if (auxS) ((funcV1_t)funcV)(d, r_s, r_e-1, D+1, auxS, auxV[1]);
1226: else ((funcV0_t)funcV)(d, r_s, r_e-1, D+1);
1228: /* D = K^{-1} * R */
1229: for (i=0; i<r_e-r_s-1; i++) {
1230: d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1231: }
1232: d->improvex_precond(d, r_e-1, auxV[0], D[r_e-r_s-1]);
1233: } else SETERRQ(PETSC_COMM_SELF,1, "Problem: r_e-r_s > max_size_D");
1234: return(0);
1235: }
1240: /* Compute (I - KZ*iXKZ*X')*V where,
1241: V, the vectors to apply the projector,
1242: cV, the number of vectors in V,
1243: auxS, auxiliar vector of size length 3*size_iXKZ*cV
1244: */
1245: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS)
1246: {
1247: #if defined(PETSC_MISSING_LAPACK_GETRS)
1249: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1250: #else
1251: PetscErrorCode ierr;
1252: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1253: PetscInt size_in = data->size_iXKZ*cV, i, ldh;
1254: PetscScalar *h, *in, *out;
1255: PetscBLASInt cV_, n, info, ld;
1256: DvdReduction r;
1257: DvdReductionChunk
1258: ops[4];
1259: DvdMult_copy_func
1260: sr[4];
1261:
1263: if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
1265: /* h <- X'*V */
1266: h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1267: SlepcAllReduceSumBegin(ops, 4, in, out, size_in, &r,
1268: ((PetscObject)d->V[0])->comm);
1269: for (i=0; i<cV; i++) {
1270: VecsMultS(&h[i*ldh],0,ldh,d->V-data->size_KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i*2]);
1271: VecsMultS(&h[i*ldh+data->size_KZ],0,ldh,data->u,0,data->size_iXKZ-data->size_KZ,V+i,0,1,&r,&sr[i*2+1]);
1272: }
1273: SlepcAllReduceSumEnd(&r);
1275: /* h <- iXKZ\h */
1276: cV_ = PetscBLASIntCast(cV);
1277: n = PetscBLASIntCast(data->size_iXKZ);
1278: ld = PetscBLASIntCast(data->ldiXKZ);
1280: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1281: LAPACKgetrs_("N", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info);
1282: PetscFPTrapPop();
1283: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);
1285: /* V <- V - KZ*h */
1286: for (i=0; i<cV; i++) {
1287: SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->KZ,data->size_iXKZ,&h[ldh*i],ldh,data->size_iXKZ,1);
1288: }
1289: return(0);
1290: #endif
1291: }
1295: /* Compute (I - X*iXKZ*KZ')*V where,
1296: V, the vectors to apply the projector,
1297: cV, the number of vectors in V,
1298: auxS, auxiliar vector of size length 3*size_iXKZ*cV
1299: */
1300: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS)
1301: {
1302: #if defined(PETSC_MISSING_LAPACK_GETRS)
1304: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1305: #else
1306: PetscErrorCode ierr;
1307: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1308: PetscInt size_in = data->size_iXKZ*cV, i, ldh;
1309: PetscScalar *h, *in, *out;
1310: PetscBLASInt cV_, n, info, ld;
1311: DvdReduction r;
1312: DvdReductionChunk
1313: ops[2];
1314: DvdMult_copy_func
1315: sr[2];
1316:
1318: if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
1320: /* h <- KZ'*V */
1321: h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1322: SlepcAllReduceSumBegin(ops, 2, in, out, size_in, &r,
1323: ((PetscObject)d->V[0])->comm);
1324: for (i=0; i<cV; i++) {
1325: VecsMultS(&h[i*ldh],0,ldh,data->KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i]);
1326: }
1327: SlepcAllReduceSumEnd(&r);
1329: /* h <- iXKZ\h */
1330: cV_ = PetscBLASIntCast(cV);
1331: n = PetscBLASIntCast(data->size_iXKZ);
1332: ld = PetscBLASIntCast(data->ldiXKZ);
1334: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1335: LAPACKgetrs_("C", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info);
1336: PetscFPTrapPop();
1337: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);
1339: /* V <- V - X*h */
1340: for (i=0; i<cV; i++) {
1341: SlepcUpdateVectorsZ(V+i,1.0,-1.0,d->V-data->size_KZ,data->size_KZ,&h[ldh*i],ldh,data->size_KZ,1);
1342: SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->u,data->size_iXKZ-data->size_KZ,&h[ldh*i+data->size_KZ],ldh,data->size_iXKZ-data->size_KZ,1);
1343: }
1344: return(0);
1345: #endif
1346: }