Actual source code: dvd_improvex.c
1: /*
2: SLEPc eigensolver: "davidson"
4: Step: improve the eigenvectors X
6: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7: SLEPc - Scalable Library for Eigenvalue Problem Computations
8: Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain
10: This file is part of SLEPc.
11:
12: SLEPc is free software: you can redistribute it and/or modify it under the
13: terms of version 3 of the GNU Lesser General Public License as published by
14: the Free Software Foundation.
16: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
17: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
19: more details.
21: You should have received a copy of the GNU Lesser General Public License
22: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
23: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
24: */
26: #include davidson.h
27: #include <slepcvec.h>
28: #include <slepcblaslapack.h>
30: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d, void *funcV, Vec *D,
31: PetscInt max_size_D, PetscInt r_s, PetscInt r_e, Vec *auxV,
32: PetscScalar *auxS);
33: PetscErrorCode dvd_pcapplyba(PC pc,PCSide side,Vec in,Vec out,Vec w);
34: PetscErrorCode dvd_pcapply(PC pc,Vec in,Vec out);
35: PetscErrorCode dvd_pcapplytrans(PC pc,Vec in,Vec out);
36: PetscErrorCode dvd_matmult_jd(Mat A,Vec in,Vec out);
37: PetscErrorCode dvd_matmulttrans_jd(Mat A,Vec in,Vec out);
38: PetscErrorCode dvd_matgetvecs_jd(Mat A, Vec *right, Vec *left);
39: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d);
40: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d);
41: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d);
42: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d, Vec *D,
43: PetscInt max_size_D, PetscInt r_s,
44: PetscInt r_e, PetscInt *size_D);
45: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d, PetscInt i_s,
46: PetscInt i_e, Vec **u, Vec **v, Vec *kr, Vec **auxV, PetscScalar **auxS,
47: PetscScalar *theta, PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY,
48: PetscInt ld);
49: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d, PetscInt i_s,
50: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
51: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld);
52: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d, PetscInt i_s,
53: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
54: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld);
55: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d, PetscInt i,
56: PetscScalar* theta, PetscScalar* thetai,
57: PetscInt *maxits, PetscReal *tol);
58: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS);
59: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS);
61: #define size_Z (64*4)
63: /**** JD update step (I - Kfg'/(g'Kf)) K(A - sB) (I - Kfg'/(g'Kf)) t = (I - Kfg'/(g'Kf))r ****/
65: typedef struct {
66: PetscInt size_X;
67: void
68: *old_improveX_data; /* old improveX_data */
69: improveX_type
70: old_improveX; /* old improveX */
71: KSP ksp; /* correction equation solver */
72: Vec
73: friends, /* reference vector for composite vectors */
74: *auxV; /* auxiliar vectors */
75: PetscScalar *auxS, /* auxiliar scalars */
76: *theta,
77: *thetai; /* the shifts used in the correction eq. */
78: PetscInt maxits, /* maximum number of iterations */
79: r_s, r_e, /* the selected eigenpairs to improve */
80: ksp_max_size; /* the ksp maximum subvectors size */
81: PetscReal tol, /* the maximum solution tolerance */
82: lastTol, /* last tol for dynamic stopping criterion */
83: fix; /* tolerance for using the approx. eigenvalue */
84: PetscBool
85: dynamic; /* if the dynamic stopping criterion is applied */
86: dvdDashboard
87: *d; /* the currect dvdDashboard reference */
88: PC old_pc; /* old pc in ksp */
89: Vec
90: *u, /* new X vectors */
91: *real_KZ, /* original KZ */
92: *KZ; /* KZ vecs for the projector KZ*inv(X'*KZ)*X' */
93: PetscScalar
94: *XKZ, /* X'*KZ */
95: *iXKZ; /* inverse of XKZ */
96: PetscInt
97: ldXKZ, /* leading dimension of XKZ */
98: size_iXKZ, /* size of iXKZ */
99: ldiXKZ, /* leading dimension of iXKZ */
100: size_KZ, /* size of converged KZ */
101: size_real_KZ, /* original size of KZ */
102: size_cX, /* last value of d->size_cX */
103: old_size_X; /* last number of improved vectors */
104: PetscBLASInt
105: *iXKZPivots; /* array of pivots */
106: } dvdImprovex_jd;
108: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);
109: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);
113: PetscErrorCode dvd_improvex_jd(dvdDashboard *d,dvdBlackboard *b,KSP ksp,PetscInt max_bs,PetscInt cX_impr,PetscBool dynamic)
114: {
115: PetscErrorCode ierr;
116: dvdImprovex_jd *data;
117: PetscBool useGD,her_probl,std_probl;
118: PC pc;
119: PetscInt size_P,s=1;
122: std_probl = DVD_IS(d->sEP,DVD_EP_STD)?PETSC_TRUE:PETSC_FALSE;
123: her_probl = DVD_IS(d->sEP,DVD_EP_HERMITIAN)?PETSC_TRUE:PETSC_FALSE;
125: /* Setting configuration constrains */
126: PetscObjectTypeCompare((PetscObject)ksp,KSPPREONLY,&useGD);
128: /* If the arithmetic is real and the problem is not Hermitian, then
129: the block size is incremented in one */
130: #if !defined(PETSC_USE_COMPLEX)
131: if (!her_probl) {
132: max_bs++;
133: b->max_size_P = PetscMax(b->max_size_P,2);
134: s = 2;
135: } else
136: #endif
137: b->max_size_P = PetscMax(b->max_size_P,1);
138: b->max_size_X = PetscMax(b->max_size_X,max_bs);
139: size_P = b->max_size_P+cX_impr;
140: b->max_size_auxV = PetscMax(b->max_size_auxV,
141: b->max_size_X*3+(useGD?0:2)+ /* u, kr, auxV(max_size_X+2?) */
142: ((her_probl || !d->eps->trueres)?1:PetscMax(s*2,b->max_size_cX_proj+b->max_size_X))); /* testConv */
143:
144: b->own_scalars+= size_P*size_P; /* XKZ */
145: b->max_size_auxS = PetscMax(b->max_size_auxS,
146: b->max_size_X*3 + /* theta, thetai */
147: size_P*size_P + /* iXKZ */
148: FromIntToScalar(size_P) + /* iXkZPivots */
149: PetscMax(PetscMax(
150: 3*b->max_size_proj*b->max_size_X, /* dvd_improvex_apply_proj */
151: 8*cX_impr*b->max_size_X), /* dvd_improvex_jd_proj_cuv_KZX */
152: (her_probl || !d->eps->trueres)?0:b->max_nev*b->max_nev+PetscMax(b->max_nev*6,(b->max_nev+b->max_size_proj)*s+b->max_nev*(b->max_size_X+b->max_size_cX_proj)*(std_probl?2:4)+64))); /* preTestConv */
153: b->own_vecs+= size_P; /* KZ */
155: /* Setup the preconditioner */
156: if (ksp) {
157: KSPGetPC(ksp,&pc);
158: dvd_static_precond_PC(d,b,pc);
159: } else {
160: dvd_static_precond_PC(d,b,0);
161: }
163: /* Setup the step */
164: if (b->state >= DVD_STATE_CONF) {
165: PetscMalloc(sizeof(dvdImprovex_jd),&data);
166: data->dynamic = dynamic;
167: data->size_real_KZ = size_P;
168: data->real_KZ = b->free_vecs; b->free_vecs+= data->size_real_KZ;
169: d->max_cX_in_impr = cX_impr;
170: data->XKZ = b->free_scalars; b->free_scalars+= size_P*size_P;
171: data->ldXKZ = size_P;
172: data->size_X = b->max_size_X;
173: data->old_improveX_data = d->improveX_data;
174: d->improveX_data = data;
175: data->old_improveX = d->improveX;
176: data->ksp = useGD?PETSC_NULL:ksp;
177: data->d = d;
178: d->improveX = dvd_improvex_jd_gen;
179: data->ksp_max_size = max_bs;
181: DVD_FL_ADD(d->startList,dvd_improvex_jd_start);
182: DVD_FL_ADD(d->endList,dvd_improvex_jd_end);
183: DVD_FL_ADD(d->destroyList,dvd_improvex_jd_d);
184: }
186: return(0);
187: }
192: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d)
193: {
194: PetscErrorCode ierr;
195: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
196: PetscInt rA, cA, rlA, clA;
197: Mat A;
198: PetscBool t;
199: PC pc;
203: data->KZ = data->real_KZ;
204: data->size_KZ = data->size_cX = data->old_size_X = 0;
205: data->lastTol = data->dynamic?0.5:0.0;
207: /* Setup the ksp */
208: if(data->ksp) {
209: /* Create the reference vector */
210: VecCreateCompWithVecs(d->V, data->ksp_max_size, PETSC_NULL,
211: &data->friends);
213: /* Save the current pc and set a PCNONE */
214: KSPGetPC(data->ksp, &data->old_pc);
215: PetscObjectTypeCompare((PetscObject)data->old_pc, PCNONE, &t);
216:
217: data->lastTol = 0.5;
218: if (t) {
219: data->old_pc = 0;
220: } else {
221: PetscObjectReference((PetscObject)data->old_pc);
222: PCCreate(((PetscObject)d->eps)->comm, &pc);
223: PCSetType(pc, PCSHELL);
224: PCSetOperators(pc, d->A, d->A, SAME_PRECONDITIONER);
225: PCShellSetApply(pc,dvd_pcapply);
226: PCShellSetApplyBA(pc,dvd_pcapplyba);
227: PCShellSetApplyTranspose(pc,dvd_pcapplytrans);
228: KSPSetPC(data->ksp, pc);
229: PCDestroy(&pc);
230: }
232: /* Create the (I-v*u')*K*(A-s*B) matrix */
233: MatGetSize(d->A, &rA, &cA);
234: MatGetLocalSize(d->A, &rlA, &clA);
235: MatCreateShell(((PetscObject)d->A)->comm, rlA*data->ksp_max_size,
236: clA*data->ksp_max_size, rA*data->ksp_max_size,
237: cA*data->ksp_max_size, data, &A);
238: MatShellSetOperation(A, MATOP_MULT,
239: (void(*)(void))dvd_matmult_jd);
240: MatShellSetOperation(A, MATOP_MULT_TRANSPOSE,
241: (void(*)(void))dvd_matmulttrans_jd);
242: MatShellSetOperation(A, MATOP_GET_VECS,
243: (void(*)(void))dvd_matgetvecs_jd);
244:
246: /* Try to avoid KSPReset */
247: KSPGetOperatorsSet(data->ksp,&t,PETSC_NULL);
248: if (t) {
249: Mat M;
250: PetscInt rM;
251: KSPGetOperators(data->ksp,&M,PETSC_NULL,PETSC_NULL);
252: MatGetSize(M,&rM,PETSC_NULL);
253: if (rM != rA*data->ksp_max_size) { KSPReset(data->ksp); }
254: }
255: KSPSetOperators(data->ksp, A, A, SAME_PRECONDITIONER);
256:
257: KSPSetUp(data->ksp);
258: MatDestroy(&A);
259: } else {
260: data->old_pc = 0;
261: data->friends = PETSC_NULL;
262: }
263:
264: return(0);
265: }
270: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d)
271: {
272: PetscErrorCode ierr;
273: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
277: if (data->friends) { VecDestroy(&data->friends); }
279: /* Restore the pc of ksp */
280: if (data->old_pc) {
281: KSPSetPC(data->ksp, data->old_pc);
282: PCDestroy(&data->old_pc);
283: }
285: return(0);
286: }
291: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d)
292: {
293: PetscErrorCode ierr;
294: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
297:
298: /* Restore changes in dvdDashboard */
299: d->improveX_data = data->old_improveX_data;
301: /* Free local data and objects */
302: PetscFree(data);
304: return(0);
305: }
310: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d,Vec *D,PetscInt max_size_D,PetscInt r_s,PetscInt r_e,PetscInt *size_D)
311: {
312: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
313: PetscErrorCode ierr;
314: PetscInt i,j,n,maxits,maxits0,lits,s,ld,k;
315: PetscScalar *pX,*pY,*auxS = d->auxS,*auxS0;
316: PetscReal tol,tol0;
317: Vec *u,*v,*kr,kr_comp,D_comp;
321: /* Quick exit */
322: if ((max_size_D == 0) || r_e-r_s <= 0) {
323: /* Callback old improveX */
324: if (data->old_improveX) {
325: d->improveX_data = data->old_improveX_data;
326: data->old_improveX(d, PETSC_NULL, 0, 0, 0, PETSC_NULL);
327: d->improveX_data = data;
328: }
329: return(0);
330: }
331:
332: n = PetscMin(PetscMin(data->size_X, max_size_D), r_e-r_s);
333: if (n == 0) SETERRQ(PETSC_COMM_SELF,1, "n == 0");
334: if (data->size_X < r_e-r_s) SETERRQ(PETSC_COMM_SELF,1, "size_X < r_e-r_s");
336: DSGetLeadingDimension(d->ps,&ld);
338: /* Restart lastTol if a new pair converged */
339: if (data->dynamic && data->size_cX < d->size_cX)
340: data->lastTol = 0.5;
342: for(i=0, s=0, auxS0=auxS; i<n; i+=s) {
343: /* If the selected eigenvalue is complex, but the arithmetic is real... */
344: #if !defined(PETSC_USE_COMPLEX)
345: if (d->eigi[i] != 0.0) {
346: if (i+2 <= max_size_D) s=2; else break;
347: } else
348: #endif
349: s=1;
351: data->auxV = d->auxV;
352: data->r_s = r_s+i; data->r_e = r_s+i+s;
353: auxS = auxS0;
354: data->theta = auxS; auxS+= 2*s;
355: data->thetai = auxS; auxS+= s;
356: kr = data->auxV; data->auxV+= s;
358: /* Compute theta, maximum iterations and tolerance */
359: maxits = 0; tol = 1;
360: for(j=0; j<s; j++) {
361: d->improvex_jd_lit(d, r_s+i+j, &data->theta[2*j],
362: &data->thetai[j], &maxits0, &tol0);
363:
364: maxits+= maxits0; tol*= tol0;
365: }
366: maxits/= s; tol = data->dynamic?data->lastTol:exp(log(tol)/s);
368: /* Compute u, v and kr */
369: k = r_s+i+d->cX_in_H;
370: DSVectors(d->ps,DS_MAT_X,&k,PETSC_NULL);
371: DSNormalize(d->ps,DS_MAT_X,r_s+i+d->cX_in_H);
372: k = r_s+i+d->cX_in_H;
373: DSVectors(d->ps,DS_MAT_Y,&k,PETSC_NULL);
374: DSNormalize(d->ps,DS_MAT_Y,r_s+i+d->cX_in_H);
375: DSGetArray(d->ps,DS_MAT_X,&pX);
376: DSGetArray(d->ps,DS_MAT_Y,&pY);
377: dvd_improvex_jd_proj_cuv(d,r_s+i,r_s+i+s,&u,&v,kr,&data->auxV,&auxS,data->theta,data->thetai,pX,pY,ld);
378: DSRestoreArray(d->ps,DS_MAT_X,&pX);
379: DSRestoreArray(d->ps,DS_MAT_Y,&pY);
380: data->u = u;
382: /* Check if the first eigenpairs are converged */
383: if (i == 0) {
384: PetscInt n_auxV = data->auxV-d->auxV+s, n_auxS = auxS - d->auxS;
385: d->auxV+= n_auxV; d->size_auxV-= n_auxV;
386: d->auxS+= n_auxS; d->size_auxS-= n_auxS;
387: d->preTestConv(d,0,s,s,d->auxV-s,PETSC_NULL,&d->npreconv);
388: d->auxV-= n_auxV; d->size_auxV+= n_auxV;
389: d->auxS-= n_auxS; d->size_auxS+= n_auxS;
390: if (d->npreconv > 0) break;
391: }
393: /* If JD */
394: if (data->ksp) {
395: data->auxS = auxS;
397: /* kr <- -kr */
398: for(j=0; j<s; j++) {
399: VecScale(kr[j], -1.0);
400: }
402: /* Compouse kr and D */
403: VecCreateCompWithVecs(kr, data->ksp_max_size, data->friends,
404: &kr_comp);
405: VecCreateCompWithVecs(&D[i], data->ksp_max_size, data->friends,
406: &D_comp);
407: VecCompSetSubVecs(data->friends,s,PETSC_NULL);
408:
409: /* Solve the correction equation */
410: KSPSetTolerances(data->ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT,
411: maxits);
412: KSPSolve(data->ksp, kr_comp, D_comp);
413: KSPGetIterationNumber(data->ksp, &lits);
414: d->eps->OP->lineariterations+= lits;
415:
416: /* Destroy the composed ks and D */
417: VecDestroy(&kr_comp);
418: VecDestroy(&D_comp);
420: /* If GD */
421: } else {
422: for(j=0; j<s; j++) {
423: d->improvex_precond(d, r_s+i+j, kr[j], D[i+j]);
424: }
425: dvd_improvex_apply_proj(d, &D[i], s, auxS);
426: }
427: }
428: *size_D = i;
429: if (data->dynamic) data->lastTol = PetscMax(data->lastTol/2.0,PETSC_MACHINE_EPSILON*10.0);
430:
431: /* Callback old improveX */
432: if (data->old_improveX) {
433: d->improveX_data = data->old_improveX_data;
434: data->old_improveX(d, PETSC_NULL, 0, 0, 0, PETSC_NULL);
435: d->improveX_data = data;
436: }
438: return(0);
439: }
441: /* y <- theta[1]A*x - theta[0]*B*x
442: auxV, two auxiliary vectors */
445: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
446: {
447: PetscErrorCode ierr;
448: PetscInt n,i;
449: const Vec *Bx;
452: n = data->r_e - data->r_s;
453: for(i=0; i<n; i++) {
454: MatMult(data->d->A,x[i],y[i]);
455: }
457: for(i=0; i<n; i++) {
458: #if !defined(PETSC_USE_COMPLEX)
459: if(data->d->eigi[data->r_s+i] != 0.0) {
460: if (data->d->B) {
461: MatMult(data->d->B,x[i],auxV[0]);
462: MatMult(data->d->B,x[i+1],auxV[1]);
463: Bx = auxV;
464: } else {
465: Bx = &x[i];
466: }
468: /* y_i <- [ t_2i+1*A*x_i - t_2i*Bx_i + ti_i*Bx_i+1;
469: y_i+1 t_2i+1*A*x_i+1 - ti_i*Bx_i - t_2i*Bx_i+1 ] */
470: VecAXPBYPCZ(y[i],-data->theta[2*i],data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
471: VecAXPBYPCZ(y[i+1],-data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
472: i++;
473: } else
474: #endif
475: {
476: if (data->d->B) {
477: MatMult(data->d->B,x[i],auxV[0]);
478: Bx = auxV;
479: } else {
480: Bx = &x[i];
481: }
482: VecAXPBY(y[i],-data->theta[i*2],data->theta[i*2+1],Bx[0]);
483: }
484: }
485: return(0);
486: }
490: /* y <- theta[1]'*A'*x - theta[0]'*B'*x
491: auxV, two auxiliary vectors */
494: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
495: {
496: PetscErrorCode ierr;
497: PetscInt n,i;
498: const Vec *Bx;
501: n = data->r_e - data->r_s;
502: for(i=0; i<n; i++) {
503: MatMultTranspose(data->d->A,x[i],y[i]);
504: }
506: for(i=0; i<n; i++) {
507: #if !defined(PETSC_USE_COMPLEX)
508: if(data->d->eigi[data->r_s+i] != 0.0) {
509: if (data->d->B) {
510: MatMultTranspose(data->d->B,x[i],auxV[0]);
511: MatMultTranspose(data->d->B,x[i+1],auxV[1]);
512: Bx = auxV;
513: } else {
514: Bx = &x[i];
515: }
517: /* y_i <- [ t_2i+1*A*x_i - t_2i*Bx_i - ti_i*Bx_i+1;
518: y_i+1 t_2i+1*A*x_i+1 + ti_i*Bx_i - t_2i*Bx_i+1 ] */
519: VecAXPBYPCZ(y[i],-data->theta[2*i],-data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
520: VecAXPBYPCZ(y[i+1],data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
521: i++;
522: } else
523: #endif
524: {
525: if (data->d->B) {
526: MatMultTranspose(data->d->B,x[i],auxV[0]);
527: Bx = auxV;
528: } else {
529: Bx = &x[i];
530: }
531: VecAXPBY(y[i],PetscConj(-data->theta[i*2]),PetscConj(data->theta[i*2+1]),Bx[0]);
532: }
533: }
534: return(0);
535: }
540: PetscErrorCode dvd_pcapplyba(PC pc,PCSide side,Vec in,Vec out,Vec w)
541: {
542: PetscErrorCode ierr;
543: dvdImprovex_jd *data;
544: PetscInt n,i;
545: const Vec *inx,*outx,*wx;
546: Mat A;
550: PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
551: MatShellGetContext(A,(void**)&data);
552: VecCompGetSubVecs(in,PETSC_NULL,&inx);
553: VecCompGetSubVecs(out,PETSC_NULL,&outx);
554: VecCompGetSubVecs(w,PETSC_NULL,&wx);
555: n = data->r_e - data->r_s;
557: /* Check auxiliary vectors */
558: if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
560: switch(side) {
561: case PC_LEFT:
562: /* aux <- theta[1]A*in - theta[0]*B*in */
563: dvd_aux_matmult(data,inx,data->auxV,outx);
564:
565: /* out <- K * aux */
566: for(i=0; i<n; i++) {
567: data->d->improvex_precond(data->d,data->r_s+i,data->auxV[i],outx[i]);
568: }
569: break;
570: case PC_RIGHT:
571: /* aux <- K * in */
572: for(i=0; i<n; i++) {
573: data->d->improvex_precond(data->d,data->r_s+i,inx[i],data->auxV[i]);
574: }
575:
576: /* out <- theta[1]A*auxV - theta[0]*B*auxV */
577: dvd_aux_matmult(data,data->auxV,outx,wx);
578: break;
579: case PC_SYMMETRIC:
580: /* aux <- K^{1/2} * in */
581: for(i=0; i<n; i++) {
582: PCApplySymmetricRight(data->old_pc,inx[i],data->auxV[i]);
583: }
584:
585: /* out <- theta[1]A*auxV - theta[0]*B*auxV */
586: dvd_aux_matmult(data,data->auxV,wx,outx);
587:
588: /* aux <- K^{1/2} * in */
589: for(i=0; i<n; i++) {
590: PCApplySymmetricLeft(data->old_pc,wx[i],outx[i]);
591: }
592: break;
593: default:
594: SETERRQ(PETSC_COMM_SELF,1, "Unsupported KSP side");
595: }
597: /* out <- out - v*(u'*out) */
598: dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
600: return(0);
601: }
606: PetscErrorCode dvd_pcapply(PC pc,Vec in,Vec out)
607: {
608: PetscErrorCode ierr;
609: dvdImprovex_jd *data;
610: PetscInt n,i;
611: const Vec *inx, *outx;
612: Mat A;
616: PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
617: MatShellGetContext(A,(void**)&data);
618: VecCompGetSubVecs(in,PETSC_NULL,&inx);
619: VecCompGetSubVecs(out,PETSC_NULL,&outx);
620: n = data->r_e - data->r_s;
622: /* out <- K * in */
623: for(i=0; i<n; i++) {
624: data->d->improvex_precond(data->d,data->r_s+i,inx[i],outx[i]);
625: }
627: /* out <- out - v*(u'*out) */
628: dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
630: return(0);
631: }
635: PetscErrorCode dvd_pcapplytrans(PC pc,Vec in,Vec out)
636: {
637: PetscErrorCode ierr;
638: dvdImprovex_jd *data;
639: PetscInt n,i;
640: const Vec *inx, *outx;
641: Mat A;
645: PCGetOperators(pc,&A,PETSC_NULL,PETSC_NULL);
646: MatShellGetContext(A,(void**)&data);
647: VecCompGetSubVecs(in,PETSC_NULL,&inx);
648: VecCompGetSubVecs(out,PETSC_NULL,&outx);
649: n = data->r_e - data->r_s;
651: /* Check auxiliary vectors */
652: if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
654: /* auxV <- in */
655: for(i=0; i<n; i++) {
656: VecCopy(inx[i],data->auxV[i]);
657: }
658:
659: /* auxV <- auxV - u*(v'*auxV) */
660: dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);
662: /* out <- K' * aux */
663: for(i=0; i<n; i++) {
664: PCApplyTranspose(data->old_pc,data->auxV[i],outx[i]);
665: }
667: return(0);
668: }
673: PetscErrorCode dvd_matmult_jd(Mat A,Vec in,Vec out)
674: {
675: PetscErrorCode ierr;
676: dvdImprovex_jd *data;
677: PetscInt n;
678: const Vec *inx, *outx;
679: PCSide side;
683: MatShellGetContext(A,(void**)&data);
684: VecCompGetSubVecs(in,PETSC_NULL,&inx);
685: VecCompGetSubVecs(out,PETSC_NULL,&outx);
686: n = data->r_e - data->r_s;
688: /* Check auxiliary vectors */
689: if (&data->auxV[2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
691: /* out <- theta[1]A*in - theta[0]*B*in */
692: dvd_aux_matmult(data,inx,outx,data->auxV);
694: KSPGetPCSide(data->ksp,&side);
695: if (side == PC_RIGHT) {
696: /* out <- out - v*(u'*out) */
697: dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
698: }
700: return(0);
701: }
705: PetscErrorCode dvd_matmulttrans_jd(Mat A,Vec in,Vec out)
706: {
707: PetscErrorCode ierr;
708: dvdImprovex_jd *data;
709: PetscInt n,i;
710: const Vec *inx,*outx,*r,*auxV;
711: PCSide side;
715: MatShellGetContext(A,(void**)&data);
716: VecCompGetSubVecs(in,PETSC_NULL,&inx);
717: VecCompGetSubVecs(out,PETSC_NULL,&outx);
718: n = data->r_e - data->r_s;
720: /* Check auxiliary vectors */
721: if (&data->auxV[n+2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
723: KSPGetPCSide(data->ksp,&side);
724: if (side == PC_RIGHT) {
725: /* auxV <- in */
726: for(i=0; i<n; i++) {
727: VecCopy(inx[i],data->auxV[i]);
728: }
730: /* auxV <- auxV - v*(u'*auxV) */
731: dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);
732: r = data->auxV;
733: auxV = data->auxV+n;
734: } else {
735: r = inx;
736: auxV = data->auxV;
737: }
739: /* out <- theta[1]A*r - theta[0]*B*r */
740: dvd_aux_matmulttrans(data,r,outx,auxV);
742: return(0);
743: }
748: PetscErrorCode dvd_matgetvecs_jd(Mat A, Vec *right, Vec *left)
749: {
750: PetscErrorCode ierr;
751: Vec *r, *l;
752: dvdImprovex_jd *data;
753: PetscInt n, i;
757: MatShellGetContext(A, (void**)&data);
758: n = data->ksp_max_size;
759: if (right) {
760: PetscMalloc(sizeof(Vec)*n, &r);
761: }
762: if (left) {
763: PetscMalloc(sizeof(Vec)*n, &l);
764: }
765: for (i=0; i<n; i++) {
766: MatGetVecs(data->d->A, right?&r[i]:PETSC_NULL,
767: left?&l[i]:PETSC_NULL);
768: }
769: if(right) {
770: VecCreateCompWithVecs(r, n, data->friends, right);
771: for (i=0; i<n; i++) {
772: VecDestroy(&r[i]);
773: }
774: }
775: if(left) {
776: VecCreateCompWithVecs(l, n, data->friends, left);
777: for (i=0; i<n; i++) {
778: VecDestroy(&l[i]);
779: }
780: }
782: if (right) {
783: PetscFree(r);
784: }
785: if (left) {
786: PetscFree(l);
787: }
789: return(0);
790: }
795: PetscErrorCode dvd_improvex_jd_proj_uv(dvdDashboard *d, dvdBlackboard *b,
796: ProjType_t p)
797: {
800: /* Setup the step */
801: if (b->state >= DVD_STATE_CONF) {
802: switch(p) {
803: case DVD_PROJ_KXX:
804: d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KXX; break;
805: case DVD_PROJ_KZX:
806: d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KZX; break;
807: }
808: }
810: return(0);
811: }
815: /*
816: Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
817: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
818: where
819: auxV, 4*(i_e-i_s) auxiliar global vectors
820: pX,pY, the right and left eigenvectors of the projected system
821: ld, the leading dimension of pX and pY
822: auxS: max(8*bs*max_cX_in_proj,size_V*size_V)
823: */
824: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d, PetscInt i_s,
825: PetscInt i_e, Vec **u, Vec **v, Vec *kr, Vec **auxV, PetscScalar **auxS,
826: PetscScalar *theta, PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY,
827: PetscInt ld)
828: {
829: #if defined(PETSC_MISSING_LAPACK_GETRF)
831: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable");
832: #else
833: PetscErrorCode ierr;
834: PetscInt n = i_e - i_s, size_KZ, V_new, rm, i, size_in;
835: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
836: PetscBLASInt s, ldXKZ, info;
837: DvdReduction r;
838: DvdReductionChunk
839: ops[2];
840: DvdMult_copy_func
841: sr[2];
845: /* Check consistency */
846: V_new = d->size_cX - data->size_cX;
847: if (V_new > data->old_size_X) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
848: data->old_size_X = n;
850: /* KZ <- KZ(rm:rm+max_cX-1) */
851: rm = PetscMax(V_new+data->size_KZ-d->max_cX_in_impr, 0);
852: for (i=0; i<d->max_cX_in_impr; i++) {
853: VecCopy(data->KZ[i+rm], data->KZ[i]);
854: }
855: data->size_cX = d->size_cX;
857: /* XKZ <- XKZ(rm:rm+max_cX-1,rm:rm+max_cX-1) */
858: for (i=0; i<d->max_cX_in_impr; i++) {
859: SlepcDenseCopy(&data->XKZ[i*data->ldXKZ+i], data->ldXKZ, &data->XKZ[(i+rm)*data->ldXKZ+i+rm], data->ldXKZ, data->size_KZ, 1);
860: }
861: data->size_KZ = PetscMin(d->max_cX_in_impr, data->size_KZ+V_new);
863: /* Compute X, KZ and KR */
864: *u = *auxV; *auxV+= n;
865: *v = &data->KZ[data->size_KZ];
866: d->improvex_jd_proj_uv(d, i_s, i_e, *u, *v, kr, *auxV, theta, thetai,
867: pX, pY, ld);
869: /* XKZ <- X'*KZ */
870: size_KZ = data->size_KZ+n;
871: size_in = 2*n*data->size_KZ+n*n;
872: SlepcAllReduceSumBegin(ops,2,*auxS,*auxS+size_in,size_in,&r,((PetscObject)d->V[0])->comm);
873: VecsMultS(data->XKZ,0,data->ldXKZ,d->V-data->size_KZ,0,data->size_KZ,data->KZ,data->size_KZ,size_KZ,&r,&sr[0]);
874: VecsMultS(&data->XKZ[data->size_KZ],0,data->ldXKZ,*u,0,n,data->KZ,0,size_KZ,&r,&sr[1]);
875: SlepcAllReduceSumEnd(&r);
877: /* iXKZ <- inv(XKZ) */
878: s = PetscBLASIntCast(size_KZ);
879: data->ldiXKZ = data->size_iXKZ = size_KZ;
880: data->iXKZ = *auxS; *auxS+= size_KZ*size_KZ;
881: data->iXKZPivots = (PetscBLASInt*)*auxS;
882: *auxS += FromIntToScalar(size_KZ);
883: SlepcDenseCopy(data->iXKZ,data->ldiXKZ,data->XKZ,data->ldXKZ,size_KZ,size_KZ);
884: ldXKZ = PetscBLASIntCast(data->ldiXKZ);
885: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
886: LAPACKgetrf_(&s, &s, data->iXKZ, &ldXKZ, data->iXKZPivots, &info);
887: PetscFPTrapPop();
888: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRF %d", info);
890: return(0);
891: #endif
892: }
894: #define DVD_COMPLEX_RAYLEIGH_QUOTIENT(ur,ui,Axr,Axi,Bxr,Bxi,eigr,eigi,b,ierr)\
895: { \
896: VecDot((Axr), (ur), &(b)[0]); /* r*A*r */ \
897: VecDot((Axr), (ui), &(b)[1]); /* i*A*r */ \
898: VecDot((Axi), (ur), &(b)[2]); /* r*A*i */ \
899: VecDot((Axi), (ui), &(b)[3]); /* i*A*i */ \
900: VecDot((Bxr), (ur), &(b)[4]); /* r*B*r */ \
901: VecDot((Bxr), (ui), &(b)[5]); /* i*B*r */ \
902: VecDot((Bxi), (ur), &(b)[6]); /* r*B*i */ \
903: VecDot((Bxi), (ui), &(b)[7]); /* i*B*i */ \
904: (b)[0] = (b)[0]+(b)[3]; /* rAr+iAi */ \
905: (b)[2] = (b)[2]-(b)[1]; /* rAi-iAr */ \
906: (b)[4] = (b)[4]+(b)[7]; /* rBr+iBi */ \
907: (b)[6] = (b)[6]-(b)[5]; /* rBi-iBr */ \
908: (b)[7] = (b)[4]*(b)[4] + (b)[6]*(b)[6]; /* k */ \
909: *(eigr) = ((b)[0]*(b)[4] + (b)[2]*(b)[6]) / (b)[7]; /* eig_r */ \
910: *(eigi) = ((b)[2]*(b)[4] - (b)[0]*(b)[6]) / (b)[7]; /* eig_i */ \
911: }
913: #if !defined(PETSC_USE_COMPLEX)
914: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
915: for((i)=0; (i)<(n); (i)++) { \
916: if ((eigi)[(i_s)+(i)] != 0.0) { \
917: /* eig_r = [(rAr+iAi)*(rBr+iBi) + (rAi-iAr)*(rBi-iBr)]/k \
918: eig_i = [(rAi-iAr)*(rBr+iBi) - (rAr+iAi)*(rBi-iBr)]/k \
919: k = (rBr+iBi)*(rBr+iBi) + (rBi-iBr)*(rBi-iBr) */ \
920: DVD_COMPLEX_RAYLEIGH_QUOTIENT((u)[(i)], (u)[(i)+1], (Ax)[(i)], \
921: (Ax)[(i)+1], (Bx)[(i)], (Bx)[(i)+1], &(b)[8], &(b)[9], (b), (ierr)); \
922: if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[8])/ \
923: PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10 || \
924: PetscAbsScalar((eigi)[(i_s)+(i)] - (b)[9])/ \
925: PetscAbsScalar((eigi)[(i_s)+(i)]) > 1e-10 ) { \
926: (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its "\
927: "Rayleigh quotient value %G+%G\n", \
928: (eigr)[(i_s)+(i)], \
929: (eigi)[(i_s)+1], (b)[8], (b)[9]); \
930: } \
931: (i)++; \
932: } \
933: }
934: #else
935: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
936: for((i)=0; (i)<(n); (i)++) { \
937: (ierr) = VecDot((Ax)[(i)], (u)[(i)], &(b)[0]); \
938: (ierr) = VecDot((Bx)[(i)], (u)[(i)], &(b)[1]); \
939: (b)[0] = (b)[0]/(b)[1]; \
940: if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[0])/ \
941: PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10 ) { \
942: (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its " \
943: "Rayleigh quotient value %G+%G\n", \
944: PetscRealPart((eigr)[(i_s)+(i)]), \
945: PetscImaginaryPart((eigr)[(i_s)+(i)]), PetscRealPart((b)[0]), \
946: PetscImaginaryPart((b)[0])); \
947: } \
948: }
949: #endif
954: /*
955: Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
956: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
957: where
958: auxV, 4*(i_e-i_s) auxiliar global vectors
959: pX,pY, the right and left eigenvectors of the projected system
960: ld, the leading dimension of pX and pY
961: */
962: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d, PetscInt i_s,
963: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
964: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld)
965: {
966: PetscErrorCode ierr;
967: PetscInt n = i_e - i_s, i;
968: PetscScalar b[16];
969: Vec *Ax, *Bx, *r=auxV, X[4];
970: /* The memory manager doen't allow to call a subroutines */
971: PetscScalar Z[size_Z];
975: /* u <- X(i) */
976: dvd_improvex_compute_X(d,i_s,i_e,u,pX,ld);
978: /* v <- theta[0]A*u + theta[1]*B*u */
980: /* Bx <- B*X(i) */
981: Bx = kr;
982: if (d->BV) {
983: SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
984: } else {
985: for(i=0; i<n; i++) {
986: if (d->B) {
987: MatMult(d->B, u[i], Bx[i]);
988: } else {
989: VecCopy(u[i], Bx[i]);
990: }
991: }
992: }
994: /* Ax <- A*X(i) */
995: Ax = r;
996: SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
998: /* v <- Y(i) */
999: SlepcUpdateVectorsZ(v, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);
1001: /* Recompute the eigenvalue */
1002: DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, v, Ax, Bx, b, ierr);
1004: for(i=0; i<n; i++) {
1005: #if !defined(PETSC_USE_COMPLEX)
1006: if(d->eigi[i_s+i] != 0.0) {
1007: /* [r_i r_i+1 kr_i kr_i+1]*= [ theta_2i' 0 1 0
1008: 0 theta_2i' 0 1
1009: theta_2i+1 -thetai_i -eigr_i -eigi_i
1010: thetai_i theta_2i+1 eigi_i -eigr_i ] */
1011: b[0] = b[5] = PetscConj(theta[2*i]);
1012: b[2] = b[7] = -theta[2*i+1];
1013: b[6] = -(b[3] = thetai[i]);
1014: b[1] = b[4] = 0.0;
1015: b[8] = b[13] = 1.0/d->nX[i_s+i];
1016: b[10] = b[15] = -d->eigr[i_s+i]/d->nX[i_s+i];
1017: b[14] = -(b[11] = d->eigi[i_s+i]/d->nX[i_s+i]);
1018: b[9] = b[12] = 0.0;
1019: X[0] = Ax[i]; X[1] = Ax[i+1]; X[2] = Bx[i]; X[3] = Bx[i+1];
1020: SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 4, Z, size_Z);
1021:
1022: i++;
1023: } else
1024: #endif
1025: {
1026: /* [Ax_i Bx_i]*= [ theta_2i' 1/nX_i
1027: theta_2i+1 -eig_i/nX_i ] */
1028: b[0] = PetscConj(theta[i*2]);
1029: b[1] = theta[i*2+1];
1030: b[2] = 1.0/d->nX[i_s+i];
1031: b[3] = -d->eigr[i_s+i]/d->nX[i_s+i];
1032: X[0] = Ax[i]; X[1] = Bx[i];
1033: SlepcUpdateVectorsD(X, 2, 1.0, b, 2, 2, 2, Z, size_Z);
1034:
1035: }
1036: }
1037: for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;
1039: /* v <- K^{-1} r = K^{-1}(theta_2i'*Ax + theta_2i+1*Bx) */
1040: for(i=0; i<n; i++) {
1041: d->improvex_precond(d, i_s+i, r[i], v[i]);
1042: }
1044: /* kr <- P*(Ax - eig_i*Bx) */
1045: d->calcpairs_proj_res(d, i_s, i_e, kr);
1047: return(0);
1048: }
1052: /*
1053: Compute: u <- K^{-1}*X, v <- X,
1054: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1]
1055: where
1056: auxV, 4*(i_e-i_s) auxiliar global vectors
1057: pX,pY, the right and left eigenvectors of the projected system
1058: ld, the leading dimension of pX and pY
1059: */
1060: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d, PetscInt i_s,
1061: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
1062: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld)
1063: {
1064: PetscErrorCode ierr;
1065: PetscInt n = i_e - i_s, i;
1066: PetscScalar b[16];
1067: Vec *Ax, *Bx, *r = auxV, X[4];
1068: /* The memory manager doen't allow to call a subroutines */
1069: PetscScalar Z[size_Z];
1073: /* [v u] <- X(i) Y(i) */
1074: dvd_improvex_compute_X(d,i_s,i_e,v,pX,ld);
1075: SlepcUpdateVectorsZ(u, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);
1077: /* Bx <- B*X(i) */
1078: Bx = r;
1079: if (d->BV) {
1080: SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
1081: } else {
1082: if (d->B) {
1083: for(i=0; i<n; i++) {
1084: MatMult(d->B, v[i], Bx[i]);
1085: }
1086: } else
1087: Bx = v;
1088: }
1090: /* Ax <- A*X(i) */
1091: Ax = kr;
1092: SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
1094: /* Recompute the eigenvalue */
1095: DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, u, Ax, Bx, b, ierr);
1097: for(i=0; i<n; i++) {
1098: if (d->eigi[i_s+i] == 0.0) {
1099: /* kr <- Ax -eig*Bx */
1100: VecAXPBY(kr[i], -d->eigr[i_s+i]/d->nX[i_s+i], 1.0/d->nX[i_s+i], Bx[i]);
1101: } else {
1102: /* [kr_i kr_i+1 r_i r_i+1]*= [ 1 0
1103: 0 1
1104: -eigr_i -eigi_i
1105: eigi_i -eigr_i] */
1106: b[0] = b[5] = 1.0/d->nX[i_s+i];
1107: b[2] = b[7] = -d->eigr[i_s+i]/d->nX[i_s+i];
1108: b[6] = -(b[3] = d->eigi[i_s+i]/d->nX[i_s+i]);
1109: b[1] = b[4] = 0.0;
1110: X[0] = kr[i]; X[1] = kr[i+1]; X[2] = r[i]; X[3] = r[i+1];
1111: SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 2, Z, size_Z);
1112:
1113: i++;
1114: }
1115: }
1116: for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;
1118: /* kr <- P*kr */
1119: d->calcpairs_proj_res(d, i_s, i_e, r);
1121: /* u <- K^{-1} X(i) */
1122: for(i=0; i<n; i++) {
1123: d->improvex_precond(d, i_s+i, v[i], u[i]);
1124: }
1126: return(0);
1127: }
1132: PetscErrorCode dvd_improvex_jd_lit_const(dvdDashboard *d, dvdBlackboard *b,
1133: PetscInt maxits, PetscReal tol,
1134: PetscReal fix)
1135: {
1136: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1139: /* Setup the step */
1140: if (b->state >= DVD_STATE_CONF) {
1141: data->maxits = maxits;
1142: data->tol = tol;
1143: data->fix = fix;
1144: d->improvex_jd_lit = dvd_improvex_jd_lit_const_0;
1145: }
1146: return(0);
1147: }
1152: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d, PetscInt i,
1153: PetscScalar* theta, PetscScalar* thetai, PetscInt *maxits, PetscReal *tol)
1154: {
1155: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1156: PetscReal a;
1159: a = SlepcAbsEigenvalue(d->eigr[i],d->eigi[i]);
1161: if (d->nR[i]/a < data->fix) {
1162: theta[0] = d->eigr[i];
1163: theta[1] = 1.0;
1164: #if !defined(PETSC_USE_COMPLEX)
1165: *thetai = d->eigi[i];
1166: #endif
1167: } else {
1168: theta[0] = d->target[0];
1169: theta[1] = d->target[1];
1170: #if !defined(PETSC_USE_COMPLEX)
1171: *thetai = 0.0;
1172: #endif
1173: }
1175: #if defined(PETSC_USE_COMPLEX)
1176: if(thetai) *thetai = 0.0;
1177: #endif
1178: *maxits = data->maxits;
1179: *tol = data->tol;
1180: return(0);
1181: }
1184: /**** Patterns implementation *************************************************/
1186: typedef PetscInt (*funcV0_t)(dvdDashboard*, PetscInt, PetscInt, Vec*);
1187: typedef PetscInt (*funcV1_t)(dvdDashboard*, PetscInt, PetscInt, Vec*,
1188: PetscScalar*, Vec);
1192: /* Compute D <- K^{-1} * funcV[r_s..r_e] */
1193: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d, void *funcV, Vec *D,
1194: PetscInt max_size_D, PetscInt r_s, PetscInt r_e, Vec *auxV,
1195: PetscScalar *auxS)
1196: {
1197: PetscErrorCode ierr;
1198: PetscInt i;
1201: if (max_size_D >= r_e-r_s+1) {
1202: /* The optimized version needs one vector extra of D */
1203: /* D(1:r.size) = R(r_s:r_e-1) */
1204: if (auxS) ((funcV1_t)funcV)(d, r_s, r_e, D+1, auxS, auxV[0]);
1205: else ((funcV0_t)funcV)(d, r_s, r_e, D+1);
1207: /* D = K^{-1} * R */
1208: for (i=0; i<r_e-r_s; i++) {
1209: d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1210: }
1211: } else if (max_size_D == r_e-r_s) {
1212: /* Non-optimized version */
1213: /* auxV <- R[r_e-1] */
1214: if (auxS) ((funcV1_t)funcV)(d, r_e-1, r_e, auxV, auxS, auxV[1]);
1215: else ((funcV0_t)funcV)(d, r_e-1, r_e, auxV);
1217: /* D(1:r.size-1) = R(r_s:r_e-2) */
1218: if (auxS) ((funcV1_t)funcV)(d, r_s, r_e-1, D+1, auxS, auxV[1]);
1219: else ((funcV0_t)funcV)(d, r_s, r_e-1, D+1);
1221: /* D = K^{-1} * R */
1222: for (i=0; i<r_e-r_s-1; i++) {
1223: d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1224: }
1225: d->improvex_precond(d, r_e-1, auxV[0], D[r_e-r_s-1]);
1226: } else SETERRQ(PETSC_COMM_SELF,1, "Problem: r_e-r_s > max_size_D");
1227: return(0);
1228: }
1233: /* Compute (I - KZ*iXKZ*X')*V where,
1234: V, the vectors to apply the projector,
1235: cV, the number of vectors in V,
1236: auxS, auxiliar vector of size length 3*size_iXKZ*cV
1237: */
1238: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS)
1239: {
1240: #if defined(PETSC_MISSING_LAPACK_GETRS)
1242: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1243: #else
1244: PetscErrorCode ierr;
1245: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1246: PetscInt size_in = data->size_iXKZ*cV, i, ldh;
1247: PetscScalar *h, *in, *out;
1248: PetscBLASInt cV_, n, info, ld;
1249: DvdReduction r;
1250: DvdReductionChunk
1251: ops[4];
1252: DvdMult_copy_func
1253: sr[4];
1254:
1256: if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
1258: /* h <- X'*V */
1259: h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1260: SlepcAllReduceSumBegin(ops, 4, in, out, size_in, &r,
1261: ((PetscObject)d->V[0])->comm);
1262: for (i=0; i<cV; i++) {
1263: VecsMultS(&h[i*ldh],0,ldh,d->V-data->size_KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i*2]);
1264: VecsMultS(&h[i*ldh+data->size_KZ],0,ldh,data->u,0,data->size_iXKZ-data->size_KZ,V+i,0,1,&r,&sr[i*2+1]);
1265: }
1266: SlepcAllReduceSumEnd(&r);
1268: /* h <- iXKZ\h */
1269: cV_ = PetscBLASIntCast(cV);
1270: n = PetscBLASIntCast(data->size_iXKZ);
1271: ld = PetscBLASIntCast(data->ldiXKZ);
1273: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1274: LAPACKgetrs_("N", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info);
1275: PetscFPTrapPop();
1276: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);
1278: /* V <- V - KZ*h */
1279: for (i=0; i<cV; i++) {
1280: SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->KZ,data->size_iXKZ,&h[ldh*i],ldh,data->size_iXKZ,1);
1281: }
1282: return(0);
1283: #endif
1284: }
1288: /* Compute (I - X*iXKZ*KZ')*V where,
1289: V, the vectors to apply the projector,
1290: cV, the number of vectors in V,
1291: auxS, auxiliar vector of size length 3*size_iXKZ*cV
1292: */
1293: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS)
1294: {
1295: #if defined(PETSC_MISSING_LAPACK_GETRS)
1297: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1298: #else
1299: PetscErrorCode ierr;
1300: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1301: PetscInt size_in = data->size_iXKZ*cV, i, ldh;
1302: PetscScalar *h, *in, *out;
1303: PetscBLASInt cV_, n, info, ld;
1304: DvdReduction r;
1305: DvdReductionChunk
1306: ops[2];
1307: DvdMult_copy_func
1308: sr[2];
1309:
1311: if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
1313: /* h <- KZ'*V */
1314: h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1315: SlepcAllReduceSumBegin(ops, 2, in, out, size_in, &r,
1316: ((PetscObject)d->V[0])->comm);
1317: for (i=0; i<cV; i++) {
1318: VecsMultS(&h[i*ldh],0,ldh,data->KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i]);
1319: }
1320: SlepcAllReduceSumEnd(&r);
1322: /* h <- iXKZ\h */
1323: cV_ = PetscBLASIntCast(cV);
1324: n = PetscBLASIntCast(data->size_iXKZ);
1325: ld = PetscBLASIntCast(data->ldiXKZ);
1327: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1328: LAPACKgetrs_("C", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info);
1329: PetscFPTrapPop();
1330: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);
1332: /* V <- V - X*h */
1333: for (i=0; i<cV; i++) {
1334: SlepcUpdateVectorsZ(V+i,1.0,-1.0,d->V-data->size_KZ,data->size_KZ,&h[ldh*i],ldh,data->size_KZ,1);
1335: SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->u,data->size_iXKZ-data->size_KZ,&h[ldh*i+data->size_KZ],ldh,data->size_iXKZ-data->size_KZ,1);
1336: }
1337: return(0);
1338: #endif
1339: }