Actual source code: nrefine.c
slepc-3.22.2 2024-12-02
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Newton refinement for polynomial eigenproblems.
13: References:
15: [1] T. Betcke and D. Kressner, "Perturbation, extraction and refinement
16: of invariant pairs for matrix polynomials", Linear Algebra Appl.
17: 435(3):514-536, 2011.
19: [2] C. Campos and J.E. Roman, "Parallel iterative refinement in
20: polynomial eigenvalue problems", Numer. Linear Algebra Appl. 23(4):
21: 730-745, 2016.
22: */
24: #include <slepc/private/pepimpl.h>
25: #include <slepcblaslapack.h>
27: typedef struct {
28: Mat *A,M1;
29: BV V,M2,M3,W;
30: PetscInt k,nmat;
31: PetscScalar *fih,*work,*M4;
32: PetscBLASInt *pM4;
33: PetscBool compM1;
34: Vec t;
35: } PEP_REFINE_MATSHELL;
37: typedef struct {
38: Mat E[2],M1;
39: Vec tN,ttN,t1,vseq;
40: VecScatter scatterctx;
41: PetscBool compM1;
42: PetscInt *map0,*map1,*idxg,*idxp;
43: PetscSubcomm subc;
44: VecScatter scatter_sub;
45: VecScatter *scatter_id,*scatterp_id;
46: Mat *A;
47: BV V,W,M2,M3,Wt;
48: PetscScalar *M4,*w,*wt,*d,*dt;
49: Vec t,tg,Rv,Vi,tp,tpg;
50: PetscInt idx,*cols;
51: } PEP_REFINE_EXPLICIT;
53: static PetscErrorCode MatMult_FS(Mat M ,Vec x,Vec y)
54: {
55: PEP_REFINE_MATSHELL *ctx;
56: PetscInt k,i;
57: PetscScalar *c;
58: PetscBLASInt k_,one=1,info;
60: PetscFunctionBegin;
61: PetscCall(MatShellGetContext(M,&ctx));
62: PetscCall(VecCopy(x,ctx->t));
63: k = ctx->k;
64: c = ctx->work;
65: PetscCall(PetscBLASIntCast(k,&k_));
66: PetscCall(MatMult(ctx->M1,x,y));
67: PetscCall(VecConjugate(ctx->t));
68: PetscCall(BVDotVec(ctx->M3,ctx->t,c));
69: for (i=0;i<k;i++) c[i] = PetscConj(c[i]);
70: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
71: PetscCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,c,&k_,&info));
72: PetscCall(PetscFPTrapPop());
73: SlepcCheckLapackInfo("getrs",info);
74: PetscCall(BVMultVec(ctx->M2,-1.0,1.0,y,c));
75: PetscFunctionReturn(PETSC_SUCCESS);
76: }
78: /*
79: Evaluates the first d elements of the polynomial basis
80: on a given matrix H which is considered to be triangular
81: */
82: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
83: {
84: PetscInt i,j,ldfh=nm*k,off,nmat=pep->nmat;
85: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
86: PetscScalar corr=0.0,alpha,beta;
87: PetscBLASInt k_,ldh_,ldfh_;
89: PetscFunctionBegin;
90: PetscCall(PetscBLASIntCast(ldh,&ldh_));
91: PetscCall(PetscBLASIntCast(k,&k_));
92: PetscCall(PetscBLASIntCast(ldfh,&ldfh_));
93: PetscCall(PetscArrayzero(fH,nm*k*k));
94: if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
95: if (nm>1) {
96: t = b[0]/a[0];
97: off = k;
98: for (j=0;j<k;j++) {
99: for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
100: fH[j+j*ldfh] -= t;
101: }
102: }
103: for (i=2;i<nm;i++) {
104: off = i*k;
105: if (i==2) {
106: for (j=0;j<k;j++) {
107: fH[off+j+j*ldfh] = 1.0;
108: H[j+j*ldh] -= b[1];
109: }
110: } else {
111: for (j=0;j<k;j++) {
112: PetscCall(PetscArraycpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k));
113: H[j+j*ldh] += corr-b[i-1];
114: }
115: }
116: corr = b[i-1];
117: beta = -g[i-1]/a[i-1];
118: alpha = 1/a[i-1];
119: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
120: }
121: for (j=0;j<k;j++) H[j+j*ldh] += corr;
122: PetscFunctionReturn(PETSC_SUCCESS);
123: }
125: static PetscErrorCode NRefSysSetup_shell(PEP pep,PetscInt k,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,PEP_REFINE_MATSHELL *ctx)
126: {
127: PetscScalar *DHii,*T12,*Tr,*Ts,*array,s,ss,sone=1.0,zero=0.0,*M4=ctx->M4,t,*v,*T;
128: const PetscScalar *m3,*m2;
129: PetscInt i,d,j,nmat=pep->nmat,lda=nmat*k,deg=nmat-1,nloc,ld2,ld3;
130: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
131: PetscBLASInt k_,lda_,lds_,nloc_,ld2_,one=1,info;
132: Mat *A=ctx->A,Mk,M1=ctx->M1,P;
133: BV V=ctx->V,M2=ctx->M2,M3=ctx->M3,W=ctx->W;
134: MatStructure str;
135: Vec vc;
137: PetscFunctionBegin;
138: PetscCall(STGetMatStructure(pep->st,&str));
139: PetscCall(PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts));
140: DHii = T12;
141: PetscCall(PetscArrayzero(DHii,k*k*nmat));
142: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
143: for (d=2;d<nmat;d++) {
144: for (j=0;j<k;j++) {
145: for (i=0;i<k;i++) {
146: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
147: }
148: }
149: }
150: /* T11 */
151: if (!ctx->compM1) {
152: PetscCall(MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN));
153: PetscCall(PEPEvaluateBasis(pep,h,0,Ts,NULL));
154: for (j=1;j<nmat;j++) PetscCall(MatAXPY(M1,Ts[j],A[j],str));
155: }
157: /* T22 */
158: PetscCall(PetscBLASIntCast(lds,&lds_));
159: PetscCall(PetscBLASIntCast(k,&k_));
160: PetscCall(PetscBLASIntCast(lda,&lda_));
161: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
162: for (i=1;i<deg;i++) {
163: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
164: s = (i==1)?0.0:1.0;
165: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
166: }
167: for (i=0;i<k;i++) for (j=0;j<i;j++) { t=M4[i+j*k];M4[i+j*k]=M4[j+i*k];M4[j+i*k]=t; }
169: /* T12 */
170: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk));
171: for (i=1;i<nmat;i++) {
172: PetscCall(MatDenseGetArrayWrite(Mk,&array));
173: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
174: PetscCall(MatDenseRestoreArrayWrite(Mk,&array));
175: PetscCall(BVSetActiveColumns(W,0,k));
176: PetscCall(BVMult(W,1.0,0.0,V,Mk));
177: if (i==1) PetscCall(BVMatMult(W,A[i],M2));
178: else {
179: PetscCall(BVMatMult(W,A[i],M3)); /* using M3 as work space */
180: PetscCall(BVMult(M2,1.0,1.0,M3,NULL));
181: }
182: }
184: /* T21 */
185: PetscCall(MatDenseGetArrayWrite(Mk,&array));
186: for (i=1;i<deg;i++) {
187: s = (i==1)?0.0:1.0;
188: ss = PetscConj(fh[i]);
189: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
190: }
191: PetscCall(MatDenseRestoreArrayWrite(Mk,&array));
192: PetscCall(BVSetActiveColumns(M3,0,k));
193: PetscCall(BVMult(M3,1.0,0.0,V,Mk));
194: for (i=0;i<k;i++) {
195: PetscCall(BVGetColumn(M3,i,&vc));
196: PetscCall(VecConjugate(vc));
197: PetscCall(BVRestoreColumn(M3,i,&vc));
198: }
199: PetscCall(MatDestroy(&Mk));
200: PetscCall(PetscFree3(T12,Tr,Ts));
202: PetscCall(VecGetLocalSize(ctx->t,&nloc));
203: PetscCall(PetscBLASIntCast(nloc,&nloc_));
204: PetscCall(PetscMalloc1(nloc*k,&T));
205: PetscCall(KSPGetOperators(pep->refineksp,NULL,&P));
206: if (!ctx->compM1) PetscCall(MatCopy(ctx->M1,P,SAME_NONZERO_PATTERN));
207: PetscCall(BVGetArrayRead(ctx->M2,&m2));
208: PetscCall(BVGetLeadingDimension(ctx->M2,&ld2));
209: PetscCall(PetscBLASIntCast(ld2,&ld2_));
210: PetscCall(BVGetArrayRead(ctx->M3,&m3));
211: PetscCall(BVGetLeadingDimension(ctx->M3,&ld3));
212: PetscCall(VecGetArray(ctx->t,&v));
213: for (i=0;i<nloc;i++) for (j=0;j<k;j++) T[j+i*k] = m3[i+j*ld3];
214: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
215: PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&nloc_,ctx->M4,&k_,ctx->pM4,T,&k_,&info));
216: PetscCall(PetscFPTrapPop());
217: SlepcCheckLapackInfo("gesv",info);
218: for (i=0;i<nloc;i++) v[i] = BLASdot_(&k_,m2+i,&ld2_,T+i*k,&one);
219: PetscCall(VecRestoreArray(ctx->t,&v));
220: PetscCall(BVRestoreArrayRead(ctx->M2,&m2));
221: PetscCall(BVRestoreArrayRead(ctx->M3,&m3));
222: PetscCall(MatDiagonalSet(P,ctx->t,ADD_VALUES));
223: PetscCall(PetscFree(T));
224: PetscCall(KSPSetUp(pep->refineksp));
225: PetscFunctionReturn(PETSC_SUCCESS);
226: }
228: static PetscErrorCode NRefSysSolve_shell(KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,Vec dVi,PetscScalar *dHi)
229: {
230: PetscScalar *t0;
231: PetscBLASInt k_,one=1,info,lda_;
232: PetscInt i,lda=nmat*k;
233: Mat M;
234: PEP_REFINE_MATSHELL *ctx;
236: PetscFunctionBegin;
237: PetscCall(KSPGetOperators(ksp,&M,NULL));
238: PetscCall(MatShellGetContext(M,&ctx));
239: PetscCall(PetscCalloc1(k,&t0));
240: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
241: PetscCall(PetscBLASIntCast(lda,&lda_));
242: PetscCall(PetscBLASIntCast(k,&k_));
243: for (i=0;i<k;i++) t0[i] = Rh[i];
244: PetscCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,t0,&k_,&info));
245: SlepcCheckLapackInfo("getrs",info);
246: PetscCall(BVMultVec(ctx->M2,-1.0,1.0,Rv,t0));
247: PetscCall(KSPSolve(ksp,Rv,dVi));
248: PetscCall(VecConjugate(dVi));
249: PetscCall(BVDotVec(ctx->M3,dVi,dHi));
250: PetscCall(VecConjugate(dVi));
251: for (i=0;i<k;i++) dHi[i] = Rh[i]-PetscConj(dHi[i]);
252: PetscCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,dHi,&k_,&info));
253: SlepcCheckLapackInfo("getrs",info);
254: PetscCall(PetscFPTrapPop());
255: PetscCall(PetscFree(t0));
256: PetscFunctionReturn(PETSC_SUCCESS);
257: }
259: /*
260: Computes the residual P(H,V*S)*e_j for the polynomial
261: */
262: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t)
263: {
264: PetscScalar *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
265: PetscReal *a=pcf,*b=pcf+nmat,*g=b+nmat;
266: PetscInt i,ii,jj,lda;
267: PetscBLASInt lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
268: Mat M0;
269: Vec w;
271: PetscFunctionBegin;
272: PetscCall(PetscMalloc4(k*nmat,&h,k*k,&DS0,k*k,&DS1,k*k,&Z));
273: lda = k*nmat;
274: PetscCall(PetscBLASIntCast(k,&k_));
275: PetscCall(PetscBLASIntCast(lds,&lds_));
276: PetscCall(PetscBLASIntCast(lda,&lda_));
277: PetscCall(PetscBLASIntCast(nmat,&nmat_));
278: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
279: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0));
280: PetscCall(BVSetActiveColumns(W,0,nmat));
281: PetscCall(BVMult(W,1.0,0.0,V,M0));
282: PetscCall(MatDestroy(&M0));
284: PetscCall(BVGetColumn(W,0,&w));
285: PetscCall(MatMult(A[0],w,Rv));
286: PetscCall(BVRestoreColumn(W,0,&w));
287: for (i=1;i<nmat;i++) {
288: PetscCall(BVGetColumn(W,i,&w));
289: PetscCall(MatMult(A[i],w,t));
290: PetscCall(BVRestoreColumn(W,i,&w));
291: PetscCall(VecAXPY(Rv,1.0,t));
292: }
293: /* Update right-hand side */
294: if (j) {
295: PetscCall(PetscBLASIntCast(ldh,&ldh_));
296: PetscCall(PetscArrayzero(Z,k*k));
297: PetscCall(PetscArrayzero(DS0,k*k));
298: PetscCall(PetscArraycpy(Z+(j-1)*k,dH+(j-1)*k,k));
299: /* Update DfH */
300: for (i=1;i<nmat;i++) {
301: if (i>1) {
302: beta = -g[i-1];
303: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
304: tt += -b[i-1];
305: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
306: tt = b[i-1];
307: beta = 1.0/a[i-1];
308: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
309: F = DS0; DS0 = DS1; DS1 = F;
310: } else {
311: PetscCall(PetscArrayzero(DS1,k*k));
312: for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
313: }
314: for (jj=j;jj<k;jj++) {
315: for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
316: }
317: }
318: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
319: /* Update right-hand side */
320: PetscCall(PetscBLASIntCast(2*k,&k2_));
321: PetscCall(PetscBLASIntCast(j,&j_));
322: PetscCall(PetscBLASIntCast(k+rds,&krds_));
323: c0 = DS0;
324: PetscCall(PetscArrayzero(Rh,k));
325: for (i=0;i<nmat;i++) {
326: PetscCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
327: PetscCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
328: PetscCall(BVMultVec(V,1.0,0.0,t,h));
329: PetscCall(BVSetActiveColumns(dV,0,rds));
330: PetscCall(BVMultVec(dV,1.0,1.0,t,h+k));
331: PetscCall(BVGetColumn(W,i,&w));
332: PetscCall(MatMult(A[i],t,w));
333: PetscCall(BVRestoreColumn(W,i,&w));
334: if (i>0 && i<nmat-1) {
335: PetscCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
336: PetscCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
337: }
338: }
340: for (i=0;i<nmat;i++) h[i] = -1.0;
341: PetscCall(BVMultVec(W,1.0,1.0,Rv,h));
342: }
343: PetscCall(PetscFree4(h,DS0,DS1,Z));
344: PetscFunctionReturn(PETSC_SUCCESS);
345: }
347: static PetscErrorCode NRefSysSolve_mbe(PetscInt k,PetscInt sz,BV W,PetscScalar *w,BV Wt,PetscScalar *wt,PetscScalar *d,PetscScalar *dt,KSP ksp,BV T2,BV T3 ,PetscScalar *T4,PetscBool trans,Vec x1,PetscScalar *x2,Vec sol1,PetscScalar *sol2,Vec vw)
348: {
349: PetscInt i,j,incf,incc;
350: PetscScalar *y,*g,*xx2,*ww,y2,*dd;
351: Vec v,t,xx1;
352: BV WW,T;
354: PetscFunctionBegin;
355: PetscCall(PetscMalloc3(sz,&y,sz,&g,k,&xx2));
356: if (trans) {
357: WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
358: } else {
359: WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
360: }
361: xx1 = vw;
362: PetscCall(VecCopy(x1,xx1));
363: PetscCall(PetscArraycpy(xx2,x2,sz));
364: PetscCall(PetscArrayzero(sol2,k));
365: for (i=sz-1;i>=0;i--) {
366: PetscCall(BVGetColumn(WW,i,&v));
367: PetscCall(VecConjugate(v));
368: PetscCall(VecDot(xx1,v,y+i));
369: PetscCall(VecConjugate(v));
370: PetscCall(BVRestoreColumn(WW,i,&v));
371: for (j=0;j<i;j++) y[i] += ww[j+i*k]*xx2[j];
372: y[i] = -(y[i]-xx2[i])/dd[i];
373: PetscCall(BVGetColumn(T,i,&t));
374: PetscCall(VecAXPY(xx1,-y[i],t));
375: PetscCall(BVRestoreColumn(T,i,&t));
376: for (j=0;j<=i;j++) xx2[j] -= y[i]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
377: g[i] = xx2[i];
378: }
379: if (trans) PetscCall(KSPSolveTranspose(ksp,xx1,sol1));
380: else PetscCall(KSPSolve(ksp,xx1,sol1));
381: if (trans) {
382: WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
383: } else {
384: WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
385: }
386: for (i=0;i<sz;i++) {
387: PetscCall(BVGetColumn(T,i,&t));
388: PetscCall(VecConjugate(t));
389: PetscCall(VecDot(sol1,t,&y2));
390: PetscCall(VecConjugate(t));
391: PetscCall(BVRestoreColumn(T,i,&t));
392: for (j=0;j<i;j++) y2 += sol2[j]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
393: y2 = (g[i]-y2)/dd[i];
394: PetscCall(BVGetColumn(WW,i,&v));
395: PetscCall(VecAXPY(sol1,-y2,v));
396: for (j=0;j<i;j++) sol2[j] -= ww[j+i*k]*y2;
397: sol2[i] = y[i]+y2;
398: PetscCall(BVRestoreColumn(WW,i,&v));
399: }
400: PetscCall(PetscFree3(y,g,xx2));
401: PetscFunctionReturn(PETSC_SUCCESS);
402: }
404: static PetscErrorCode NRefSysSetup_mbe(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx)
405: {
406: PetscInt i,j,l,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
407: Mat M1=matctx->M1,*A,*At,Mk;
408: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
409: PetscScalar s,ss,*DHii,*T12,*array,*Ts,*Tr,*M4=matctx->M4,sone=1.0,zero=0.0;
410: PetscScalar *w=matctx->w,*wt=matctx->wt,*d=matctx->d,*dt=matctx->dt;
411: PetscBLASInt lds_,lda_,k_;
412: MatStructure str;
413: PetscBool flg;
414: BV M2=matctx->M2,M3=matctx->M3,W=matctx->W,Wt=matctx->Wt;
415: Vec vc,vc2;
417: PetscFunctionBegin;
418: PetscCall(PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts));
419: PetscCall(STGetMatStructure(pep->st,&str));
420: PetscCall(STGetTransform(pep->st,&flg));
421: if (flg) {
422: PetscCall(PetscMalloc1(pep->nmat,&At));
423: for (i=0;i<pep->nmat;i++) PetscCall(STGetMatrixTransformed(pep->st,i,&At[i]));
424: } else At = pep->A;
425: if (matctx->subc) A = matctx->A;
426: else A = At;
427: /* Form the explicit system matrix */
428: DHii = T12;
429: PetscCall(PetscArrayzero(DHii,k*k*nmat));
430: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
431: for (l=2;l<nmat;l++) {
432: for (j=0;j<k;j++) {
433: for (i=0;i<k;i++) {
434: DHii[l*k+i+j*lda] = ((h-b[l-1])*DHii[(l-1)*k+i+j*lda]+fH[(l-1)*k+i+j*lda]-g[l-1]*DHii[(l-2)*k+i+j*lda])/a[l-1];
435: }
436: }
437: }
439: /* T11 */
440: if (!matctx->compM1) {
441: PetscCall(MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN));
442: PetscCall(PEPEvaluateBasis(pep,h,0,Ts,NULL));
443: for (j=1;j<nmat;j++) PetscCall(MatAXPY(M1,Ts[j],A[j],str));
444: }
446: /* T22 */
447: PetscCall(PetscBLASIntCast(lds,&lds_));
448: PetscCall(PetscBLASIntCast(k,&k_));
449: PetscCall(PetscBLASIntCast(lda,&lda_));
450: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
451: for (i=1;i<deg;i++) {
452: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
453: s = (i==1)?0.0:1.0;
454: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
455: }
457: /* T12 */
458: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk));
459: for (i=1;i<nmat;i++) {
460: PetscCall(MatDenseGetArrayWrite(Mk,&array));
461: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
462: PetscCall(MatDenseRestoreArrayWrite(Mk,&array));
463: PetscCall(BVSetActiveColumns(W,0,k));
464: PetscCall(BVMult(W,1.0,0.0,V,Mk));
465: if (i==1) PetscCall(BVMatMult(W,A[i],M2));
466: else {
467: PetscCall(BVMatMult(W,A[i],M3)); /* using M3 as work space */
468: PetscCall(BVMult(M2,1.0,1.0,M3,NULL));
469: }
470: }
472: /* T21 */
473: PetscCall(MatDenseGetArrayWrite(Mk,&array));
474: for (i=1;i<deg;i++) {
475: s = (i==1)?0.0:1.0;
476: ss = PetscConj(fh[i]);
477: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
478: }
479: PetscCall(MatDenseRestoreArrayWrite(Mk,&array));
480: PetscCall(BVSetActiveColumns(M3,0,k));
481: PetscCall(BVMult(M3,1.0,0.0,V,Mk));
482: for (i=0;i<k;i++) {
483: PetscCall(BVGetColumn(M3,i,&vc));
484: PetscCall(VecConjugate(vc));
485: PetscCall(BVRestoreColumn(M3,i,&vc));
486: }
488: PetscCall(PEP_KSPSetOperators(ksp,M1,M1));
489: PetscCall(KSPSetUp(ksp));
490: PetscCall(MatDestroy(&Mk));
492: /* Set up for BEMW */
493: for (i=0;i<k;i++) {
494: PetscCall(BVGetColumn(M2,i,&vc));
495: PetscCall(BVGetColumn(W,i,&vc2));
496: PetscCall(NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_FALSE,vc,M4+i*k,vc2,w+i*k,matctx->t));
497: PetscCall(BVRestoreColumn(M2,i,&vc));
498: PetscCall(BVGetColumn(M3,i,&vc));
499: PetscCall(VecConjugate(vc));
500: PetscCall(VecDot(vc2,vc,&d[i]));
501: PetscCall(VecConjugate(vc));
502: PetscCall(BVRestoreColumn(M3,i,&vc));
503: for (j=0;j<i;j++) d[i] += M4[i+j*k]*w[j+i*k];
504: d[i] = M4[i+i*k]-d[i];
505: PetscCall(BVRestoreColumn(W,i,&vc2));
507: PetscCall(BVGetColumn(M3,i,&vc));
508: PetscCall(BVGetColumn(Wt,i,&vc2));
509: for (j=0;j<=i;j++) Ts[j] = M4[i+j*k];
510: PetscCall(NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_TRUE,vc,Ts,vc2,wt+i*k,matctx->t));
511: PetscCall(BVRestoreColumn(M3,i,&vc));
512: PetscCall(BVGetColumn(M2,i,&vc));
513: PetscCall(VecConjugate(vc2));
514: PetscCall(VecDot(vc,vc2,&dt[i]));
515: PetscCall(VecConjugate(vc2));
516: PetscCall(BVRestoreColumn(M2,i,&vc));
517: for (j=0;j<i;j++) dt[i] += M4[j+i*k]*wt[j+i*k];
518: dt[i] = M4[i+i*k]-dt[i];
519: PetscCall(BVRestoreColumn(Wt,i,&vc2));
520: }
522: if (flg) PetscCall(PetscFree(At));
523: PetscCall(PetscFree3(T12,Tr,Ts));
524: PetscFunctionReturn(PETSC_SUCCESS);
525: }
527: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx,BV W)
528: {
529: PetscInt i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
530: PetscInt *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
531: Mat M,*E=matctx->E,*A,*At,Mk,Md;
532: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
533: PetscScalar s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
534: PetscBLASInt lds_,lda_,k_;
535: const PetscInt *idxmc;
536: const PetscScalar *valsc,*carray;
537: MatStructure str;
538: Vec vc,vc0;
539: PetscBool flg;
541: PetscFunctionBegin;
542: PetscCall(PetscMalloc5(k*k,&T22,k*k,&T21,nmat*k*k,&T12,k*k,&Tr,k*k,&Ts));
543: PetscCall(STGetMatStructure(pep->st,&str));
544: PetscCall(KSPGetOperators(ksp,&M,NULL));
545: PetscCall(MatGetOwnershipRange(E[1],&n1,&m1));
546: PetscCall(MatGetOwnershipRange(E[0],&n0,&m0));
547: PetscCall(MatGetOwnershipRange(M,&n,NULL));
548: PetscCall(PetscMalloc1(nmat,&ts));
549: PetscCall(STGetTransform(pep->st,&flg));
550: if (flg) {
551: PetscCall(PetscMalloc1(pep->nmat,&At));
552: for (i=0;i<pep->nmat;i++) PetscCall(STGetMatrixTransformed(pep->st,i,&At[i]));
553: } else At = pep->A;
554: if (matctx->subc) A = matctx->A;
555: else A = At;
556: /* Form the explicit system matrix */
557: DHii = T12;
558: PetscCall(PetscArrayzero(DHii,k*k*nmat));
559: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
560: for (d=2;d<nmat;d++) {
561: for (j=0;j<k;j++) {
562: for (i=0;i<k;i++) {
563: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
564: }
565: }
566: }
568: /* T11 */
569: if (!matctx->compM1) {
570: PetscCall(MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN));
571: PetscCall(PEPEvaluateBasis(pep,h,0,Ts,NULL));
572: for (j=1;j<nmat;j++) PetscCall(MatAXPY(E[0],Ts[j],A[j],str));
573: }
574: for (i=n0;i<m0;i++) {
575: PetscCall(MatGetRow(E[0],i,&ncols,&idxmc,&valsc));
576: idx = n+i-n0;
577: for (j=0;j<ncols;j++) {
578: idxg[j] = matctx->map0[idxmc[j]];
579: }
580: PetscCall(MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES));
581: PetscCall(MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc));
582: }
584: /* T22 */
585: PetscCall(PetscBLASIntCast(lds,&lds_));
586: PetscCall(PetscBLASIntCast(k,&k_));
587: PetscCall(PetscBLASIntCast(lda,&lda_));
588: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
589: for (i=1;i<deg;i++) {
590: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
591: s = (i==1)?0.0:1.0;
592: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
593: }
594: for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
595: for (i=0;i<m1-n1;i++) {
596: idx = n+m0-n0+i;
597: for (j=0;j<k;j++) {
598: Tr[j] = T22[n1+i+j*k];
599: }
600: PetscCall(MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES));
601: }
603: /* T21 */
604: for (i=1;i<deg;i++) {
605: s = (i==1)?0.0:1.0;
606: ss = PetscConj(fh[i]);
607: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
608: }
609: PetscCall(BVSetActiveColumns(W,0,k));
610: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk));
611: PetscCall(BVMult(W,1.0,0.0,V,Mk));
612: for (i=0;i<k;i++) {
613: PetscCall(BVGetColumn(W,i,&vc));
614: PetscCall(VecConjugate(vc));
615: PetscCall(VecGetArrayRead(vc,&carray));
616: idx = matctx->map1[i];
617: PetscCall(MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,carray,INSERT_VALUES));
618: PetscCall(VecRestoreArrayRead(vc,&carray));
619: PetscCall(BVRestoreColumn(W,i,&vc));
620: }
622: /* T12 */
623: for (i=1;i<nmat;i++) {
624: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
625: for (j=0;j<k;j++) PetscCall(PetscArraycpy(T12+i*k+j*lda,Ts+j*k,k));
626: }
627: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md));
628: for (i=0;i<nmat;i++) ts[i] = 1.0;
629: for (j=0;j<k;j++) {
630: PetscCall(MatDenseGetArrayWrite(Md,&array));
631: PetscCall(PetscArraycpy(array,T12+k+j*lda,(nmat-1)*k));
632: PetscCall(MatDenseRestoreArrayWrite(Md,&array));
633: PetscCall(BVSetActiveColumns(W,0,nmat-1));
634: PetscCall(BVMult(W,1.0,0.0,V,Md));
635: for (i=nmat-1;i>0;i--) {
636: PetscCall(BVGetColumn(W,i-1,&vc0));
637: PetscCall(BVGetColumn(W,i,&vc));
638: PetscCall(MatMult(A[i],vc0,vc));
639: PetscCall(BVRestoreColumn(W,i-1,&vc0));
640: PetscCall(BVRestoreColumn(W,i,&vc));
641: }
642: PetscCall(BVSetActiveColumns(W,1,nmat));
643: PetscCall(BVGetColumn(W,0,&vc0));
644: PetscCall(BVMultVec(W,1.0,0.0,vc0,ts));
645: PetscCall(VecGetArrayRead(vc0,&carray));
646: idx = matctx->map1[j];
647: PetscCall(MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,carray,INSERT_VALUES));
648: PetscCall(VecRestoreArrayRead(vc0,&carray));
649: PetscCall(BVRestoreColumn(W,0,&vc0));
650: }
651: PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
652: PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
653: PetscCall(PEP_KSPSetOperators(ksp,M,M));
654: PetscCall(KSPSetUp(ksp));
655: PetscCall(PetscFree(ts));
656: PetscCall(MatDestroy(&Mk));
657: PetscCall(MatDestroy(&Md));
658: if (flg) PetscCall(PetscFree(At));
659: PetscCall(PetscFree5(T22,T21,T12,Tr,Ts));
660: PetscFunctionReturn(PETSC_SUCCESS);
661: }
663: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx)
664: {
665: PetscInt n0,m0,n1,m1,i;
666: PetscScalar *arrayV;
667: const PetscScalar *array;
669: PetscFunctionBegin;
670: PetscCall(MatGetOwnershipRange(matctx->E[1],&n1,&m1));
671: PetscCall(MatGetOwnershipRange(matctx->E[0],&n0,&m0));
673: /* Right side */
674: PetscCall(VecGetArrayRead(Rv,&array));
675: PetscCall(VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES));
676: PetscCall(VecRestoreArrayRead(Rv,&array));
677: PetscCall(VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES));
678: PetscCall(VecAssemblyBegin(matctx->tN));
679: PetscCall(VecAssemblyEnd(matctx->tN));
681: /* Solve */
682: PetscCall(KSPSolve(ksp,matctx->tN,matctx->ttN));
684: /* Retrieve solution */
685: PetscCall(VecGetArray(dVi,&arrayV));
686: PetscCall(VecGetArrayRead(matctx->ttN,&array));
687: PetscCall(PetscArraycpy(arrayV,array,m0-n0));
688: PetscCall(VecRestoreArray(dVi,&arrayV));
689: if (!matctx->subc) {
690: PetscCall(VecGetArray(matctx->t1,&arrayV));
691: for (i=0;i<m1-n1;i++) arrayV[i] = array[m0-n0+i];
692: PetscCall(VecRestoreArray(matctx->t1,&arrayV));
693: PetscCall(VecRestoreArrayRead(matctx->ttN,&array));
694: PetscCall(VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD));
695: PetscCall(VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD));
696: PetscCall(VecGetArrayRead(matctx->vseq,&array));
697: for (i=0;i<k;i++) dHi[i] = array[i];
698: PetscCall(VecRestoreArrayRead(matctx->vseq,&array));
699: }
700: PetscFunctionReturn(PETSC_SUCCESS);
701: }
703: static PetscErrorCode NRefSysIter(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx,BV W)
704: {
705: PetscInt j,m,lda=pep->nmat*k,n0,m0,idx;
706: PetscMPIInt root,len;
707: PetscScalar *array2,h;
708: const PetscScalar *array;
709: Vec R,Vi;
710: PEP_REFINE_MATSHELL *ctx;
711: Mat M;
713: PetscFunctionBegin;
714: if (!matctx || !matctx->subc) {
715: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
716: h = H[i+i*ldh];
717: idx = i;
718: R = Rv;
719: Vi = dVi;
720: switch (pep->scheme) {
721: case PEP_REFINE_SCHEME_EXPLICIT:
722: PetscCall(NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W));
723: matctx->compM1 = PETSC_FALSE;
724: break;
725: case PEP_REFINE_SCHEME_MBE:
726: PetscCall(NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,V,matctx));
727: matctx->compM1 = PETSC_FALSE;
728: break;
729: case PEP_REFINE_SCHEME_SCHUR:
730: PetscCall(KSPGetOperators(ksp,&M,NULL));
731: PetscCall(MatShellGetContext(M,&ctx));
732: PetscCall(NRefSysSetup_shell(pep,k,fH,S,lds,fh,h,ctx));
733: ctx->compM1 = PETSC_FALSE;
734: break;
735: }
736: } else {
737: if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
738: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
739: h = H[idx+idx*ldh];
740: matctx->idx = idx;
741: switch (pep->scheme) {
742: case PEP_REFINE_SCHEME_EXPLICIT:
743: PetscCall(NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W));
744: matctx->compM1 = PETSC_FALSE;
745: break;
746: case PEP_REFINE_SCHEME_MBE:
747: PetscCall(NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx));
748: matctx->compM1 = PETSC_FALSE;
749: break;
750: case PEP_REFINE_SCHEME_SCHUR:
751: break;
752: }
753: } else idx = matctx->idx;
754: PetscCall(VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
755: PetscCall(VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
756: PetscCall(VecGetArrayRead(matctx->tg,&array));
757: PetscCall(VecPlaceArray(matctx->t,array));
758: PetscCall(VecCopy(matctx->t,matctx->Rv));
759: PetscCall(VecResetArray(matctx->t));
760: PetscCall(VecRestoreArrayRead(matctx->tg,&array));
761: R = matctx->Rv;
762: Vi = matctx->Vi;
763: }
764: if (idx==i && idx<k) {
765: switch (pep->scheme) {
766: case PEP_REFINE_SCHEME_EXPLICIT:
767: PetscCall(NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx));
768: break;
769: case PEP_REFINE_SCHEME_MBE:
770: PetscCall(NRefSysSolve_mbe(k,k,matctx->W,matctx->w,matctx->Wt,matctx->wt,matctx->d,matctx->dt,ksp,matctx->M2,matctx->M3 ,matctx->M4,PETSC_FALSE,R,Rh,Vi,dHi,matctx->t));
771: break;
772: case PEP_REFINE_SCHEME_SCHUR:
773: PetscCall(NRefSysSolve_shell(ksp,pep->nmat,R,Rh,k,Vi,dHi));
774: break;
775: }
776: }
777: if (matctx && matctx->subc) {
778: PetscCall(VecGetLocalSize(Vi,&m));
779: PetscCall(VecGetArrayRead(Vi,&array));
780: PetscCall(VecGetArray(matctx->tg,&array2));
781: PetscCall(PetscArraycpy(array2,array,m));
782: PetscCall(VecRestoreArray(matctx->tg,&array2));
783: PetscCall(VecRestoreArrayRead(Vi,&array));
784: PetscCall(VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE));
785: PetscCall(VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE));
786: switch (pep->scheme) {
787: case PEP_REFINE_SCHEME_EXPLICIT:
788: PetscCall(MatGetOwnershipRange(matctx->E[0],&n0,&m0));
789: PetscCall(VecGetArrayRead(matctx->ttN,&array));
790: PetscCall(VecPlaceArray(matctx->tp,array+m0-n0));
791: PetscCall(VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD));
792: PetscCall(VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD));
793: PetscCall(VecResetArray(matctx->tp));
794: PetscCall(VecRestoreArrayRead(matctx->ttN,&array));
795: PetscCall(VecGetArrayRead(matctx->tpg,&array));
796: for (j=0;j<k;j++) dHi[j] = array[j];
797: PetscCall(VecRestoreArrayRead(matctx->tpg,&array));
798: break;
799: case PEP_REFINE_SCHEME_MBE:
800: root = 0;
801: for (j=0;j<i%matctx->subc->n;j++) root += matctx->subc->subsize[j];
802: PetscCall(PetscMPIIntCast(k,&len));
803: PetscCallMPI(MPI_Bcast(dHi,len,MPIU_SCALAR,root,matctx->subc->dupparent));
804: break;
805: case PEP_REFINE_SCHEME_SCHUR:
806: break;
807: }
808: }
809: PetscFunctionReturn(PETSC_SUCCESS);
810: }
812: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,PEP_REFINE_EXPLICIT *matctx)
813: {
814: PetscInt i,nmat=pep->nmat,lda=nmat*k;
815: PetscScalar *fh,*Rh,*DfH;
816: PetscReal norm;
817: BV W;
818: Vec Rv,t,dvi;
819: PEP_REFINE_MATSHELL *ctx;
820: Mat M,*At;
821: PetscBool flg,lindep;
823: PetscFunctionBegin;
824: PetscCall(PetscMalloc2(nmat*k*k,&DfH,k,&Rh));
825: *rds = 0;
826: PetscCall(BVCreateVec(pep->V,&Rv));
827: switch (pep->scheme) {
828: case PEP_REFINE_SCHEME_EXPLICIT:
829: PetscCall(BVCreateVec(pep->V,&t));
830: PetscCall(BVDuplicateResize(pep->V,PetscMax(k,nmat),&W));
831: PetscCall(PetscMalloc1(nmat,&fh));
832: break;
833: case PEP_REFINE_SCHEME_MBE:
834: if (matctx->subc) {
835: PetscCall(BVCreateVec(pep->V,&t));
836: PetscCall(BVDuplicateResize(pep->V,PetscMax(k,nmat),&W));
837: } else {
838: W = matctx->W;
839: PetscCall(PetscObjectReference((PetscObject)W));
840: t = matctx->t;
841: PetscCall(PetscObjectReference((PetscObject)t));
842: }
843: PetscCall(BVScale(matctx->W,0.0));
844: PetscCall(BVScale(matctx->Wt,0.0));
845: PetscCall(BVScale(matctx->M2,0.0));
846: PetscCall(BVScale(matctx->M3,0.0));
847: PetscCall(PetscMalloc1(nmat,&fh));
848: break;
849: case PEP_REFINE_SCHEME_SCHUR:
850: PetscCall(KSPGetOperators(ksp,&M,NULL));
851: PetscCall(MatShellGetContext(M,&ctx));
852: PetscCall(BVCreateVec(pep->V,&t));
853: PetscCall(BVDuplicateResize(pep->V,PetscMax(k,nmat),&W));
854: fh = ctx->fih;
855: break;
856: }
857: PetscCall(PetscArrayzero(dVS,2*k*k));
858: PetscCall(PetscArrayzero(DfH,lda*k));
859: PetscCall(STGetTransform(pep->st,&flg));
860: if (flg) {
861: PetscCall(PetscMalloc1(pep->nmat,&At));
862: for (i=0;i<pep->nmat;i++) PetscCall(STGetMatrixTransformed(pep->st,i,&At[i]));
863: } else At = pep->A;
865: /* Main loop for computing the i-th columns of dX and dS */
866: for (i=0;i<k;i++) {
867: /* Compute and update i-th column of the right hand side */
868: PetscCall(PetscArrayzero(Rh,k));
869: PetscCall(NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t));
871: /* Update and solve system */
872: PetscCall(BVGetColumn(dV,i,&dvi));
873: PetscCall(NRefSysIter(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W));
874: /* Orthogonalize computed solution */
875: PetscCall(BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep));
876: PetscCall(BVRestoreColumn(dV,i,&dvi));
877: if (!lindep) {
878: PetscCall(BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep));
879: if (!lindep) {
880: dVS[k+i+i*2*k] = norm;
881: PetscCall(BVScaleColumn(dV,i,1.0/norm));
882: (*rds)++;
883: }
884: }
885: }
886: PetscCall(BVSetActiveColumns(dV,0,*rds));
887: PetscCall(VecDestroy(&t));
888: PetscCall(VecDestroy(&Rv));
889: PetscCall(BVDestroy(&W));
890: if (flg) PetscCall(PetscFree(At));
891: PetscCall(PetscFree2(DfH,Rh));
892: if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) PetscCall(PetscFree(fh));
893: PetscFunctionReturn(PETSC_SUCCESS);
894: }
896: static PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds)
897: {
898: PetscInt j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,ldg;
899: PetscScalar *G,*tau,sone=1.0,zero=0.0,*work;
900: PetscBLASInt lds_,k_,ldh_,info,ldg_,lda_;
902: PetscFunctionBegin;
903: PetscCall(PetscMalloc3(k,&tau,k,&work,deg*k*k,&G));
904: PetscCall(PetscBLASIntCast(lds,&lds_));
905: PetscCall(PetscBLASIntCast(lda,&lda_));
906: PetscCall(PetscBLASIntCast(k,&k_));
908: /* Form auxiliary matrix for the orthogonalization step */
909: ldg = deg*k;
910: PetscCall(PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH));
911: PetscCall(PetscBLASIntCast(ldg,&ldg_));
912: PetscCall(PetscBLASIntCast(ldh,&ldh_));
913: for (j=0;j<deg;j++) {
914: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
915: }
916: /* Orthogonalize and update S */
917: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
918: PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work,&k_,&info));
919: PetscCall(PetscFPTrapPop());
920: SlepcCheckLapackInfo("geqrf",info);
921: PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));
923: /* Update H */
924: PetscCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
925: PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
926: PetscCall(PetscFree3(tau,work,G));
927: PetscFunctionReturn(PETSC_SUCCESS);
928: }
930: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds)
931: {
932: PetscInt i,j,nmat=pep->nmat,lda=nmat*k;
933: PetscScalar *tau,*array,*work;
934: PetscBLASInt lds_,k_,lda_,ldh_,kdrs_,info,k2_;
935: Mat M0;
937: PetscFunctionBegin;
938: PetscCall(PetscMalloc2(k,&tau,k,&work));
939: PetscCall(PetscBLASIntCast(lds,&lds_));
940: PetscCall(PetscBLASIntCast(lda,&lda_));
941: PetscCall(PetscBLASIntCast(ldh,&ldh_));
942: PetscCall(PetscBLASIntCast(k,&k_));
943: PetscCall(PetscBLASIntCast(2*k,&k2_));
944: PetscCall(PetscBLASIntCast((k+rds),&kdrs_));
945: /* Update H */
946: for (j=0;j<k;j++) {
947: for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
948: }
949: /* Update V */
950: for (j=0;j<k;j++) {
951: for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
952: for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
953: }
954: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
955: PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work,&k_,&info));
956: SlepcCheckLapackInfo("geqrf",info);
957: /* Copy triangular matrix in S */
958: for (j=0;j<k;j++) {
959: for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
960: for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
961: }
962: PetscCallBLAS("LAPACKorgqr",LAPACKorgqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work,&k_,&info));
963: SlepcCheckLapackInfo("orgqr",info);
964: PetscCall(PetscFPTrapPop());
965: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0));
966: PetscCall(MatDenseGetArrayWrite(M0,&array));
967: for (j=0;j<k;j++) PetscCall(PetscArraycpy(array+j*k,dVS+j*2*k,k));
968: PetscCall(MatDenseRestoreArrayWrite(M0,&array));
969: PetscCall(BVMultInPlace(pep->V,M0,0,k));
970: if (rds) {
971: PetscCall(MatDenseGetArrayWrite(M0,&array));
972: for (j=0;j<k;j++) PetscCall(PetscArraycpy(array+j*k,dVS+k+j*2*k,rds));
973: PetscCall(MatDenseRestoreArrayWrite(M0,&array));
974: PetscCall(BVMultInPlace(dV,M0,0,k));
975: PetscCall(BVMult(pep->V,1.0,1.0,dV,NULL));
976: }
977: PetscCall(MatDestroy(&M0));
978: PetscCall(NRefOrthogStep(pep,k,H,ldh,fH,S,lds));
979: PetscCall(PetscFree2(tau,work));
980: PetscFunctionReturn(PETSC_SUCCESS);
981: }
983: static PetscErrorCode PEPNRefSetUp(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PEP_REFINE_EXPLICIT *matctx,PetscBool ini)
984: {
985: PEP_REFINE_MATSHELL *ctx;
986: PetscScalar t,*coef;
987: const PetscScalar *array;
988: MatStructure str;
989: PetscInt j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
990: MPI_Comm comm;
991: PetscMPIInt np;
992: const PetscInt *rgs0,*rgs1;
993: Mat B,C,*E,*A,*At;
994: IS is1,is2;
995: Vec v;
996: PetscBool flg;
997: Mat M,P;
999: PetscFunctionBegin;
1000: PetscCall(PetscMalloc1(nmat,&coef));
1001: PetscCall(STGetTransform(pep->st,&flg));
1002: if (flg) {
1003: PetscCall(PetscMalloc1(pep->nmat,&At));
1004: for (i=0;i<pep->nmat;i++) PetscCall(STGetMatrixTransformed(pep->st,i,&At[i]));
1005: } else At = pep->A;
1006: switch (pep->scheme) {
1007: case PEP_REFINE_SCHEME_EXPLICIT:
1008: if (ini) {
1009: if (matctx->subc) {
1010: A = matctx->A;
1011: PetscCall(PetscSubcommGetChild(matctx->subc,&comm));
1012: } else {
1013: A = At;
1014: PetscCall(PetscObjectGetComm((PetscObject)pep,&comm));
1015: }
1016: E = matctx->E;
1017: PetscCall(STGetMatStructure(pep->st,&str));
1018: PetscCall(MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]));
1019: j = (matctx->subc)?matctx->subc->color:0;
1020: PetscCall(PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL));
1021: for (j=1;j<nmat;j++) PetscCall(MatAXPY(E[0],coef[j],A[j],str));
1022: PetscCall(MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]));
1023: PetscCall(MatGetOwnershipRange(E[0],&n0,&m0));
1024: PetscCall(MatGetOwnershipRange(E[1],&n1,&m1));
1025: PetscCall(MatGetOwnershipRangeColumn(E[0],&n0_,&m0_));
1026: PetscCall(MatGetOwnershipRangeColumn(E[1],&n1_,&m1_));
1027: /* T12 and T21 are computed from V and V*, so,
1028: they must have the same column and row ranges */
1029: PetscCheck(m0_-n0_==m0-n0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inconsistent dimensions");
1030: PetscCall(MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B));
1031: PetscCall(MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C));
1032: PetscCall(MatCreateTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],&M));
1033: PetscCall(MatDestroy(&B));
1034: PetscCall(MatDestroy(&C));
1035: matctx->compM1 = PETSC_TRUE;
1036: PetscCall(MatGetSize(E[0],NULL,&N0));
1037: PetscCall(MatGetSize(E[1],NULL,&N1));
1038: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&np));
1039: PetscCall(MatGetOwnershipRanges(E[0],&rgs0));
1040: PetscCall(MatGetOwnershipRanges(E[1],&rgs1));
1041: PetscCall(PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1));
1042: /* Create column (and row) mapping */
1043: for (p=0;p<np;p++) {
1044: for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
1045: for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
1046: }
1047: PetscCall(MatCreateVecs(M,NULL,&matctx->tN));
1048: PetscCall(MatCreateVecs(matctx->E[1],NULL,&matctx->t1));
1049: PetscCall(VecDuplicate(matctx->tN,&matctx->ttN));
1050: if (matctx->subc) {
1051: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np));
1052: count = np*k;
1053: PetscCall(PetscMalloc2(count,&idx1,count,&idx2));
1054: PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp));
1055: PetscCall(VecGetOwnershipRange(matctx->tp,&l0,NULL));
1056: PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg));
1057: for (si=0;si<matctx->subc->n;si++) {
1058: if (matctx->subc->color==si) {
1059: j=0;
1060: if (matctx->subc->color==si) {
1061: for (p=0;p<np;p++) {
1062: for (i=n1;i<m1;i++) {
1063: idx1[j] = l0+i-n1;
1064: idx2[j++] =p*k+i;
1065: }
1066: }
1067: }
1068: count = np*(m1-n1);
1069: } else count =0;
1070: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1));
1071: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2));
1072: PetscCall(VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]));
1073: PetscCall(ISDestroy(&is1));
1074: PetscCall(ISDestroy(&is2));
1075: }
1076: PetscCall(PetscFree2(idx1,idx2));
1077: } else PetscCall(VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq));
1078: P = M;
1079: } else {
1080: if (matctx->subc) {
1081: /* Scatter vectors pep->V */
1082: for (i=0;i<k;i++) {
1083: PetscCall(BVGetColumn(pep->V,i,&v));
1084: PetscCall(VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
1085: PetscCall(VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
1086: PetscCall(BVRestoreColumn(pep->V,i,&v));
1087: PetscCall(VecGetArrayRead(matctx->tg,&array));
1088: PetscCall(VecPlaceArray(matctx->t,(const PetscScalar*)array));
1089: PetscCall(BVInsertVec(matctx->V,i,matctx->t));
1090: PetscCall(VecResetArray(matctx->t));
1091: PetscCall(VecRestoreArrayRead(matctx->tg,&array));
1092: }
1093: }
1094: }
1095: break;
1096: case PEP_REFINE_SCHEME_MBE:
1097: if (ini) {
1098: if (matctx->subc) {
1099: A = matctx->A;
1100: PetscCall(PetscSubcommGetChild(matctx->subc,&comm));
1101: } else {
1102: matctx->V = pep->V;
1103: A = At;
1104: PetscCall(PetscObjectGetComm((PetscObject)pep,&comm));
1105: PetscCall(MatCreateVecs(pep->A[0],&matctx->t,NULL));
1106: }
1107: PetscCall(STGetMatStructure(pep->st,&str));
1108: PetscCall(MatDuplicate(A[0],MAT_COPY_VALUES,&matctx->M1));
1109: j = (matctx->subc)?matctx->subc->color:0;
1110: PetscCall(PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL));
1111: for (j=1;j<nmat;j++) PetscCall(MatAXPY(matctx->M1,coef[j],A[j],str));
1112: PetscCall(BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W));
1113: PetscCall(BVDuplicateResize(matctx->V,k,&matctx->M2));
1114: PetscCall(BVDuplicate(matctx->M2,&matctx->M3));
1115: PetscCall(BVDuplicate(matctx->M2,&matctx->Wt));
1116: PetscCall(PetscMalloc5(k*k,&matctx->M4,k*k,&matctx->w,k*k,&matctx->wt,k,&matctx->d,k,&matctx->dt));
1117: matctx->compM1 = PETSC_TRUE;
1118: M = matctx->M1;
1119: P = M;
1120: }
1121: break;
1122: case PEP_REFINE_SCHEME_SCHUR:
1123: if (ini) {
1124: PetscCall(PetscObjectGetComm((PetscObject)pep,&comm));
1125: PetscCall(MatGetSize(At[0],&m0,&n0));
1126: PetscCall(PetscMalloc1(1,&ctx));
1127: PetscCall(STGetMatStructure(pep->st,&str));
1128: /* Create a shell matrix to solve the linear system */
1129: ctx->V = pep->V;
1130: ctx->k = k; ctx->nmat = nmat;
1131: PetscCall(PetscMalloc5(nmat,&ctx->A,k*k,&ctx->M4,k,&ctx->pM4,2*k*k,&ctx->work,nmat,&ctx->fih));
1132: for (i=0;i<nmat;i++) ctx->A[i] = At[i];
1133: PetscCall(PetscArrayzero(ctx->M4,k*k));
1134: PetscCall(MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,&M));
1135: PetscCall(MatShellSetOperation(M,MATOP_MULT,(void(*)(void))MatMult_FS));
1136: PetscCall(BVDuplicateResize(ctx->V,PetscMax(k,pep->nmat),&ctx->W));
1137: PetscCall(BVDuplicateResize(ctx->V,k,&ctx->M2));
1138: PetscCall(BVDuplicate(ctx->M2,&ctx->M3));
1139: PetscCall(BVCreateVec(pep->V,&ctx->t));
1140: PetscCall(MatDuplicate(At[0],MAT_COPY_VALUES,&ctx->M1));
1141: PetscCall(PEPEvaluateBasis(pep,H[0],0,coef,NULL));
1142: for (j=1;j<nmat;j++) PetscCall(MatAXPY(ctx->M1,coef[j],At[j],str));
1143: PetscCall(MatDuplicate(At[0],MAT_COPY_VALUES,&P));
1144: /* Compute a precond matrix for the system */
1145: t = H[0];
1146: PetscCall(PEPEvaluateBasis(pep,t,0,coef,NULL));
1147: for (j=1;j<nmat;j++) PetscCall(MatAXPY(P,coef[j],At[j],str));
1148: ctx->compM1 = PETSC_TRUE;
1149: }
1150: break;
1151: }
1152: if (ini) {
1153: PetscCall(PEPRefineGetKSP(pep,&pep->refineksp));
1154: PetscCall(KSPSetErrorIfNotConverged(pep->refineksp,PETSC_TRUE));
1155: PetscCall(PEP_KSPSetOperators(pep->refineksp,M,P));
1156: PetscCall(KSPSetFromOptions(pep->refineksp));
1157: }
1159: if (!ini && matctx && matctx->subc) {
1160: /* Scatter vectors pep->V */
1161: for (i=0;i<k;i++) {
1162: PetscCall(BVGetColumn(pep->V,i,&v));
1163: PetscCall(VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
1164: PetscCall(VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
1165: PetscCall(BVRestoreColumn(pep->V,i,&v));
1166: PetscCall(VecGetArrayRead(matctx->tg,&array));
1167: PetscCall(VecPlaceArray(matctx->t,(const PetscScalar*)array));
1168: PetscCall(BVInsertVec(matctx->V,i,matctx->t));
1169: PetscCall(VecResetArray(matctx->t));
1170: PetscCall(VecRestoreArrayRead(matctx->tg,&array));
1171: }
1172: }
1173: PetscCall(PetscFree(coef));
1174: if (flg) PetscCall(PetscFree(At));
1175: PetscFunctionReturn(PETSC_SUCCESS);
1176: }
1178: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,PEP_REFINE_EXPLICIT *matctx,PetscInt nsubc)
1179: {
1180: PetscInt i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
1181: IS is1,is2;
1182: BVType type;
1183: Vec v;
1184: const PetscScalar *array;
1185: Mat *A;
1186: PetscBool flg;
1187: MPI_Comm contpar,child;
1189: PetscFunctionBegin;
1190: PetscCall(STGetTransform(pep->st,&flg));
1191: if (flg) {
1192: PetscCall(PetscMalloc1(pep->nmat,&A));
1193: for (i=0;i<pep->nmat;i++) PetscCall(STGetMatrixTransformed(pep->st,i,&A[i]));
1194: } else A = pep->A;
1195: PetscCall(PetscSubcommGetChild(matctx->subc,&child));
1196: PetscCall(PetscSubcommGetContiguousParent(matctx->subc,&contpar));
1198: /* Duplicate pep matrices */
1199: PetscCall(PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id));
1200: for (i=0;i<pep->nmat;i++) PetscCall(MatCreateRedundantMatrix(A[i],0,child,MAT_INITIAL_MATRIX,&matctx->A[i]));
1202: /* Create Scatter */
1203: PetscCall(MatCreateVecs(matctx->A[0],&matctx->t,NULL));
1204: PetscCall(MatGetLocalSize(matctx->A[0],&nloc_sub,NULL));
1205: PetscCall(VecCreateMPI(contpar,nloc_sub,PETSC_DECIDE,&matctx->tg));
1206: PetscCall(BVGetColumn(pep->V,0,&v));
1207: PetscCall(VecGetOwnershipRange(v,&n0,&m0));
1208: nloc0 = m0-n0;
1209: PetscCall(PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2));
1210: j = 0;
1211: for (si=0;si<matctx->subc->n;si++) {
1212: for (i=n0;i<m0;i++) {
1213: idx1[j] = i;
1214: idx2[j++] = i+pep->n*si;
1215: }
1216: }
1217: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1));
1218: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2));
1219: PetscCall(VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub));
1220: PetscCall(ISDestroy(&is1));
1221: PetscCall(ISDestroy(&is2));
1222: for (si=0;si<matctx->subc->n;si++) {
1223: j=0;
1224: for (i=n0;i<m0;i++) {
1225: idx1[j] = i;
1226: idx2[j++] = i+pep->n*si;
1227: }
1228: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1));
1229: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2));
1230: PetscCall(VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]));
1231: PetscCall(ISDestroy(&is1));
1232: PetscCall(ISDestroy(&is2));
1233: }
1234: PetscCall(BVRestoreColumn(pep->V,0,&v));
1235: PetscCall(PetscFree2(idx1,idx2));
1237: /* Duplicate pep->V vecs */
1238: PetscCall(BVGetType(pep->V,&type));
1239: PetscCall(BVCreate(child,&matctx->V));
1240: PetscCall(BVSetType(matctx->V,type));
1241: PetscCall(BVSetSizesFromVec(matctx->V,matctx->t,k));
1242: if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) PetscCall(BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W));
1243: for (i=0;i<k;i++) {
1244: PetscCall(BVGetColumn(pep->V,i,&v));
1245: PetscCall(VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
1246: PetscCall(VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
1247: PetscCall(BVRestoreColumn(pep->V,i,&v));
1248: PetscCall(VecGetArrayRead(matctx->tg,&array));
1249: PetscCall(VecPlaceArray(matctx->t,(const PetscScalar*)array));
1250: PetscCall(BVInsertVec(matctx->V,i,matctx->t));
1251: PetscCall(VecResetArray(matctx->t));
1252: PetscCall(VecRestoreArrayRead(matctx->tg,&array));
1253: }
1255: PetscCall(VecDuplicate(matctx->t,&matctx->Rv));
1256: PetscCall(VecDuplicate(matctx->t,&matctx->Vi));
1257: if (flg) PetscCall(PetscFree(A));
1258: PetscFunctionReturn(PETSC_SUCCESS);
1259: }
1261: static PetscErrorCode NRefSubcommDestroy(PEP pep,PEP_REFINE_EXPLICIT *matctx)
1262: {
1263: PetscInt i;
1265: PetscFunctionBegin;
1266: PetscCall(VecScatterDestroy(&matctx->scatter_sub));
1267: for (i=0;i<matctx->subc->n;i++) PetscCall(VecScatterDestroy(&matctx->scatter_id[i]));
1268: for (i=0;i<pep->nmat;i++) PetscCall(MatDestroy(&matctx->A[i]));
1269: if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1270: for (i=0;i<matctx->subc->n;i++) PetscCall(VecScatterDestroy(&matctx->scatterp_id[i]));
1271: PetscCall(VecDestroy(&matctx->tp));
1272: PetscCall(VecDestroy(&matctx->tpg));
1273: PetscCall(BVDestroy(&matctx->W));
1274: }
1275: PetscCall(PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id));
1276: PetscCall(BVDestroy(&matctx->V));
1277: PetscCall(VecDestroy(&matctx->t));
1278: PetscCall(VecDestroy(&matctx->tg));
1279: PetscCall(VecDestroy(&matctx->Rv));
1280: PetscCall(VecDestroy(&matctx->Vi));
1281: PetscFunctionReturn(PETSC_SUCCESS);
1282: }
1284: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds)
1285: {
1286: PetscScalar *H,*work,*dH,*fH,*dVS;
1287: PetscInt ldh,i,j,its=1,nmat=pep->nmat,nsubc=pep->npart,rds;
1288: PetscBLASInt k_,ld_,*p,info;
1289: BV dV;
1290: PetscBool sinvert,flg;
1291: PEP_REFINE_EXPLICIT *matctx=NULL;
1292: Vec v;
1293: Mat M,P;
1294: PEP_REFINE_MATSHELL *ctx;
1296: PetscFunctionBegin;
1297: PetscCall(PetscLogEventBegin(PEP_Refine,pep,0,0,0));
1298: PetscCheck(k<=pep->n,PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Multiple Refinement available only for invariant pairs of dimension smaller than n=%" PetscInt_FMT,pep->n);
1299: /* the input tolerance is not being taken into account (by the moment) */
1300: its = *maxits;
1301: PetscCall(PetscMalloc3(k*k,&dH,nmat*k*k,&fH,k,&work));
1302: PetscCall(DSGetLeadingDimension(pep->ds,&ldh));
1303: PetscCall(PetscMalloc1(2*k*k,&dVS));
1304: PetscCall(STGetTransform(pep->st,&flg));
1305: if (!flg && pep->st && pep->ops->backtransform) { /* BackTransform */
1306: PetscCall(PetscBLASIntCast(k,&k_));
1307: PetscCall(PetscBLASIntCast(ldh,&ld_));
1308: PetscCall(PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert));
1309: if (sinvert) {
1310: PetscCall(DSGetArray(pep->ds,DS_MAT_A,&H));
1311: PetscCall(PetscMalloc1(k,&p));
1312: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1313: PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1314: SlepcCheckLapackInfo("getrf",info);
1315: PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&k_,&info));
1316: SlepcCheckLapackInfo("getri",info);
1317: PetscCall(PetscFPTrapPop());
1318: PetscCall(DSRestoreArray(pep->ds,DS_MAT_A,&H));
1319: pep->ops->backtransform = NULL;
1320: }
1321: if (sigma!=0.0) {
1322: PetscCall(DSGetArray(pep->ds,DS_MAT_A,&H));
1323: for (i=0;i<k;i++) H[i+ldh*i] += sigma;
1324: PetscCall(DSRestoreArray(pep->ds,DS_MAT_A,&H));
1325: pep->ops->backtransform = NULL;
1326: }
1327: }
1328: if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1329: PetscCall(DSGetArray(pep->ds,DS_MAT_A,&H));
1330: for (j=0;j<k;j++) {
1331: for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1332: }
1333: PetscCall(DSRestoreArray(pep->ds,DS_MAT_A,&H));
1334: if (!flg) {
1335: /* Restore original values */
1336: for (i=0;i<pep->nmat;i++) {
1337: pep->pbc[pep->nmat+i] *= pep->sfactor;
1338: pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1339: }
1340: }
1341: }
1342: if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1343: for (i=0;i<k;i++) {
1344: PetscCall(BVGetColumn(pep->V,i,&v));
1345: PetscCall(VecPointwiseMult(v,v,pep->Dr));
1346: PetscCall(BVRestoreColumn(pep->V,i,&v));
1347: }
1348: }
1349: PetscCall(DSGetArray(pep->ds,DS_MAT_A,&H));
1351: PetscCall(NRefOrthogStep(pep,k,H,ldh,fH,S,lds));
1352: /* check if H is in Schur form */
1353: for (i=0;i<k-1;i++) {
1354: #if !defined(PETSC_USE_COMPLEX)
1355: PetscCheck(H[i+1+i*ldh]==0.0,PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Iterative Refinement requires the complex Schur form of the projected matrix");
1356: #else
1357: PetscCheck(H[i+1+i*ldh]==0.0,PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Iterative Refinement requires an upper triangular projected matrix");
1358: #endif
1359: }
1360: PetscCheck(nsubc<=k,PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Number of subcommunicators should not be larger than the invariant pair dimension");
1361: PetscCall(BVSetActiveColumns(pep->V,0,k));
1362: PetscCall(BVDuplicateResize(pep->V,k,&dV));
1363: if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) {
1364: PetscCall(PetscMalloc1(1,&matctx));
1365: if (nsubc>1) { /* splitting in subcommunicators */
1366: matctx->subc = pep->refinesubc;
1367: PetscCall(NRefSubcommSetup(pep,k,matctx,nsubc));
1368: } else matctx->subc=NULL;
1369: }
1371: /* Loop performing iterative refinements */
1372: for (i=0;i<its;i++) {
1373: /* Pre-compute the polynomial basis evaluated in H */
1374: PetscCall(PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH));
1375: PetscCall(PEPNRefSetUp(pep,k,H,ldh,matctx,PetscNot(i)));
1376: /* Solve the linear system */
1377: PetscCall(PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,pep->refineksp,matctx));
1378: /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1379: PetscCall(PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds));
1380: }
1381: PetscCall(DSRestoreArray(pep->ds,DS_MAT_A,&H));
1382: if (!flg && sinvert) PetscCall(PetscFree(p));
1383: PetscCall(PetscFree3(dH,fH,work));
1384: PetscCall(PetscFree(dVS));
1385: PetscCall(BVDestroy(&dV));
1386: switch (pep->scheme) {
1387: case PEP_REFINE_SCHEME_EXPLICIT:
1388: for (i=0;i<2;i++) PetscCall(MatDestroy(&matctx->E[i]));
1389: PetscCall(PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1));
1390: PetscCall(VecDestroy(&matctx->tN));
1391: PetscCall(VecDestroy(&matctx->ttN));
1392: PetscCall(VecDestroy(&matctx->t1));
1393: if (nsubc>1) PetscCall(NRefSubcommDestroy(pep,matctx));
1394: else {
1395: PetscCall(VecDestroy(&matctx->vseq));
1396: PetscCall(VecScatterDestroy(&matctx->scatterctx));
1397: }
1398: PetscCall(PetscFree(matctx));
1399: PetscCall(KSPGetOperators(pep->refineksp,&M,NULL));
1400: PetscCall(MatDestroy(&M));
1401: break;
1402: case PEP_REFINE_SCHEME_MBE:
1403: PetscCall(BVDestroy(&matctx->W));
1404: PetscCall(BVDestroy(&matctx->Wt));
1405: PetscCall(BVDestroy(&matctx->M2));
1406: PetscCall(BVDestroy(&matctx->M3));
1407: PetscCall(MatDestroy(&matctx->M1));
1408: PetscCall(VecDestroy(&matctx->t));
1409: PetscCall(PetscFree5(matctx->M4,matctx->w,matctx->wt,matctx->d,matctx->dt));
1410: if (nsubc>1) PetscCall(NRefSubcommDestroy(pep,matctx));
1411: PetscCall(PetscFree(matctx));
1412: break;
1413: case PEP_REFINE_SCHEME_SCHUR:
1414: PetscCall(KSPGetOperators(pep->refineksp,&M,&P));
1415: PetscCall(MatShellGetContext(M,&ctx));
1416: PetscCall(PetscFree5(ctx->A,ctx->M4,ctx->pM4,ctx->work,ctx->fih));
1417: PetscCall(MatDestroy(&ctx->M1));
1418: PetscCall(BVDestroy(&ctx->M2));
1419: PetscCall(BVDestroy(&ctx->M3));
1420: PetscCall(BVDestroy(&ctx->W));
1421: PetscCall(VecDestroy(&ctx->t));
1422: PetscCall(PetscFree(ctx));
1423: PetscCall(MatDestroy(&M));
1424: PetscCall(MatDestroy(&P));
1425: break;
1426: }
1427: PetscCall(PetscLogEventEnd(PEP_Refine,pep,0,0,0));
1428: PetscFunctionReturn(PETSC_SUCCESS);
1429: }