Actual source code: nrefine.c

slepc-3.21.1 2024-04-26
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Newton refinement for polynomial eigenproblems.

 13:    References:

 15:        [1] T. Betcke and D. Kressner, "Perturbation, extraction and refinement
 16:            of invariant pairs for matrix polynomials", Linear Algebra Appl.
 17:            435(3):514-536, 2011.

 19:        [2] C. Campos and J.E. Roman, "Parallel iterative refinement in
 20:            polynomial eigenvalue problems", Numer. Linear Algebra Appl. 23(4):
 21:            730-745, 2016.
 22: */

 24: #include <slepc/private/pepimpl.h>
 25: #include <slepcblaslapack.h>

 27: typedef struct {
 28:   Mat          *A,M1;
 29:   BV           V,M2,M3,W;
 30:   PetscInt     k,nmat;
 31:   PetscScalar  *fih,*work,*M4;
 32:   PetscBLASInt *pM4;
 33:   PetscBool    compM1;
 34:   Vec          t;
 35: } PEP_REFINE_MATSHELL;

 37: typedef struct {
 38:   Mat          E[2],M1;
 39:   Vec          tN,ttN,t1,vseq;
 40:   VecScatter   scatterctx;
 41:   PetscBool    compM1;
 42:   PetscInt     *map0,*map1,*idxg,*idxp;
 43:   PetscSubcomm subc;
 44:   VecScatter   scatter_sub;
 45:   VecScatter   *scatter_id,*scatterp_id;
 46:   Mat          *A;
 47:   BV           V,W,M2,M3,Wt;
 48:   PetscScalar  *M4,*w,*wt,*d,*dt;
 49:   Vec          t,tg,Rv,Vi,tp,tpg;
 50:   PetscInt     idx,*cols;
 51: } PEP_REFINE_EXPLICIT;

 53: static PetscErrorCode MatMult_FS(Mat M ,Vec x,Vec y)
 54: {
 55:   PEP_REFINE_MATSHELL *ctx;
 56:   PetscInt            k,i;
 57:   PetscScalar         *c;
 58:   PetscBLASInt        k_,one=1,info;

 60:   PetscFunctionBegin;
 61:   PetscCall(MatShellGetContext(M,&ctx));
 62:   PetscCall(VecCopy(x,ctx->t));
 63:   k    = ctx->k;
 64:   c    = ctx->work;
 65:   PetscCall(PetscBLASIntCast(k,&k_));
 66:   PetscCall(MatMult(ctx->M1,x,y));
 67:   PetscCall(VecConjugate(ctx->t));
 68:   PetscCall(BVDotVec(ctx->M3,ctx->t,c));
 69:   for (i=0;i<k;i++) c[i] = PetscConj(c[i]);
 70:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
 71:   PetscCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,c,&k_,&info));
 72:   PetscCall(PetscFPTrapPop());
 73:   SlepcCheckLapackInfo("getrs",info);
 74:   PetscCall(BVMultVec(ctx->M2,-1.0,1.0,y,c));
 75:   PetscFunctionReturn(PETSC_SUCCESS);
 76: }

 78: /*
 79:   Evaluates the first d elements of the polynomial basis
 80:   on a given matrix H which is considered to be triangular
 81: */
 82: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
 83: {
 84:   PetscInt       i,j,ldfh=nm*k,off,nmat=pep->nmat;
 85:   PetscReal      *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
 86:   PetscScalar    corr=0.0,alpha,beta;
 87:   PetscBLASInt   k_,ldh_,ldfh_;

 89:   PetscFunctionBegin;
 90:   PetscCall(PetscBLASIntCast(ldh,&ldh_));
 91:   PetscCall(PetscBLASIntCast(k,&k_));
 92:   PetscCall(PetscBLASIntCast(ldfh,&ldfh_));
 93:   PetscCall(PetscArrayzero(fH,nm*k*k));
 94:   if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
 95:   if (nm>1) {
 96:     t = b[0]/a[0];
 97:     off = k;
 98:     for (j=0;j<k;j++) {
 99:       for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
100:       fH[j+j*ldfh] -= t;
101:     }
102:   }
103:   for (i=2;i<nm;i++) {
104:     off = i*k;
105:     if (i==2) {
106:       for (j=0;j<k;j++) {
107:         fH[off+j+j*ldfh] = 1.0;
108:         H[j+j*ldh] -= b[1];
109:       }
110:     } else {
111:       for (j=0;j<k;j++) {
112:         PetscCall(PetscArraycpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k));
113:         H[j+j*ldh] += corr-b[i-1];
114:       }
115:     }
116:     corr  = b[i-1];
117:     beta  = -g[i-1]/a[i-1];
118:     alpha = 1/a[i-1];
119:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
120:   }
121:   for (j=0;j<k;j++) H[j+j*ldh] += corr;
122:   PetscFunctionReturn(PETSC_SUCCESS);
123: }

125: static PetscErrorCode NRefSysSetup_shell(PEP pep,PetscInt k,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,PEP_REFINE_MATSHELL *ctx)
126: {
127:   PetscScalar       *DHii,*T12,*Tr,*Ts,*array,s,ss,sone=1.0,zero=0.0,*M4=ctx->M4,t,*v,*T;
128:   const PetscScalar *m3,*m2;
129:   PetscInt          i,d,j,nmat=pep->nmat,lda=nmat*k,deg=nmat-1,nloc,ld2,ld3;
130:   PetscReal         *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
131:   PetscBLASInt      k_,lda_,lds_,nloc_,ld2_,one=1,info;
132:   Mat               *A=ctx->A,Mk,M1=ctx->M1,P;
133:   BV                V=ctx->V,M2=ctx->M2,M3=ctx->M3,W=ctx->W;
134:   MatStructure      str;
135:   Vec               vc;

137:   PetscFunctionBegin;
138:   PetscCall(STGetMatStructure(pep->st,&str));
139:   PetscCall(PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts));
140:   DHii = T12;
141:   PetscCall(PetscArrayzero(DHii,k*k*nmat));
142:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
143:   for (d=2;d<nmat;d++) {
144:     for (j=0;j<k;j++) {
145:       for (i=0;i<k;i++) {
146:         DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
147:       }
148:     }
149:   }
150:   /* T11 */
151:   if (!ctx->compM1) {
152:     PetscCall(MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN));
153:     PetscCall(PEPEvaluateBasis(pep,h,0,Ts,NULL));
154:     for (j=1;j<nmat;j++) PetscCall(MatAXPY(M1,Ts[j],A[j],str));
155:   }

157:   /* T22 */
158:   PetscCall(PetscBLASIntCast(lds,&lds_));
159:   PetscCall(PetscBLASIntCast(k,&k_));
160:   PetscCall(PetscBLASIntCast(lda,&lda_));
161:   PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
162:   for (i=1;i<deg;i++) {
163:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
164:     s = (i==1)?0.0:1.0;
165:     PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
166:   }
167:   for (i=0;i<k;i++) for (j=0;j<i;j++) { t=M4[i+j*k];M4[i+j*k]=M4[j+i*k];M4[j+i*k]=t; }

169:   /* T12 */
170:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk));
171:   for (i=1;i<nmat;i++) {
172:     PetscCall(MatDenseGetArrayWrite(Mk,&array));
173:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
174:     PetscCall(MatDenseRestoreArrayWrite(Mk,&array));
175:     PetscCall(BVSetActiveColumns(W,0,k));
176:     PetscCall(BVMult(W,1.0,0.0,V,Mk));
177:     if (i==1) PetscCall(BVMatMult(W,A[i],M2));
178:     else {
179:       PetscCall(BVMatMult(W,A[i],M3)); /* using M3 as work space */
180:       PetscCall(BVMult(M2,1.0,1.0,M3,NULL));
181:     }
182:   }

184:   /* T21 */
185:   PetscCall(MatDenseGetArrayWrite(Mk,&array));
186:   for (i=1;i<deg;i++) {
187:     s = (i==1)?0.0:1.0;
188:     ss = PetscConj(fh[i]);
189:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
190:   }
191:   PetscCall(MatDenseRestoreArrayWrite(Mk,&array));
192:   PetscCall(BVSetActiveColumns(M3,0,k));
193:   PetscCall(BVMult(M3,1.0,0.0,V,Mk));
194:   for (i=0;i<k;i++) {
195:     PetscCall(BVGetColumn(M3,i,&vc));
196:     PetscCall(VecConjugate(vc));
197:     PetscCall(BVRestoreColumn(M3,i,&vc));
198:   }
199:   PetscCall(MatDestroy(&Mk));
200:   PetscCall(PetscFree3(T12,Tr,Ts));

202:   PetscCall(VecGetLocalSize(ctx->t,&nloc));
203:   PetscCall(PetscBLASIntCast(nloc,&nloc_));
204:   PetscCall(PetscMalloc1(nloc*k,&T));
205:   PetscCall(KSPGetOperators(pep->refineksp,NULL,&P));
206:   if (!ctx->compM1) PetscCall(MatCopy(ctx->M1,P,SAME_NONZERO_PATTERN));
207:   PetscCall(BVGetArrayRead(ctx->M2,&m2));
208:   PetscCall(BVGetLeadingDimension(ctx->M2,&ld2));
209:   PetscCall(PetscBLASIntCast(ld2,&ld2_));
210:   PetscCall(BVGetArrayRead(ctx->M3,&m3));
211:   PetscCall(BVGetLeadingDimension(ctx->M3,&ld3));
212:   PetscCall(VecGetArray(ctx->t,&v));
213:   for (i=0;i<nloc;i++) for (j=0;j<k;j++) T[j+i*k] = m3[i+j*ld3];
214:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
215:   PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&nloc_,ctx->M4,&k_,ctx->pM4,T,&k_,&info));
216:   PetscCall(PetscFPTrapPop());
217:   SlepcCheckLapackInfo("gesv",info);
218:   for (i=0;i<nloc;i++) v[i] = BLASdot_(&k_,m2+i,&ld2_,T+i*k,&one);
219:   PetscCall(VecRestoreArray(ctx->t,&v));
220:   PetscCall(BVRestoreArrayRead(ctx->M2,&m2));
221:   PetscCall(BVRestoreArrayRead(ctx->M3,&m3));
222:   PetscCall(MatDiagonalSet(P,ctx->t,ADD_VALUES));
223:   PetscCall(PetscFree(T));
224:   PetscCall(KSPSetUp(pep->refineksp));
225:   PetscFunctionReturn(PETSC_SUCCESS);
226: }

228: static PetscErrorCode NRefSysSolve_shell(KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,Vec dVi,PetscScalar *dHi)
229: {
230:   PetscScalar         *t0;
231:   PetscBLASInt        k_,one=1,info,lda_;
232:   PetscInt            i,lda=nmat*k;
233:   Mat                 M;
234:   PEP_REFINE_MATSHELL *ctx;

236:   PetscFunctionBegin;
237:   PetscCall(KSPGetOperators(ksp,&M,NULL));
238:   PetscCall(MatShellGetContext(M,&ctx));
239:   PetscCall(PetscCalloc1(k,&t0));
240:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
241:   PetscCall(PetscBLASIntCast(lda,&lda_));
242:   PetscCall(PetscBLASIntCast(k,&k_));
243:   for (i=0;i<k;i++) t0[i] = Rh[i];
244:   PetscCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,t0,&k_,&info));
245:   SlepcCheckLapackInfo("getrs",info);
246:   PetscCall(BVMultVec(ctx->M2,-1.0,1.0,Rv,t0));
247:   PetscCall(KSPSolve(ksp,Rv,dVi));
248:   PetscCall(VecConjugate(dVi));
249:   PetscCall(BVDotVec(ctx->M3,dVi,dHi));
250:   PetscCall(VecConjugate(dVi));
251:   for (i=0;i<k;i++) dHi[i] = Rh[i]-PetscConj(dHi[i]);
252:   PetscCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,dHi,&k_,&info));
253:   SlepcCheckLapackInfo("getrs",info);
254:   PetscCall(PetscFPTrapPop());
255:   PetscCall(PetscFree(t0));
256:   PetscFunctionReturn(PETSC_SUCCESS);
257: }

259: /*
260:    Computes the residual P(H,V*S)*e_j for the polynomial
261: */
262: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t)
263: {
264:   PetscScalar    *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
265:   PetscReal      *a=pcf,*b=pcf+nmat,*g=b+nmat;
266:   PetscInt       i,ii,jj,lda;
267:   PetscBLASInt   lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
268:   Mat            M0;
269:   Vec            w;

271:   PetscFunctionBegin;
272:   PetscCall(PetscMalloc4(k*nmat,&h,k*k,&DS0,k*k,&DS1,k*k,&Z));
273:   lda = k*nmat;
274:   PetscCall(PetscBLASIntCast(k,&k_));
275:   PetscCall(PetscBLASIntCast(lds,&lds_));
276:   PetscCall(PetscBLASIntCast(lda,&lda_));
277:   PetscCall(PetscBLASIntCast(nmat,&nmat_));
278:   PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
279:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0));
280:   PetscCall(BVSetActiveColumns(W,0,nmat));
281:   PetscCall(BVMult(W,1.0,0.0,V,M0));
282:   PetscCall(MatDestroy(&M0));

284:   PetscCall(BVGetColumn(W,0,&w));
285:   PetscCall(MatMult(A[0],w,Rv));
286:   PetscCall(BVRestoreColumn(W,0,&w));
287:   for (i=1;i<nmat;i++) {
288:     PetscCall(BVGetColumn(W,i,&w));
289:     PetscCall(MatMult(A[i],w,t));
290:     PetscCall(BVRestoreColumn(W,i,&w));
291:     PetscCall(VecAXPY(Rv,1.0,t));
292:   }
293:   /* Update right-hand side */
294:   if (j) {
295:     PetscCall(PetscBLASIntCast(ldh,&ldh_));
296:     PetscCall(PetscArrayzero(Z,k*k));
297:     PetscCall(PetscArrayzero(DS0,k*k));
298:     PetscCall(PetscArraycpy(Z+(j-1)*k,dH+(j-1)*k,k));
299:     /* Update DfH */
300:     for (i=1;i<nmat;i++) {
301:       if (i>1) {
302:         beta = -g[i-1];
303:         PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
304:         tt += -b[i-1];
305:         for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
306:         tt = b[i-1];
307:         beta = 1.0/a[i-1];
308:         PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
309:         F = DS0; DS0 = DS1; DS1 = F;
310:       } else {
311:         PetscCall(PetscArrayzero(DS1,k*k));
312:         for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
313:       }
314:       for (jj=j;jj<k;jj++) {
315:         for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
316:       }
317:     }
318:     for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
319:     /* Update right-hand side */
320:     PetscCall(PetscBLASIntCast(2*k,&k2_));
321:     PetscCall(PetscBLASIntCast(j,&j_));
322:     PetscCall(PetscBLASIntCast(k+rds,&krds_));
323:     c0 = DS0;
324:     PetscCall(PetscArrayzero(Rh,k));
325:     for (i=0;i<nmat;i++) {
326:       PetscCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
327:       PetscCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
328:       PetscCall(BVMultVec(V,1.0,0.0,t,h));
329:       PetscCall(BVSetActiveColumns(dV,0,rds));
330:       PetscCall(BVMultVec(dV,1.0,1.0,t,h+k));
331:       PetscCall(BVGetColumn(W,i,&w));
332:       PetscCall(MatMult(A[i],t,w));
333:       PetscCall(BVRestoreColumn(W,i,&w));
334:       if (i>0 && i<nmat-1) {
335:         PetscCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
336:         PetscCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
337:       }
338:     }

340:     for (i=0;i<nmat;i++) h[i] = -1.0;
341:     PetscCall(BVMultVec(W,1.0,1.0,Rv,h));
342:   }
343:   PetscCall(PetscFree4(h,DS0,DS1,Z));
344:   PetscFunctionReturn(PETSC_SUCCESS);
345: }

347: static PetscErrorCode NRefSysSolve_mbe(PetscInt k,PetscInt sz,BV W,PetscScalar *w,BV Wt,PetscScalar *wt,PetscScalar *d,PetscScalar *dt,KSP ksp,BV T2,BV T3 ,PetscScalar *T4,PetscBool trans,Vec x1,PetscScalar *x2,Vec sol1,PetscScalar *sol2,Vec vw)
348: {
349:   PetscInt       i,j,incf,incc;
350:   PetscScalar    *y,*g,*xx2,*ww,y2,*dd;
351:   Vec            v,t,xx1;
352:   BV             WW,T;

354:   PetscFunctionBegin;
355:   PetscCall(PetscMalloc3(sz,&y,sz,&g,k,&xx2));
356:   if (trans) {
357:     WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
358:   } else {
359:     WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
360:   }
361:   xx1 = vw;
362:   PetscCall(VecCopy(x1,xx1));
363:   PetscCall(PetscArraycpy(xx2,x2,sz));
364:   PetscCall(PetscArrayzero(sol2,k));
365:   for (i=sz-1;i>=0;i--) {
366:     PetscCall(BVGetColumn(WW,i,&v));
367:     PetscCall(VecConjugate(v));
368:     PetscCall(VecDot(xx1,v,y+i));
369:     PetscCall(VecConjugate(v));
370:     PetscCall(BVRestoreColumn(WW,i,&v));
371:     for (j=0;j<i;j++) y[i] += ww[j+i*k]*xx2[j];
372:     y[i] = -(y[i]-xx2[i])/dd[i];
373:     PetscCall(BVGetColumn(T,i,&t));
374:     PetscCall(VecAXPY(xx1,-y[i],t));
375:     PetscCall(BVRestoreColumn(T,i,&t));
376:     for (j=0;j<=i;j++) xx2[j] -= y[i]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
377:     g[i] = xx2[i];
378:   }
379:   if (trans) PetscCall(KSPSolveTranspose(ksp,xx1,sol1));
380:   else PetscCall(KSPSolve(ksp,xx1,sol1));
381:   if (trans) {
382:     WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
383:   } else {
384:     WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
385:   }
386:   for (i=0;i<sz;i++) {
387:     PetscCall(BVGetColumn(T,i,&t));
388:     PetscCall(VecConjugate(t));
389:     PetscCall(VecDot(sol1,t,&y2));
390:     PetscCall(VecConjugate(t));
391:     PetscCall(BVRestoreColumn(T,i,&t));
392:     for (j=0;j<i;j++) y2 += sol2[j]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
393:     y2 = (g[i]-y2)/dd[i];
394:     PetscCall(BVGetColumn(WW,i,&v));
395:     PetscCall(VecAXPY(sol1,-y2,v));
396:     for (j=0;j<i;j++) sol2[j] -= ww[j+i*k]*y2;
397:     sol2[i] = y[i]+y2;
398:     PetscCall(BVRestoreColumn(WW,i,&v));
399:   }
400:   PetscCall(PetscFree3(y,g,xx2));
401:   PetscFunctionReturn(PETSC_SUCCESS);
402: }

404: static PetscErrorCode NRefSysSetup_mbe(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx)
405: {
406:   PetscInt       i,j,l,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
407:   Mat            M1=matctx->M1,*A,*At,Mk;
408:   PetscReal      *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
409:   PetscScalar    s,ss,*DHii,*T12,*array,*Ts,*Tr,*M4=matctx->M4,sone=1.0,zero=0.0;
410:   PetscScalar    *w=matctx->w,*wt=matctx->wt,*d=matctx->d,*dt=matctx->dt;
411:   PetscBLASInt   lds_,lda_,k_;
412:   MatStructure   str;
413:   PetscBool      flg;
414:   BV             M2=matctx->M2,M3=matctx->M3,W=matctx->W,Wt=matctx->Wt;
415:   Vec            vc,vc2;

417:   PetscFunctionBegin;
418:   PetscCall(PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts));
419:   PetscCall(STGetMatStructure(pep->st,&str));
420:   PetscCall(STGetTransform(pep->st,&flg));
421:   if (flg) {
422:     PetscCall(PetscMalloc1(pep->nmat,&At));
423:     for (i=0;i<pep->nmat;i++) PetscCall(STGetMatrixTransformed(pep->st,i,&At[i]));
424:   } else At = pep->A;
425:   if (matctx->subc) A = matctx->A;
426:   else A = At;
427:   /* Form the explicit system matrix */
428:   DHii = T12;
429:   PetscCall(PetscArrayzero(DHii,k*k*nmat));
430:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
431:   for (l=2;l<nmat;l++) {
432:     for (j=0;j<k;j++) {
433:       for (i=0;i<k;i++) {
434:         DHii[l*k+i+j*lda] = ((h-b[l-1])*DHii[(l-1)*k+i+j*lda]+fH[(l-1)*k+i+j*lda]-g[l-1]*DHii[(l-2)*k+i+j*lda])/a[l-1];
435:       }
436:     }
437:   }

439:   /* T11 */
440:   if (!matctx->compM1) {
441:     PetscCall(MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN));
442:     PetscCall(PEPEvaluateBasis(pep,h,0,Ts,NULL));
443:     for (j=1;j<nmat;j++) PetscCall(MatAXPY(M1,Ts[j],A[j],str));
444:   }

446:   /* T22 */
447:   PetscCall(PetscBLASIntCast(lds,&lds_));
448:   PetscCall(PetscBLASIntCast(k,&k_));
449:   PetscCall(PetscBLASIntCast(lda,&lda_));
450:   PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
451:   for (i=1;i<deg;i++) {
452:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
453:     s = (i==1)?0.0:1.0;
454:     PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
455:   }

457:   /* T12 */
458:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk));
459:   for (i=1;i<nmat;i++) {
460:     PetscCall(MatDenseGetArrayWrite(Mk,&array));
461:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
462:     PetscCall(MatDenseRestoreArrayWrite(Mk,&array));
463:     PetscCall(BVSetActiveColumns(W,0,k));
464:     PetscCall(BVMult(W,1.0,0.0,V,Mk));
465:     if (i==1) PetscCall(BVMatMult(W,A[i],M2));
466:     else {
467:       PetscCall(BVMatMult(W,A[i],M3)); /* using M3 as work space */
468:       PetscCall(BVMult(M2,1.0,1.0,M3,NULL));
469:     }
470:   }

472:   /* T21 */
473:   PetscCall(MatDenseGetArrayWrite(Mk,&array));
474:   for (i=1;i<deg;i++) {
475:     s = (i==1)?0.0:1.0;
476:     ss = PetscConj(fh[i]);
477:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
478:   }
479:   PetscCall(MatDenseRestoreArrayWrite(Mk,&array));
480:   PetscCall(BVSetActiveColumns(M3,0,k));
481:   PetscCall(BVMult(M3,1.0,0.0,V,Mk));
482:   for (i=0;i<k;i++) {
483:     PetscCall(BVGetColumn(M3,i,&vc));
484:     PetscCall(VecConjugate(vc));
485:     PetscCall(BVRestoreColumn(M3,i,&vc));
486:   }

488:   PetscCall(PEP_KSPSetOperators(ksp,M1,M1));
489:   PetscCall(KSPSetUp(ksp));
490:   PetscCall(MatDestroy(&Mk));

492:   /* Set up for BEMW */
493:   for (i=0;i<k;i++) {
494:     PetscCall(BVGetColumn(M2,i,&vc));
495:     PetscCall(BVGetColumn(W,i,&vc2));
496:     PetscCall(NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_FALSE,vc,M4+i*k,vc2,w+i*k,matctx->t));
497:     PetscCall(BVRestoreColumn(M2,i,&vc));
498:     PetscCall(BVGetColumn(M3,i,&vc));
499:     PetscCall(VecConjugate(vc));
500:     PetscCall(VecDot(vc2,vc,&d[i]));
501:     PetscCall(VecConjugate(vc));
502:     PetscCall(BVRestoreColumn(M3,i,&vc));
503:     for (j=0;j<i;j++) d[i] += M4[i+j*k]*w[j+i*k];
504:     d[i] = M4[i+i*k]-d[i];
505:     PetscCall(BVRestoreColumn(W,i,&vc2));

507:     PetscCall(BVGetColumn(M3,i,&vc));
508:     PetscCall(BVGetColumn(Wt,i,&vc2));
509:     for (j=0;j<=i;j++) Ts[j] = M4[i+j*k];
510:     PetscCall(NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_TRUE,vc,Ts,vc2,wt+i*k,matctx->t));
511:     PetscCall(BVRestoreColumn(M3,i,&vc));
512:     PetscCall(BVGetColumn(M2,i,&vc));
513:     PetscCall(VecConjugate(vc2));
514:     PetscCall(VecDot(vc,vc2,&dt[i]));
515:     PetscCall(VecConjugate(vc2));
516:     PetscCall(BVRestoreColumn(M2,i,&vc));
517:     for (j=0;j<i;j++) dt[i] += M4[j+i*k]*wt[j+i*k];
518:     dt[i] = M4[i+i*k]-dt[i];
519:     PetscCall(BVRestoreColumn(Wt,i,&vc2));
520:   }

522:   if (flg) PetscCall(PetscFree(At));
523:   PetscCall(PetscFree3(T12,Tr,Ts));
524:   PetscFunctionReturn(PETSC_SUCCESS);
525: }

527: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx,BV W)
528: {
529:   PetscInt          i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
530:   PetscInt          *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
531:   Mat               M,*E=matctx->E,*A,*At,Mk,Md;
532:   PetscReal         *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
533:   PetscScalar       s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
534:   PetscBLASInt      lds_,lda_,k_;
535:   const PetscInt    *idxmc;
536:   const PetscScalar *valsc,*carray;
537:   MatStructure      str;
538:   Vec               vc,vc0;
539:   PetscBool         flg;

541:   PetscFunctionBegin;
542:   PetscCall(PetscMalloc5(k*k,&T22,k*k,&T21,nmat*k*k,&T12,k*k,&Tr,k*k,&Ts));
543:   PetscCall(STGetMatStructure(pep->st,&str));
544:   PetscCall(KSPGetOperators(ksp,&M,NULL));
545:   PetscCall(MatGetOwnershipRange(E[1],&n1,&m1));
546:   PetscCall(MatGetOwnershipRange(E[0],&n0,&m0));
547:   PetscCall(MatGetOwnershipRange(M,&n,NULL));
548:   PetscCall(PetscMalloc1(nmat,&ts));
549:   PetscCall(STGetTransform(pep->st,&flg));
550:   if (flg) {
551:     PetscCall(PetscMalloc1(pep->nmat,&At));
552:     for (i=0;i<pep->nmat;i++) PetscCall(STGetMatrixTransformed(pep->st,i,&At[i]));
553:   } else At = pep->A;
554:   if (matctx->subc) A = matctx->A;
555:   else A = At;
556:   /* Form the explicit system matrix */
557:   DHii = T12;
558:   PetscCall(PetscArrayzero(DHii,k*k*nmat));
559:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
560:   for (d=2;d<nmat;d++) {
561:     for (j=0;j<k;j++) {
562:       for (i=0;i<k;i++) {
563:         DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
564:       }
565:     }
566:   }

568:   /* T11 */
569:   if (!matctx->compM1) {
570:     PetscCall(MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN));
571:     PetscCall(PEPEvaluateBasis(pep,h,0,Ts,NULL));
572:     for (j=1;j<nmat;j++) PetscCall(MatAXPY(E[0],Ts[j],A[j],str));
573:   }
574:   for (i=n0;i<m0;i++) {
575:     PetscCall(MatGetRow(E[0],i,&ncols,&idxmc,&valsc));
576:     idx = n+i-n0;
577:     for (j=0;j<ncols;j++) {
578:       idxg[j] = matctx->map0[idxmc[j]];
579:     }
580:     PetscCall(MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES));
581:     PetscCall(MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc));
582:   }

584:   /* T22 */
585:   PetscCall(PetscBLASIntCast(lds,&lds_));
586:   PetscCall(PetscBLASIntCast(k,&k_));
587:   PetscCall(PetscBLASIntCast(lda,&lda_));
588:   PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
589:   for (i=1;i<deg;i++) {
590:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
591:     s = (i==1)?0.0:1.0;
592:     PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
593:   }
594:   for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
595:   for (i=0;i<m1-n1;i++) {
596:     idx = n+m0-n0+i;
597:     for (j=0;j<k;j++) {
598:       Tr[j] = T22[n1+i+j*k];
599:     }
600:     PetscCall(MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES));
601:   }

603:   /* T21 */
604:   for (i=1;i<deg;i++) {
605:     s = (i==1)?0.0:1.0;
606:     ss = PetscConj(fh[i]);
607:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
608:   }
609:   PetscCall(BVSetActiveColumns(W,0,k));
610:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk));
611:   PetscCall(BVMult(W,1.0,0.0,V,Mk));
612:   for (i=0;i<k;i++) {
613:     PetscCall(BVGetColumn(W,i,&vc));
614:     PetscCall(VecConjugate(vc));
615:     PetscCall(VecGetArrayRead(vc,&carray));
616:     idx = matctx->map1[i];
617:     PetscCall(MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,carray,INSERT_VALUES));
618:     PetscCall(VecRestoreArrayRead(vc,&carray));
619:     PetscCall(BVRestoreColumn(W,i,&vc));
620:   }

622:   /* T12 */
623:   for (i=1;i<nmat;i++) {
624:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
625:     for (j=0;j<k;j++) PetscCall(PetscArraycpy(T12+i*k+j*lda,Ts+j*k,k));
626:   }
627:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md));
628:   for (i=0;i<nmat;i++) ts[i] = 1.0;
629:   for (j=0;j<k;j++) {
630:     PetscCall(MatDenseGetArrayWrite(Md,&array));
631:     PetscCall(PetscArraycpy(array,T12+k+j*lda,(nmat-1)*k));
632:     PetscCall(MatDenseRestoreArrayWrite(Md,&array));
633:     PetscCall(BVSetActiveColumns(W,0,nmat-1));
634:     PetscCall(BVMult(W,1.0,0.0,V,Md));
635:     for (i=nmat-1;i>0;i--) {
636:       PetscCall(BVGetColumn(W,i-1,&vc0));
637:       PetscCall(BVGetColumn(W,i,&vc));
638:       PetscCall(MatMult(A[i],vc0,vc));
639:       PetscCall(BVRestoreColumn(W,i-1,&vc0));
640:       PetscCall(BVRestoreColumn(W,i,&vc));
641:     }
642:     PetscCall(BVSetActiveColumns(W,1,nmat));
643:     PetscCall(BVGetColumn(W,0,&vc0));
644:     PetscCall(BVMultVec(W,1.0,0.0,vc0,ts));
645:     PetscCall(VecGetArrayRead(vc0,&carray));
646:     idx = matctx->map1[j];
647:     PetscCall(MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,carray,INSERT_VALUES));
648:     PetscCall(VecRestoreArrayRead(vc0,&carray));
649:     PetscCall(BVRestoreColumn(W,0,&vc0));
650:   }
651:   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
652:   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
653:   PetscCall(PEP_KSPSetOperators(ksp,M,M));
654:   PetscCall(KSPSetUp(ksp));
655:   PetscCall(PetscFree(ts));
656:   PetscCall(MatDestroy(&Mk));
657:   PetscCall(MatDestroy(&Md));
658:   if (flg) PetscCall(PetscFree(At));
659:   PetscCall(PetscFree5(T22,T21,T12,Tr,Ts));
660:   PetscFunctionReturn(PETSC_SUCCESS);
661: }

663: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx)
664: {
665:   PetscInt          n0,m0,n1,m1,i;
666:   PetscScalar       *arrayV;
667:   const PetscScalar *array;

669:   PetscFunctionBegin;
670:   PetscCall(MatGetOwnershipRange(matctx->E[1],&n1,&m1));
671:   PetscCall(MatGetOwnershipRange(matctx->E[0],&n0,&m0));

673:   /* Right side */
674:   PetscCall(VecGetArrayRead(Rv,&array));
675:   PetscCall(VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES));
676:   PetscCall(VecRestoreArrayRead(Rv,&array));
677:   PetscCall(VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES));
678:   PetscCall(VecAssemblyBegin(matctx->tN));
679:   PetscCall(VecAssemblyEnd(matctx->tN));

681:   /* Solve */
682:   PetscCall(KSPSolve(ksp,matctx->tN,matctx->ttN));

684:   /* Retrieve solution */
685:   PetscCall(VecGetArray(dVi,&arrayV));
686:   PetscCall(VecGetArrayRead(matctx->ttN,&array));
687:   PetscCall(PetscArraycpy(arrayV,array,m0-n0));
688:   PetscCall(VecRestoreArray(dVi,&arrayV));
689:   if (!matctx->subc) {
690:     PetscCall(VecGetArray(matctx->t1,&arrayV));
691:     for (i=0;i<m1-n1;i++) arrayV[i] =  array[m0-n0+i];
692:     PetscCall(VecRestoreArray(matctx->t1,&arrayV));
693:     PetscCall(VecRestoreArrayRead(matctx->ttN,&array));
694:     PetscCall(VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD));
695:     PetscCall(VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD));
696:     PetscCall(VecGetArrayRead(matctx->vseq,&array));
697:     for (i=0;i<k;i++) dHi[i] = array[i];
698:     PetscCall(VecRestoreArrayRead(matctx->vseq,&array));
699:   }
700:   PetscFunctionReturn(PETSC_SUCCESS);
701: }

703: static PetscErrorCode NRefSysIter(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx,BV W)
704: {
705:   PetscInt            j,m,lda=pep->nmat*k,n0,m0,idx;
706:   PetscMPIInt         root,len;
707:   PetscScalar         *array2,h;
708:   const PetscScalar   *array;
709:   Vec                 R,Vi;
710:   PEP_REFINE_MATSHELL *ctx;
711:   Mat                 M;

713:   PetscFunctionBegin;
714:   if (!matctx || !matctx->subc) {
715:     for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
716:     h   = H[i+i*ldh];
717:     idx = i;
718:     R   = Rv;
719:     Vi  = dVi;
720:     switch (pep->scheme) {
721:     case PEP_REFINE_SCHEME_EXPLICIT:
722:       PetscCall(NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W));
723:       matctx->compM1 = PETSC_FALSE;
724:       break;
725:     case PEP_REFINE_SCHEME_MBE:
726:       PetscCall(NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,V,matctx));
727:       matctx->compM1 = PETSC_FALSE;
728:       break;
729:     case PEP_REFINE_SCHEME_SCHUR:
730:       PetscCall(KSPGetOperators(ksp,&M,NULL));
731:       PetscCall(MatShellGetContext(M,&ctx));
732:       PetscCall(NRefSysSetup_shell(pep,k,fH,S,lds,fh,h,ctx));
733:       ctx->compM1 = PETSC_FALSE;
734:       break;
735:     }
736:   } else {
737:     if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
738:       for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
739:       h = H[idx+idx*ldh];
740:       matctx->idx = idx;
741:       switch (pep->scheme) {
742:       case PEP_REFINE_SCHEME_EXPLICIT:
743:         PetscCall(NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W));
744:         matctx->compM1 = PETSC_FALSE;
745:         break;
746:       case PEP_REFINE_SCHEME_MBE:
747:         PetscCall(NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx));
748:         matctx->compM1 = PETSC_FALSE;
749:         break;
750:       case PEP_REFINE_SCHEME_SCHUR:
751:         break;
752:       }
753:     } else idx = matctx->idx;
754:     PetscCall(VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
755:     PetscCall(VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
756:     PetscCall(VecGetArrayRead(matctx->tg,&array));
757:     PetscCall(VecPlaceArray(matctx->t,array));
758:     PetscCall(VecCopy(matctx->t,matctx->Rv));
759:     PetscCall(VecResetArray(matctx->t));
760:     PetscCall(VecRestoreArrayRead(matctx->tg,&array));
761:     R  = matctx->Rv;
762:     Vi = matctx->Vi;
763:   }
764:   if (idx==i && idx<k) {
765:     switch (pep->scheme) {
766:       case PEP_REFINE_SCHEME_EXPLICIT:
767:         PetscCall(NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx));
768:         break;
769:       case PEP_REFINE_SCHEME_MBE:
770:         PetscCall(NRefSysSolve_mbe(k,k,matctx->W,matctx->w,matctx->Wt,matctx->wt,matctx->d,matctx->dt,ksp,matctx->M2,matctx->M3 ,matctx->M4,PETSC_FALSE,R,Rh,Vi,dHi,matctx->t));
771:         break;
772:       case PEP_REFINE_SCHEME_SCHUR:
773:         PetscCall(NRefSysSolve_shell(ksp,pep->nmat,R,Rh,k,Vi,dHi));
774:         break;
775:     }
776:   }
777:   if (matctx && matctx->subc) {
778:     PetscCall(VecGetLocalSize(Vi,&m));
779:     PetscCall(VecGetArrayRead(Vi,&array));
780:     PetscCall(VecGetArray(matctx->tg,&array2));
781:     PetscCall(PetscArraycpy(array2,array,m));
782:     PetscCall(VecRestoreArray(matctx->tg,&array2));
783:     PetscCall(VecRestoreArrayRead(Vi,&array));
784:     PetscCall(VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE));
785:     PetscCall(VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE));
786:     switch (pep->scheme) {
787:     case PEP_REFINE_SCHEME_EXPLICIT:
788:       PetscCall(MatGetOwnershipRange(matctx->E[0],&n0,&m0));
789:       PetscCall(VecGetArrayRead(matctx->ttN,&array));
790:       PetscCall(VecPlaceArray(matctx->tp,array+m0-n0));
791:       PetscCall(VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD));
792:       PetscCall(VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD));
793:       PetscCall(VecResetArray(matctx->tp));
794:       PetscCall(VecRestoreArrayRead(matctx->ttN,&array));
795:       PetscCall(VecGetArrayRead(matctx->tpg,&array));
796:       for (j=0;j<k;j++) dHi[j] = array[j];
797:       PetscCall(VecRestoreArrayRead(matctx->tpg,&array));
798:       break;
799:      case PEP_REFINE_SCHEME_MBE:
800:       root = 0;
801:       for (j=0;j<i%matctx->subc->n;j++) root += matctx->subc->subsize[j];
802:       PetscCall(PetscMPIIntCast(k,&len));
803:       PetscCallMPI(MPI_Bcast(dHi,len,MPIU_SCALAR,root,matctx->subc->dupparent));
804:       break;
805:     case PEP_REFINE_SCHEME_SCHUR:
806:       break;
807:     }
808:   }
809:   PetscFunctionReturn(PETSC_SUCCESS);
810: }

812: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,PEP_REFINE_EXPLICIT *matctx)
813: {
814:   PetscInt            i,nmat=pep->nmat,lda=nmat*k;
815:   PetscScalar         *fh,*Rh,*DfH;
816:   PetscReal           norm;
817:   BV                  W;
818:   Vec                 Rv,t,dvi;
819:   PEP_REFINE_MATSHELL *ctx;
820:   Mat                 M,*At;
821:   PetscBool           flg,lindep;

823:   PetscFunctionBegin;
824:   PetscCall(PetscMalloc2(nmat*k*k,&DfH,k,&Rh));
825:   *rds = 0;
826:   PetscCall(BVCreateVec(pep->V,&Rv));
827:   switch (pep->scheme) {
828:   case PEP_REFINE_SCHEME_EXPLICIT:
829:     PetscCall(BVCreateVec(pep->V,&t));
830:     PetscCall(BVDuplicateResize(pep->V,PetscMax(k,nmat),&W));
831:     PetscCall(PetscMalloc1(nmat,&fh));
832:     break;
833:   case PEP_REFINE_SCHEME_MBE:
834:     if (matctx->subc) {
835:       PetscCall(BVCreateVec(pep->V,&t));
836:       PetscCall(BVDuplicateResize(pep->V,PetscMax(k,nmat),&W));
837:     } else {
838:       W = matctx->W;
839:       PetscCall(PetscObjectReference((PetscObject)W));
840:       t = matctx->t;
841:       PetscCall(PetscObjectReference((PetscObject)t));
842:     }
843:     PetscCall(BVScale(matctx->W,0.0));
844:     PetscCall(BVScale(matctx->Wt,0.0));
845:     PetscCall(BVScale(matctx->M2,0.0));
846:     PetscCall(BVScale(matctx->M3,0.0));
847:     PetscCall(PetscMalloc1(nmat,&fh));
848:     break;
849:   case PEP_REFINE_SCHEME_SCHUR:
850:     PetscCall(KSPGetOperators(ksp,&M,NULL));
851:     PetscCall(MatShellGetContext(M,&ctx));
852:     PetscCall(BVCreateVec(pep->V,&t));
853:     PetscCall(BVDuplicateResize(pep->V,PetscMax(k,nmat),&W));
854:     fh = ctx->fih;
855:     break;
856:   }
857:   PetscCall(PetscArrayzero(dVS,2*k*k));
858:   PetscCall(PetscArrayzero(DfH,lda*k));
859:   PetscCall(STGetTransform(pep->st,&flg));
860:   if (flg) {
861:     PetscCall(PetscMalloc1(pep->nmat,&At));
862:     for (i=0;i<pep->nmat;i++) PetscCall(STGetMatrixTransformed(pep->st,i,&At[i]));
863:   } else At = pep->A;

865:   /* Main loop for computing the i-th columns of dX and dS */
866:   for (i=0;i<k;i++) {
867:     /* Compute and update i-th column of the right hand side */
868:     PetscCall(PetscArrayzero(Rh,k));
869:     PetscCall(NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t));

871:     /* Update and solve system */
872:     PetscCall(BVGetColumn(dV,i,&dvi));
873:     PetscCall(NRefSysIter(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W));
874:     /* Orthogonalize computed solution */
875:     PetscCall(BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep));
876:     PetscCall(BVRestoreColumn(dV,i,&dvi));
877:     if (!lindep) {
878:       PetscCall(BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep));
879:       if (!lindep) {
880:         dVS[k+i+i*2*k] = norm;
881:         PetscCall(BVScaleColumn(dV,i,1.0/norm));
882:         (*rds)++;
883:       }
884:     }
885:   }
886:   PetscCall(BVSetActiveColumns(dV,0,*rds));
887:   PetscCall(VecDestroy(&t));
888:   PetscCall(VecDestroy(&Rv));
889:   PetscCall(BVDestroy(&W));
890:   if (flg) PetscCall(PetscFree(At));
891:   PetscCall(PetscFree2(DfH,Rh));
892:   if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) PetscCall(PetscFree(fh));
893:   PetscFunctionReturn(PETSC_SUCCESS);
894: }

896: static PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds)
897: {
898:   PetscInt       j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,ldg;
899:   PetscScalar    *G,*tau,sone=1.0,zero=0.0,*work;
900:   PetscBLASInt   lds_,k_,ldh_,info,ldg_,lda_;

902:   PetscFunctionBegin;
903:   PetscCall(PetscMalloc3(k,&tau,k,&work,deg*k*k,&G));
904:   PetscCall(PetscBLASIntCast(lds,&lds_));
905:   PetscCall(PetscBLASIntCast(lda,&lda_));
906:   PetscCall(PetscBLASIntCast(k,&k_));

908:   /* Form auxiliary matrix for the orthogonalization step */
909:   ldg = deg*k;
910:   PetscCall(PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH));
911:   PetscCall(PetscBLASIntCast(ldg,&ldg_));
912:   PetscCall(PetscBLASIntCast(ldh,&ldh_));
913:   for (j=0;j<deg;j++) {
914:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
915:   }
916:   /* Orthogonalize and update S */
917:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
918:   PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work,&k_,&info));
919:   PetscCall(PetscFPTrapPop());
920:   SlepcCheckLapackInfo("geqrf",info);
921:   PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));

923:   /* Update H */
924:   PetscCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
925:   PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
926:   PetscCall(PetscFree3(tau,work,G));
927:   PetscFunctionReturn(PETSC_SUCCESS);
928: }

930: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds)
931: {
932:   PetscInt       i,j,nmat=pep->nmat,lda=nmat*k;
933:   PetscScalar    *tau,*array,*work;
934:   PetscBLASInt   lds_,k_,lda_,ldh_,kdrs_,info,k2_;
935:   Mat            M0;

937:   PetscFunctionBegin;
938:   PetscCall(PetscMalloc2(k,&tau,k,&work));
939:   PetscCall(PetscBLASIntCast(lds,&lds_));
940:   PetscCall(PetscBLASIntCast(lda,&lda_));
941:   PetscCall(PetscBLASIntCast(ldh,&ldh_));
942:   PetscCall(PetscBLASIntCast(k,&k_));
943:   PetscCall(PetscBLASIntCast(2*k,&k2_));
944:   PetscCall(PetscBLASIntCast((k+rds),&kdrs_));
945:   /* Update H */
946:   for (j=0;j<k;j++) {
947:     for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
948:   }
949:   /* Update V */
950:   for (j=0;j<k;j++) {
951:     for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
952:     for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
953:   }
954:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
955:   PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work,&k_,&info));
956:   SlepcCheckLapackInfo("geqrf",info);
957:   /* Copy triangular matrix in S */
958:   for (j=0;j<k;j++) {
959:     for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
960:     for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
961:   }
962:   PetscCallBLAS("LAPACKorgqr",LAPACKorgqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work,&k_,&info));
963:   SlepcCheckLapackInfo("orgqr",info);
964:   PetscCall(PetscFPTrapPop());
965:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0));
966:   PetscCall(MatDenseGetArrayWrite(M0,&array));
967:   for (j=0;j<k;j++) PetscCall(PetscArraycpy(array+j*k,dVS+j*2*k,k));
968:   PetscCall(MatDenseRestoreArrayWrite(M0,&array));
969:   PetscCall(BVMultInPlace(pep->V,M0,0,k));
970:   if (rds) {
971:     PetscCall(MatDenseGetArrayWrite(M0,&array));
972:     for (j=0;j<k;j++) PetscCall(PetscArraycpy(array+j*k,dVS+k+j*2*k,rds));
973:     PetscCall(MatDenseRestoreArrayWrite(M0,&array));
974:     PetscCall(BVMultInPlace(dV,M0,0,k));
975:     PetscCall(BVMult(pep->V,1.0,1.0,dV,NULL));
976:   }
977:   PetscCall(MatDestroy(&M0));
978:   PetscCall(NRefOrthogStep(pep,k,H,ldh,fH,S,lds));
979:   PetscCall(PetscFree2(tau,work));
980:   PetscFunctionReturn(PETSC_SUCCESS);
981: }

983: static PetscErrorCode PEPNRefSetUp(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PEP_REFINE_EXPLICIT *matctx,PetscBool ini)
984: {
985:   PEP_REFINE_MATSHELL *ctx;
986:   PetscScalar         t,*coef;
987:   const PetscScalar   *array;
988:   MatStructure        str;
989:   PetscInt            j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
990:   MPI_Comm            comm;
991:   PetscMPIInt         np;
992:   const PetscInt      *rgs0,*rgs1;
993:   Mat                 B,C,*E,*A,*At;
994:   IS                  is1,is2;
995:   Vec                 v;
996:   PetscBool           flg;
997:   Mat                 M,P;

999:   PetscFunctionBegin;
1000:   PetscCall(PetscMalloc1(nmat,&coef));
1001:   PetscCall(STGetTransform(pep->st,&flg));
1002:   if (flg) {
1003:     PetscCall(PetscMalloc1(pep->nmat,&At));
1004:     for (i=0;i<pep->nmat;i++) PetscCall(STGetMatrixTransformed(pep->st,i,&At[i]));
1005:   } else At = pep->A;
1006:   switch (pep->scheme) {
1007:   case PEP_REFINE_SCHEME_EXPLICIT:
1008:     if (ini) {
1009:       if (matctx->subc) {
1010:         A = matctx->A;
1011:         PetscCall(PetscSubcommGetChild(matctx->subc,&comm));
1012:       } else {
1013:         A = At;
1014:         PetscCall(PetscObjectGetComm((PetscObject)pep,&comm));
1015:       }
1016:       E = matctx->E;
1017:       PetscCall(STGetMatStructure(pep->st,&str));
1018:       PetscCall(MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]));
1019:       j = (matctx->subc)?matctx->subc->color:0;
1020:       PetscCall(PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL));
1021:       for (j=1;j<nmat;j++) PetscCall(MatAXPY(E[0],coef[j],A[j],str));
1022:       PetscCall(MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]));
1023:       PetscCall(MatGetOwnershipRange(E[0],&n0,&m0));
1024:       PetscCall(MatGetOwnershipRange(E[1],&n1,&m1));
1025:       PetscCall(MatGetOwnershipRangeColumn(E[0],&n0_,&m0_));
1026:       PetscCall(MatGetOwnershipRangeColumn(E[1],&n1_,&m1_));
1027:       /* T12 and T21 are computed from V and V*, so,
1028:          they must have the same column and row ranges */
1029:       PetscCheck(m0_-n0_==m0-n0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inconsistent dimensions");
1030:       PetscCall(MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B));
1031:       PetscCall(MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C));
1032:       PetscCall(MatCreateTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],&M));
1033:       PetscCall(MatDestroy(&B));
1034:       PetscCall(MatDestroy(&C));
1035:       matctx->compM1 = PETSC_TRUE;
1036:       PetscCall(MatGetSize(E[0],NULL,&N0));
1037:       PetscCall(MatGetSize(E[1],NULL,&N1));
1038:       PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&np));
1039:       PetscCall(MatGetOwnershipRanges(E[0],&rgs0));
1040:       PetscCall(MatGetOwnershipRanges(E[1],&rgs1));
1041:       PetscCall(PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1));
1042:       /* Create column (and row) mapping */
1043:       for (p=0;p<np;p++) {
1044:         for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
1045:         for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
1046:       }
1047:       PetscCall(MatCreateVecs(M,NULL,&matctx->tN));
1048:       PetscCall(MatCreateVecs(matctx->E[1],NULL,&matctx->t1));
1049:       PetscCall(VecDuplicate(matctx->tN,&matctx->ttN));
1050:       if (matctx->subc) {
1051:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np));
1052:         count = np*k;
1053:         PetscCall(PetscMalloc2(count,&idx1,count,&idx2));
1054:         PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp));
1055:         PetscCall(VecGetOwnershipRange(matctx->tp,&l0,NULL));
1056:         PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg));
1057:         for (si=0;si<matctx->subc->n;si++) {
1058:           if (matctx->subc->color==si) {
1059:             j=0;
1060:             if (matctx->subc->color==si) {
1061:               for (p=0;p<np;p++) {
1062:                 for (i=n1;i<m1;i++) {
1063:                   idx1[j] = l0+i-n1;
1064:                   idx2[j++] =p*k+i;
1065:                 }
1066:               }
1067:             }
1068:             count = np*(m1-n1);
1069:           } else count =0;
1070:           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1));
1071:           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2));
1072:           PetscCall(VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]));
1073:           PetscCall(ISDestroy(&is1));
1074:           PetscCall(ISDestroy(&is2));
1075:         }
1076:         PetscCall(PetscFree2(idx1,idx2));
1077:       } else PetscCall(VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq));
1078:       P = M;
1079:     } else {
1080:       if (matctx->subc) {
1081:         /* Scatter vectors pep->V */
1082:         for (i=0;i<k;i++) {
1083:           PetscCall(BVGetColumn(pep->V,i,&v));
1084:           PetscCall(VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
1085:           PetscCall(VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
1086:           PetscCall(BVRestoreColumn(pep->V,i,&v));
1087:           PetscCall(VecGetArrayRead(matctx->tg,&array));
1088:           PetscCall(VecPlaceArray(matctx->t,(const PetscScalar*)array));
1089:           PetscCall(BVInsertVec(matctx->V,i,matctx->t));
1090:           PetscCall(VecResetArray(matctx->t));
1091:           PetscCall(VecRestoreArrayRead(matctx->tg,&array));
1092:         }
1093:       }
1094:     }
1095:     break;
1096:   case PEP_REFINE_SCHEME_MBE:
1097:     if (ini) {
1098:       if (matctx->subc) {
1099:         A = matctx->A;
1100:         PetscCall(PetscSubcommGetChild(matctx->subc,&comm));
1101:       } else {
1102:         matctx->V = pep->V;
1103:         A = At;
1104:         PetscCall(PetscObjectGetComm((PetscObject)pep,&comm));
1105:         PetscCall(MatCreateVecs(pep->A[0],&matctx->t,NULL));
1106:       }
1107:       PetscCall(STGetMatStructure(pep->st,&str));
1108:       PetscCall(MatDuplicate(A[0],MAT_COPY_VALUES,&matctx->M1));
1109:       j = (matctx->subc)?matctx->subc->color:0;
1110:       PetscCall(PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL));
1111:       for (j=1;j<nmat;j++) PetscCall(MatAXPY(matctx->M1,coef[j],A[j],str));
1112:       PetscCall(BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W));
1113:       PetscCall(BVDuplicateResize(matctx->V,k,&matctx->M2));
1114:       PetscCall(BVDuplicate(matctx->M2,&matctx->M3));
1115:       PetscCall(BVDuplicate(matctx->M2,&matctx->Wt));
1116:       PetscCall(PetscMalloc5(k*k,&matctx->M4,k*k,&matctx->w,k*k,&matctx->wt,k,&matctx->d,k,&matctx->dt));
1117:       matctx->compM1 = PETSC_TRUE;
1118:       M = matctx->M1;
1119:       P = M;
1120:     }
1121:     break;
1122:   case PEP_REFINE_SCHEME_SCHUR:
1123:     if (ini) {
1124:       PetscCall(PetscObjectGetComm((PetscObject)pep,&comm));
1125:       PetscCall(MatGetSize(At[0],&m0,&n0));
1126:       PetscCall(PetscMalloc1(1,&ctx));
1127:       PetscCall(STGetMatStructure(pep->st,&str));
1128:       /* Create a shell matrix to solve the linear system */
1129:       ctx->V = pep->V;
1130:       ctx->k = k; ctx->nmat = nmat;
1131:       PetscCall(PetscMalloc5(nmat,&ctx->A,k*k,&ctx->M4,k,&ctx->pM4,2*k*k,&ctx->work,nmat,&ctx->fih));
1132:       for (i=0;i<nmat;i++) ctx->A[i] = At[i];
1133:       PetscCall(PetscArrayzero(ctx->M4,k*k));
1134:       PetscCall(MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,&M));
1135:       PetscCall(MatShellSetOperation(M,MATOP_MULT,(void(*)(void))MatMult_FS));
1136:       PetscCall(BVDuplicateResize(ctx->V,PetscMax(k,pep->nmat),&ctx->W));
1137:       PetscCall(BVDuplicateResize(ctx->V,k,&ctx->M2));
1138:       PetscCall(BVDuplicate(ctx->M2,&ctx->M3));
1139:       PetscCall(BVCreateVec(pep->V,&ctx->t));
1140:       PetscCall(MatDuplicate(At[0],MAT_COPY_VALUES,&ctx->M1));
1141:       PetscCall(PEPEvaluateBasis(pep,H[0],0,coef,NULL));
1142:       for (j=1;j<nmat;j++) PetscCall(MatAXPY(ctx->M1,coef[j],At[j],str));
1143:       PetscCall(MatDuplicate(At[0],MAT_COPY_VALUES,&P));
1144:       /* Compute a precond matrix for the system */
1145:       t = H[0];
1146:       PetscCall(PEPEvaluateBasis(pep,t,0,coef,NULL));
1147:       for (j=1;j<nmat;j++) PetscCall(MatAXPY(P,coef[j],At[j],str));
1148:       ctx->compM1 = PETSC_TRUE;
1149:     }
1150:     break;
1151:   }
1152:   if (ini) {
1153:     PetscCall(PEPRefineGetKSP(pep,&pep->refineksp));
1154:     PetscCall(KSPSetErrorIfNotConverged(pep->refineksp,PETSC_TRUE));
1155:     PetscCall(PEP_KSPSetOperators(pep->refineksp,M,P));
1156:     PetscCall(KSPSetFromOptions(pep->refineksp));
1157:   }

1159:   if (!ini && matctx && matctx->subc) {
1160:      /* Scatter vectors pep->V */
1161:     for (i=0;i<k;i++) {
1162:       PetscCall(BVGetColumn(pep->V,i,&v));
1163:       PetscCall(VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
1164:       PetscCall(VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
1165:       PetscCall(BVRestoreColumn(pep->V,i,&v));
1166:       PetscCall(VecGetArrayRead(matctx->tg,&array));
1167:       PetscCall(VecPlaceArray(matctx->t,(const PetscScalar*)array));
1168:       PetscCall(BVInsertVec(matctx->V,i,matctx->t));
1169:       PetscCall(VecResetArray(matctx->t));
1170:       PetscCall(VecRestoreArrayRead(matctx->tg,&array));
1171:     }
1172:    }
1173:   PetscCall(PetscFree(coef));
1174:   if (flg) PetscCall(PetscFree(At));
1175:   PetscFunctionReturn(PETSC_SUCCESS);
1176: }

1178: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,PEP_REFINE_EXPLICIT *matctx,PetscInt nsubc)
1179: {
1180:   PetscInt          i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
1181:   IS                is1,is2;
1182:   BVType            type;
1183:   Vec               v;
1184:   const PetscScalar *array;
1185:   Mat               *A;
1186:   PetscBool         flg;
1187:   MPI_Comm          contpar,child;

1189:   PetscFunctionBegin;
1190:   PetscCall(STGetTransform(pep->st,&flg));
1191:   if (flg) {
1192:     PetscCall(PetscMalloc1(pep->nmat,&A));
1193:     for (i=0;i<pep->nmat;i++) PetscCall(STGetMatrixTransformed(pep->st,i,&A[i]));
1194:   } else A = pep->A;
1195:   PetscCall(PetscSubcommGetChild(matctx->subc,&child));
1196:   PetscCall(PetscSubcommGetContiguousParent(matctx->subc,&contpar));

1198:   /* Duplicate pep matrices */
1199:   PetscCall(PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id));
1200:   for (i=0;i<pep->nmat;i++) PetscCall(MatCreateRedundantMatrix(A[i],0,child,MAT_INITIAL_MATRIX,&matctx->A[i]));

1202:   /* Create Scatter */
1203:   PetscCall(MatCreateVecs(matctx->A[0],&matctx->t,NULL));
1204:   PetscCall(MatGetLocalSize(matctx->A[0],&nloc_sub,NULL));
1205:   PetscCall(VecCreateMPI(contpar,nloc_sub,PETSC_DECIDE,&matctx->tg));
1206:   PetscCall(BVGetColumn(pep->V,0,&v));
1207:   PetscCall(VecGetOwnershipRange(v,&n0,&m0));
1208:   nloc0 = m0-n0;
1209:   PetscCall(PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2));
1210:   j = 0;
1211:   for (si=0;si<matctx->subc->n;si++) {
1212:     for (i=n0;i<m0;i++) {
1213:       idx1[j]   = i;
1214:       idx2[j++] = i+pep->n*si;
1215:     }
1216:   }
1217:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1));
1218:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2));
1219:   PetscCall(VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub));
1220:   PetscCall(ISDestroy(&is1));
1221:   PetscCall(ISDestroy(&is2));
1222:   for (si=0;si<matctx->subc->n;si++) {
1223:     j=0;
1224:     for (i=n0;i<m0;i++) {
1225:       idx1[j] = i;
1226:       idx2[j++] = i+pep->n*si;
1227:     }
1228:     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1));
1229:     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2));
1230:     PetscCall(VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]));
1231:     PetscCall(ISDestroy(&is1));
1232:     PetscCall(ISDestroy(&is2));
1233:   }
1234:   PetscCall(BVRestoreColumn(pep->V,0,&v));
1235:   PetscCall(PetscFree2(idx1,idx2));

1237:   /* Duplicate pep->V vecs */
1238:   PetscCall(BVGetType(pep->V,&type));
1239:   PetscCall(BVCreate(child,&matctx->V));
1240:   PetscCall(BVSetType(matctx->V,type));
1241:   PetscCall(BVSetSizesFromVec(matctx->V,matctx->t,k));
1242:   if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) PetscCall(BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W));
1243:   for (i=0;i<k;i++) {
1244:     PetscCall(BVGetColumn(pep->V,i,&v));
1245:     PetscCall(VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
1246:     PetscCall(VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD));
1247:     PetscCall(BVRestoreColumn(pep->V,i,&v));
1248:     PetscCall(VecGetArrayRead(matctx->tg,&array));
1249:     PetscCall(VecPlaceArray(matctx->t,(const PetscScalar*)array));
1250:     PetscCall(BVInsertVec(matctx->V,i,matctx->t));
1251:     PetscCall(VecResetArray(matctx->t));
1252:     PetscCall(VecRestoreArrayRead(matctx->tg,&array));
1253:   }

1255:   PetscCall(VecDuplicate(matctx->t,&matctx->Rv));
1256:   PetscCall(VecDuplicate(matctx->t,&matctx->Vi));
1257:   if (flg) PetscCall(PetscFree(A));
1258:   PetscFunctionReturn(PETSC_SUCCESS);
1259: }

1261: static PetscErrorCode NRefSubcommDestroy(PEP pep,PEP_REFINE_EXPLICIT *matctx)
1262: {
1263:   PetscInt       i;

1265:   PetscFunctionBegin;
1266:   PetscCall(VecScatterDestroy(&matctx->scatter_sub));
1267:   for (i=0;i<matctx->subc->n;i++) PetscCall(VecScatterDestroy(&matctx->scatter_id[i]));
1268:   for (i=0;i<pep->nmat;i++) PetscCall(MatDestroy(&matctx->A[i]));
1269:   if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1270:     for (i=0;i<matctx->subc->n;i++) PetscCall(VecScatterDestroy(&matctx->scatterp_id[i]));
1271:     PetscCall(VecDestroy(&matctx->tp));
1272:     PetscCall(VecDestroy(&matctx->tpg));
1273:     PetscCall(BVDestroy(&matctx->W));
1274:   }
1275:   PetscCall(PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id));
1276:   PetscCall(BVDestroy(&matctx->V));
1277:   PetscCall(VecDestroy(&matctx->t));
1278:   PetscCall(VecDestroy(&matctx->tg));
1279:   PetscCall(VecDestroy(&matctx->Rv));
1280:   PetscCall(VecDestroy(&matctx->Vi));
1281:   PetscFunctionReturn(PETSC_SUCCESS);
1282: }

1284: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds)
1285: {
1286:   PetscScalar         *H,*work,*dH,*fH,*dVS;
1287:   PetscInt            ldh,i,j,its=1,nmat=pep->nmat,nsubc=pep->npart,rds;
1288:   PetscBLASInt        k_,ld_,*p,info;
1289:   BV                  dV;
1290:   PetscBool           sinvert,flg;
1291:   PEP_REFINE_EXPLICIT *matctx=NULL;
1292:   Vec                 v;
1293:   Mat                 M,P;
1294:   PEP_REFINE_MATSHELL *ctx;

1296:   PetscFunctionBegin;
1297:   PetscCall(PetscLogEventBegin(PEP_Refine,pep,0,0,0));
1298:   PetscCheck(k<=pep->n,PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Multiple Refinement available only for invariant pairs of dimension smaller than n=%" PetscInt_FMT,pep->n);
1299:   /* the input tolerance is not being taken into account (by the moment) */
1300:   its = *maxits;
1301:   PetscCall(PetscMalloc3(k*k,&dH,nmat*k*k,&fH,k,&work));
1302:   PetscCall(DSGetLeadingDimension(pep->ds,&ldh));
1303:   PetscCall(PetscMalloc1(2*k*k,&dVS));
1304:   PetscCall(STGetTransform(pep->st,&flg));
1305:   if (!flg && pep->st && pep->ops->backtransform) { /* BackTransform */
1306:     PetscCall(PetscBLASIntCast(k,&k_));
1307:     PetscCall(PetscBLASIntCast(ldh,&ld_));
1308:     PetscCall(PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert));
1309:     if (sinvert) {
1310:       PetscCall(DSGetArray(pep->ds,DS_MAT_A,&H));
1311:       PetscCall(PetscMalloc1(k,&p));
1312:       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1313:       PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1314:       SlepcCheckLapackInfo("getrf",info);
1315:       PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&k_,&info));
1316:       SlepcCheckLapackInfo("getri",info);
1317:       PetscCall(PetscFPTrapPop());
1318:       PetscCall(DSRestoreArray(pep->ds,DS_MAT_A,&H));
1319:       pep->ops->backtransform = NULL;
1320:     }
1321:     if (sigma!=0.0) {
1322:       PetscCall(DSGetArray(pep->ds,DS_MAT_A,&H));
1323:       for (i=0;i<k;i++) H[i+ldh*i] += sigma;
1324:       PetscCall(DSRestoreArray(pep->ds,DS_MAT_A,&H));
1325:       pep->ops->backtransform = NULL;
1326:     }
1327:   }
1328:   if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1329:     PetscCall(DSGetArray(pep->ds,DS_MAT_A,&H));
1330:     for (j=0;j<k;j++) {
1331:       for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1332:     }
1333:     PetscCall(DSRestoreArray(pep->ds,DS_MAT_A,&H));
1334:     if (!flg) {
1335:       /* Restore original values */
1336:       for (i=0;i<pep->nmat;i++) {
1337:         pep->pbc[pep->nmat+i] *= pep->sfactor;
1338:         pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1339:       }
1340:     }
1341:   }
1342:   if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1343:     for (i=0;i<k;i++) {
1344:       PetscCall(BVGetColumn(pep->V,i,&v));
1345:       PetscCall(VecPointwiseMult(v,v,pep->Dr));
1346:       PetscCall(BVRestoreColumn(pep->V,i,&v));
1347:     }
1348:   }
1349:   PetscCall(DSGetArray(pep->ds,DS_MAT_A,&H));

1351:   PetscCall(NRefOrthogStep(pep,k,H,ldh,fH,S,lds));
1352:   /* check if H is in Schur form */
1353:   for (i=0;i<k-1;i++) {
1354: #if !defined(PETSC_USE_COMPLEX)
1355:     PetscCheck(H[i+1+i*ldh]==0.0,PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Iterative Refinement requires the complex Schur form of the projected matrix");
1356: #else
1357:     PetscCheck(H[i+1+i*ldh]==0.0,PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Iterative Refinement requires an upper triangular projected matrix");
1358: #endif
1359:   }
1360:   PetscCheck(nsubc<=k,PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Number of subcommunicators should not be larger than the invariant pair dimension");
1361:   PetscCall(BVSetActiveColumns(pep->V,0,k));
1362:   PetscCall(BVDuplicateResize(pep->V,k,&dV));
1363:   if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) {
1364:     PetscCall(PetscMalloc1(1,&matctx));
1365:     if (nsubc>1) { /* splitting in subcommunicators */
1366:       matctx->subc = pep->refinesubc;
1367:       PetscCall(NRefSubcommSetup(pep,k,matctx,nsubc));
1368:     } else matctx->subc=NULL;
1369:   }

1371:   /* Loop performing iterative refinements */
1372:   for (i=0;i<its;i++) {
1373:     /* Pre-compute the polynomial basis evaluated in H */
1374:     PetscCall(PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH));
1375:     PetscCall(PEPNRefSetUp(pep,k,H,ldh,matctx,PetscNot(i)));
1376:     /* Solve the linear system */
1377:     PetscCall(PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,pep->refineksp,matctx));
1378:     /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1379:     PetscCall(PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds));
1380:   }
1381:   PetscCall(DSRestoreArray(pep->ds,DS_MAT_A,&H));
1382:   if (!flg && sinvert) PetscCall(PetscFree(p));
1383:   PetscCall(PetscFree3(dH,fH,work));
1384:   PetscCall(PetscFree(dVS));
1385:   PetscCall(BVDestroy(&dV));
1386:   switch (pep->scheme) {
1387:   case PEP_REFINE_SCHEME_EXPLICIT:
1388:     for (i=0;i<2;i++) PetscCall(MatDestroy(&matctx->E[i]));
1389:     PetscCall(PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1));
1390:     PetscCall(VecDestroy(&matctx->tN));
1391:     PetscCall(VecDestroy(&matctx->ttN));
1392:     PetscCall(VecDestroy(&matctx->t1));
1393:     if (nsubc>1) PetscCall(NRefSubcommDestroy(pep,matctx));
1394:     else {
1395:       PetscCall(VecDestroy(&matctx->vseq));
1396:       PetscCall(VecScatterDestroy(&matctx->scatterctx));
1397:     }
1398:     PetscCall(PetscFree(matctx));
1399:     PetscCall(KSPGetOperators(pep->refineksp,&M,NULL));
1400:     PetscCall(MatDestroy(&M));
1401:     break;
1402:   case PEP_REFINE_SCHEME_MBE:
1403:     PetscCall(BVDestroy(&matctx->W));
1404:     PetscCall(BVDestroy(&matctx->Wt));
1405:     PetscCall(BVDestroy(&matctx->M2));
1406:     PetscCall(BVDestroy(&matctx->M3));
1407:     PetscCall(MatDestroy(&matctx->M1));
1408:     PetscCall(VecDestroy(&matctx->t));
1409:     PetscCall(PetscFree5(matctx->M4,matctx->w,matctx->wt,matctx->d,matctx->dt));
1410:     if (nsubc>1) PetscCall(NRefSubcommDestroy(pep,matctx));
1411:     PetscCall(PetscFree(matctx));
1412:     break;
1413:   case PEP_REFINE_SCHEME_SCHUR:
1414:     PetscCall(KSPGetOperators(pep->refineksp,&M,&P));
1415:     PetscCall(MatShellGetContext(M,&ctx));
1416:     PetscCall(PetscFree5(ctx->A,ctx->M4,ctx->pM4,ctx->work,ctx->fih));
1417:     PetscCall(MatDestroy(&ctx->M1));
1418:     PetscCall(BVDestroy(&ctx->M2));
1419:     PetscCall(BVDestroy(&ctx->M3));
1420:     PetscCall(BVDestroy(&ctx->W));
1421:     PetscCall(VecDestroy(&ctx->t));
1422:     PetscCall(PetscFree(ctx));
1423:     PetscCall(MatDestroy(&M));
1424:     PetscCall(MatDestroy(&P));
1425:     break;
1426:   }
1427:   PetscCall(PetscLogEventEnd(PEP_Refine,pep,0,0,0));
1428:   PetscFunctionReturn(PETSC_SUCCESS);
1429: }