Actual source code: nepdefl.c

slepc-3.21.1 2024-04-26
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/nepimpl.h>
 12: #include <slepcblaslapack.h>
 13: #include "nepdefl.h"

 15: PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
 16: {
 17:   PetscFunctionBegin;
 18:   if (X) *X = extop->X;
 19:   if (H) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H));
 20:   PetscFunctionReturn(PETSC_SUCCESS);
 21: }

 23: static PetscErrorCode NEPDeflationExtendInvariantPair(NEP_EXT_OP extop,Vec u,PetscScalar lambda,PetscInt k)
 24: {
 25:   Vec            uu;
 26:   PetscInt       ld,i;
 27:   PetscMPIInt    np;
 28:   PetscReal      norm;

 30:   PetscFunctionBegin;
 31:   PetscCall(BVGetColumn(extop->X,k,&uu));
 32:   ld = extop->szd+1;
 33:   PetscCall(NEPDeflationCopyToExtendedVec(extop,uu,extop->H+k*ld,u,PETSC_TRUE));
 34:   PetscCall(BVRestoreColumn(extop->X,k,&uu));
 35:   PetscCall(BVNormColumn(extop->X,k,NORM_2,&norm));
 36:   PetscCall(BVScaleColumn(extop->X,k,1.0/norm));
 37:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)u),&np));
 38:   for (i=0;i<k;i++) extop->H[k*ld+i] *= PetscSqrtReal(np)/norm;
 39:   extop->H[k*(ld+1)] = lambda;
 40:   PetscFunctionReturn(PETSC_SUCCESS);
 41: }

 43: PetscErrorCode NEPDeflationExtractEigenpair(NEP_EXT_OP extop,PetscInt k,Vec u,PetscScalar lambda,DS ds)
 44: {
 45:   Mat            A,H;
 46:   PetscInt       ldh=extop->szd+1,ldds,k1=k+1;
 47:   PetscScalar    *eigr,*eigi,*t,*Z;
 48:   Vec            x;

 50:   PetscFunctionBegin;
 51:   PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,k));
 52:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k1,k1,extop->H,&H));
 53:   PetscCall(MatDenseSetLDA(H,ldh));
 54:   PetscCall(PetscCalloc3(k1,&eigr,k1,&eigi,extop->szd,&t));
 55:   PetscCall(DSReset(ds));
 56:   PetscCall(DSSetType(ds,DSNHEP));
 57:   PetscCall(DSAllocate(ds,ldh));
 58:   PetscCall(DSGetLeadingDimension(ds,&ldds));
 59:   PetscCall(DSSetDimensions(ds,k1,0,k1));
 60:   PetscCall(DSGetMat(ds,DS_MAT_A,&A));
 61:   PetscCall(MatCopy(H,A,SAME_NONZERO_PATTERN));
 62:   PetscCall(DSRestoreMat(ds,DS_MAT_A,&A));
 63:   PetscCall(MatDestroy(&H));
 64:   PetscCall(DSSolve(ds,eigr,eigi));
 65:   PetscCall(DSVectors(ds,DS_MAT_X,&k,NULL));
 66:   PetscCall(DSGetArray(ds,DS_MAT_X,&Z));
 67:   PetscCall(BVMultColumn(extop->X,1.0,Z[k*ldds+k],k,Z+k*ldds));
 68:   PetscCall(DSRestoreArray(ds,DS_MAT_X,&Z));
 69:   PetscCall(BVGetColumn(extop->X,k,&x));
 70:   PetscCall(NEPDeflationCopyToExtendedVec(extop,x,t,u,PETSC_FALSE));
 71:   PetscCall(BVRestoreColumn(extop->X,k,&x));
 72:   PetscCall(PetscFree3(eigr,eigi,t));
 73:   PetscFunctionReturn(PETSC_SUCCESS);
 74: }

 76: PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
 77: {
 78:   PetscMPIInt    np,rk,count;
 79:   PetscScalar    *array1,*array2;
 80:   PetscInt       nloc;

 82:   PetscFunctionBegin;
 83:   if (extop->szd) {
 84:     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)vex),&rk));
 85:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)vex),&np));
 86:     PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
 87:     if (v) {
 88:       PetscCall(VecGetArray(v,&array1));
 89:       PetscCall(VecGetArray(vex,&array2));
 90:       if (back) PetscCall(PetscArraycpy(array1,array2,nloc));
 91:       else PetscCall(PetscArraycpy(array2,array1,nloc));
 92:       PetscCall(VecRestoreArray(v,&array1));
 93:       PetscCall(VecRestoreArray(vex,&array2));
 94:     }
 95:     if (a) {
 96:       PetscCall(VecGetArray(vex,&array2));
 97:       if (back) {
 98:         PetscCall(PetscArraycpy(a,array2+nloc,extop->szd));
 99:         PetscCall(PetscMPIIntCast(extop->szd,&count));
100:         PetscCallMPI(MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
101:       } else {
102:         PetscCall(PetscArraycpy(array2+nloc,a,extop->szd));
103:         PetscCall(PetscMPIIntCast(extop->szd,&count));
104:         PetscCallMPI(MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
105:       }
106:       PetscCall(VecRestoreArray(vex,&array2));
107:     }
108:   } else {
109:     if (back) PetscCall(VecCopy(vex,v));
110:     else PetscCall(VecCopy(v,vex));
111:   }
112:   PetscFunctionReturn(PETSC_SUCCESS);
113: }

115: PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
116: {
117:   PetscInt       nloc;
118:   Vec            u;
119:   VecType        type;

121:   PetscFunctionBegin;
122:   if (extop->szd) {
123:     PetscCall(BVGetColumn(extop->nep->V,0,&u));
124:     PetscCall(VecGetType(u,&type));
125:     PetscCall(BVRestoreColumn(extop->nep->V,0,&u));
126:     PetscCall(VecCreate(PetscObjectComm((PetscObject)extop->nep),v));
127:     PetscCall(VecSetType(*v,type));
128:     PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
129:     nloc += extop->szd;
130:     PetscCall(VecSetSizes(*v,nloc,PETSC_DECIDE));
131:   } else PetscCall(BVCreateVec(extop->nep->V,v));
132:   PetscFunctionReturn(PETSC_SUCCESS);
133: }

135: PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
136: {
137:   PetscInt           nloc;
138:   BVType             type;
139:   BVOrthogType       otype;
140:   BVOrthogRefineType oref;
141:   PetscReal          oeta;
142:   BVOrthogBlockType  oblock;
143:   NEP                nep=extop->nep;
144:   VecType            vtype;

146:   PetscFunctionBegin;
147:   if (extop->szd) {
148:     PetscCall(BVGetSizes(nep->V,&nloc,NULL,NULL));
149:     PetscCall(BVCreate(PetscObjectComm((PetscObject)nep),V));
150:     PetscCall(BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz));
151:     PetscCall(BVGetType(nep->V,&type));
152:     PetscCall(BVSetType(*V,type));
153:     PetscCall(BVGetVecType(nep->V,&vtype));
154:     PetscCall(BVSetVecType(*V,vtype));
155:     PetscCall(BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock));
156:     PetscCall(BVSetOrthogonalization(*V,otype,oref,oeta,oblock));
157:     PetscCall(PetscObjectStateIncrease((PetscObject)*V));
158:   } else PetscCall(BVDuplicateResize(nep->V,sz,V));
159:   PetscFunctionReturn(PETSC_SUCCESS);
160: }

162: PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
163: {
164:   PetscInt       n,next,i;
165:   PetscRandom    rand;
166:   PetscScalar    *array;
167:   PetscMPIInt    nn,np;

169:   PetscFunctionBegin;
170:   PetscCall(BVGetRandomContext(extop->nep->V,&rand));
171:   PetscCall(VecSetRandom(v,rand));
172:   if (extop->szd) {
173:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v),&np));
174:     PetscCall(BVGetSizes(extop->nep->V,&n,NULL,NULL));
175:     PetscCall(VecGetLocalSize(v,&next));
176:     PetscCall(VecGetArray(v,&array));
177:     for (i=n+extop->n;i<next;i++) array[i] = 0.0;
178:     for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
179:     PetscCall(PetscMPIIntCast(extop->n,&nn));
180:     PetscCallMPI(MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PetscObjectComm((PetscObject)v)));
181:     PetscCall(VecRestoreArray(v,&array));
182:   }
183:   PetscFunctionReturn(PETSC_SUCCESS);
184: }

186: static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
187: {
188:   PetscInt       i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
189:   PetscScalar    sone=1.0,zero=0.0;
190:   PetscBLASInt   ldh_,ldhj_,n_;

192:   PetscFunctionBegin;
193:   i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
194:   PetscCall(PetscArrayzero(Hj,i));
195:   PetscCall(PetscBLASIntCast(ldhj+1,&ldh_));
196:   PetscCall(PetscBLASIntCast(ldhj,&ldhj_));
197:   PetscCall(PetscBLASIntCast(n,&n_));
198:   if (idx<1) {
199:     if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
200:     else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
201:   } else {
202:       for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
203:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
204:       for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
205:       if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
206:   }
207:   if (idx<0) {
208:     idx = -idx;
209:     for (k=1;k<idx;k++) {
210:       for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
211:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
212:       for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
213:       if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
214:     }
215:   }
216:   PetscFunctionReturn(PETSC_SUCCESS);
217: }

219: PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
220: {
221:   PetscInt       i;

223:   PetscFunctionBegin;
224:   PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,extop->n));
225:   extop->n++;
226:   PetscCall(BVSetActiveColumns(extop->X,0,extop->n));
227:   if (extop->n <= extop->szd) {
228:     /* update XpX */
229:     PetscCall(BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd));
230:     extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
231:     for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
232:     /* determine minimality index */
233:     extop->midx = PetscMin(extop->max_midx,extop->n);
234:     /* polynominal basis coefficients */
235:     for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
236:     /* evaluate the polynomial basis in H */
237:     PetscCall(NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL));
238:   }
239:   PetscFunctionReturn(PETSC_SUCCESS);
240: }

242: static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
243: {
244:   PetscInt          i,j,k,off,ini,fin,sz,ldh,n=extop->n;
245:   Mat               A,B;
246:   PetscScalar       *array;
247:   const PetscScalar *barray;

249:   PetscFunctionBegin;
250:   if (idx<0) {ini = 0; fin = extop->nep->nt;}
251:   else {ini = idx; fin = idx+1;}
252:   if (y) sz = hfjp?n+2:n+1;
253:   else sz = hfjp?3*n:2*n;
254:   ldh = extop->szd+1;
255:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A));
256:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B));
257:   PetscCall(MatDenseGetArray(A,&array));
258:   for (j=0;j<n;j++)
259:     for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
260:   PetscCall(MatDenseRestoreArrayWrite(A,&array));
261:   if (y) {
262:     PetscCall(MatDenseGetArray(A,&array));
263:     array[extop->n*(sz+1)] = lambda;
264:     if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
265:     for (i=0;i<n;i++) array[n*sz+i] = y[i];
266:     PetscCall(MatDenseRestoreArrayWrite(A,&array));
267:     for (j=ini;j<fin;j++) {
268:       PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
269:       PetscCall(MatDenseGetArrayRead(B,&barray));
270:       for (i=0;i<n;i++) hfj[j*ld+i] = barray[n*sz+i];
271:       if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = barray[(n+1)*sz+i];
272:       PetscCall(MatDenseRestoreArrayRead(B,&barray));
273:     }
274:   } else {
275:     off = idx<0?ld*n:0;
276:     PetscCall(MatDenseGetArray(A,&array));
277:     for (k=0;k<n;k++) {
278:       array[(n+k)*sz+k] = 1.0;
279:       array[(n+k)*sz+n+k] = lambda;
280:     }
281:     if (hfjp) for (k=0;k<n;k++) {
282:       array[(2*n+k)*sz+n+k] = 1.0;
283:       array[(2*n+k)*sz+2*n+k] = lambda;
284:     }
285:     PetscCall(MatDenseRestoreArray(A,&array));
286:     for (j=ini;j<fin;j++) {
287:       PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
288:       PetscCall(MatDenseGetArrayRead(B,&barray));
289:       for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = barray[n*sz+i*sz+k];
290:       if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = barray[2*n*sz+i*sz+k];
291:       PetscCall(MatDenseRestoreArrayRead(B,&barray));
292:     }
293:   }
294:   PetscCall(MatDestroy(&A));
295:   PetscCall(MatDestroy(&B));
296:   PetscFunctionReturn(PETSC_SUCCESS);
297: }

299: static PetscErrorCode MatMult_NEPDeflation(Mat M,Vec x,Vec y)
300: {
301:   NEP_DEF_MATSHELL  *matctx;
302:   NEP_EXT_OP        extop;
303:   Vec               x1,y1;
304:   PetscScalar       *yy,sone=1.0,zero=0.0;
305:   const PetscScalar *xx;
306:   PetscInt          nloc,i;
307:   PetscMPIInt       np;
308:   PetscBLASInt      n_,one=1,szd_;

310:   PetscFunctionBegin;
311:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&np));
312:   PetscCall(MatShellGetContext(M,&matctx));
313:   extop = matctx->extop;
314:   if (extop->ref) PetscCall(VecZeroEntries(y));
315:   if (extop->szd) {
316:     x1 = matctx->w[0]; y1 = matctx->w[1];
317:     PetscCall(VecGetArrayRead(x,&xx));
318:     PetscCall(VecPlaceArray(x1,xx));
319:     PetscCall(VecGetArray(y,&yy));
320:     PetscCall(VecPlaceArray(y1,yy));
321:     PetscCall(MatMult(matctx->T,x1,y1));
322:     if (!extop->ref && extop->n) {
323:       PetscCall(VecGetLocalSize(x1,&nloc));
324:       /* copy for avoiding warning of constant array xx */
325:       for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
326:       PetscCall(BVMultVec(matctx->U,1.0,1.0,y1,matctx->work));
327:       PetscCall(BVDotVec(extop->X,x1,matctx->work));
328:       PetscCall(PetscBLASIntCast(extop->n,&n_));
329:       PetscCall(PetscBLASIntCast(extop->szd,&szd_));
330:       PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
331:       PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
332:       for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
333:     }
334:     PetscCall(VecResetArray(x1));
335:     PetscCall(VecRestoreArrayRead(x,&xx));
336:     PetscCall(VecResetArray(y1));
337:     PetscCall(VecRestoreArray(y,&yy));
338:   } else PetscCall(MatMult(matctx->T,x,y));
339:   PetscFunctionReturn(PETSC_SUCCESS);
340: }

342: static PetscErrorCode MatCreateVecs_NEPDeflation(Mat M,Vec *right,Vec *left)
343: {
344:   NEP_DEF_MATSHELL *matctx;

346:   PetscFunctionBegin;
347:   PetscCall(MatShellGetContext(M,&matctx));
348:   if (right) PetscCall(VecDuplicate(matctx->w[0],right));
349:   if (left) PetscCall(VecDuplicate(matctx->w[0],left));
350:   PetscFunctionReturn(PETSC_SUCCESS);
351: }

353: static PetscErrorCode MatDestroy_NEPDeflation(Mat M)
354: {
355:   NEP_DEF_MATSHELL *matctx;

357:   PetscFunctionBegin;
358:   PetscCall(MatShellGetContext(M,&matctx));
359:   if (matctx->extop->szd) {
360:     PetscCall(BVDestroy(&matctx->U));
361:     PetscCall(PetscFree2(matctx->hfj,matctx->work));
362:     PetscCall(PetscFree2(matctx->A,matctx->B));
363:     PetscCall(VecDestroy(&matctx->w[0]));
364:     PetscCall(VecDestroy(&matctx->w[1]));
365:   }
366:   if (matctx->P != matctx->T) PetscCall(MatDestroy(&matctx->P));
367:   PetscCall(MatDestroy(&matctx->T));
368:   PetscCall(PetscFree(matctx));
369:   PetscFunctionReturn(PETSC_SUCCESS);
370: }

372: static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
373: {
374:   PetscScalar p;
375:   PetscInt    i;

377:   PetscFunctionBegin;
378:   if (!jacobian) {
379:     val[0] = 1.0;
380:     for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
381:   } else {
382:     val[0] = 0.0;
383:     p = 1.0;
384:     for (i=1;i<extop->n;i++) {
385:       val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
386:       p *= (lambda-extop->bc[i-1]);
387:     }
388:   }
389:   PetscFunctionReturn(PETSC_SUCCESS);
390: }

392: static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
393: {
394:   NEP_DEF_MATSHELL *matctx,*matctxC;
395:   PetscInt         nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
396:   Mat              F,Mshell,Mcomp;
397:   PetscBool        ini=PETSC_FALSE;
398:   PetscScalar      *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
399:   PetscBLASInt     n_,info,szd_;

401:   PetscFunctionBegin;
402:   if (!M) Mshell = jacobian?extop->MJ:extop->MF;
403:   else Mshell = *M;
404:   Mcomp = jacobian?extop->MF:extop->MJ;
405:   if (!Mshell) {
406:     ini = PETSC_TRUE;
407:     PetscCall(PetscNew(&matctx));
408:     PetscCall(MatGetLocalSize(extop->nep->function,&mloc,&nloc));
409:     nloc += szd; mloc += szd;
410:     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell));
411:     PetscCall(MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))MatMult_NEPDeflation));
412:     PetscCall(MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))MatCreateVecs_NEPDeflation));
413:     PetscCall(MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))MatDestroy_NEPDeflation));
414:     matctx->nep = extop->nep;
415:     matctx->extop = extop;
416:     if (!M) {
417:       if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
418:       else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
419:       PetscCall(PetscObjectReference((PetscObject)matctx->T));
420:       if (!jacobian) {
421:         if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) {
422:           matctx->P = extop->nep->function_pre;
423:           PetscCall(PetscObjectReference((PetscObject)matctx->P));
424:         } else matctx->P = matctx->T;
425:       }
426:     } else {
427:       matctx->jacob = jacobian;
428:       PetscCall(MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES,&matctx->T));
429:       *M = Mshell;
430:       if (!jacobian) {
431:         if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) PetscCall(MatDuplicate(extop->nep->function_pre,MAT_DO_NOT_COPY_VALUES,&matctx->P));
432:         else matctx->P = matctx->T;
433:       }
434:     }
435:     if (szd) {
436:       PetscCall(BVCreateVec(extop->nep->V,matctx->w));
437:       PetscCall(VecDuplicate(matctx->w[0],matctx->w+1));
438:       PetscCall(BVDuplicateResize(extop->nep->V,szd,&matctx->U));
439:       PetscCall(PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work));
440:       PetscCall(PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B));
441:     }
442:   } else PetscCall(MatShellGetContext(Mshell,&matctx));
443:   if (ini || matctx->theta != lambda || matctx->n != extop->n) {
444:     if (ini || matctx->theta != lambda) {
445:       if (jacobian) PetscCall(NEPComputeJacobian(extop->nep,lambda,matctx->T));
446:       else PetscCall(NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->P));
447:     }
448:     if (n) {
449:       matctx->hfjset = PETSC_FALSE;
450:       if (!extop->simpU) {
451:         /* likely hfjp has been already computed */
452:         if (Mcomp) {
453:           PetscCall(MatShellGetContext(Mcomp,&matctxC));
454:           if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
455:             PetscCall(PetscArraycpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt));
456:             matctx->hfjset = PETSC_TRUE;
457:           }
458:         }
459:         hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
460:         if (!matctx->hfjset) {
461:           PetscCall(NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n));
462:           matctx->hfjset = PETSC_TRUE;
463:         }
464:         PetscCall(BVSetActiveColumns(matctx->U,0,n));
465:         hf = jacobian?hfjp:hfj;
466:         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F));
467:         PetscCall(BVMatMult(extop->X,extop->nep->A[0],matctx->U));
468:         PetscCall(BVMultInPlace(matctx->U,F,0,n));
469:         PetscCall(BVSetActiveColumns(extop->W,0,extop->n));
470:         for (j=1;j<extop->nep->nt;j++) {
471:           PetscCall(BVMatMult(extop->X,extop->nep->A[j],extop->W));
472:           PetscCall(MatDensePlaceArray(F,hf+j*n*n));
473:           PetscCall(BVMult(matctx->U,1.0,1.0,extop->W,F));
474:           PetscCall(MatDenseResetArray(F));
475:         }
476:         PetscCall(MatDestroy(&F));
477:       } else {
478:         hfj = matctx->hfj;
479:         PetscCall(BVSetActiveColumns(matctx->U,0,n));
480:         PetscCall(BVMatMult(extop->X,matctx->T,matctx->U));
481:         for (j=0;j<n;j++) {
482:           for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
483:           hfj[j*(n+1)] += lambda;
484:         }
485:         PetscCall(PetscBLASIntCast(n,&n_));
486:         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
487:         PetscCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
488:         PetscCall(PetscFPTrapPop());
489:         SlepcCheckLapackInfo("trtri",info);
490:         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F));
491:         PetscCall(BVMultInPlace(matctx->U,F,0,n));
492:         if (jacobian) {
493:           PetscCall(NEPDeflationComputeFunction(extop,lambda,NULL));
494:           PetscCall(MatShellGetContext(extop->MF,&matctxC));
495:           PetscCall(BVMult(matctx->U,-1.0,1.0,matctxC->U,F));
496:         }
497:         PetscCall(MatDestroy(&F));
498:       }
499:       PetscCall(PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev));
500:       PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian));
501:       A = matctx->A;
502:       PetscCall(PetscArrayzero(A,szd*szd));
503:       if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
504:       for (j=0;j<n;j++)
505:         for (i=0;i<n;i++)
506:           for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
507:       PetscCall(PetscBLASIntCast(n,&n_));
508:       PetscCall(PetscBLASIntCast(szd,&szd_));
509:       B = matctx->B;
510:       PetscCall(PetscArrayzero(B,szd*szd));
511:       for (i=1;i<extop->midx;i++) {
512:         PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
513:         PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
514:         PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
515:         pts = hHprev; hHprev = hH; hH = pts;
516:       }
517:       PetscCall(PetscFree3(basisv,hH,hHprev));
518:     }
519:     matctx->theta = lambda;
520:     matctx->n = extop->n;
521:   }
522:   PetscFunctionReturn(PETSC_SUCCESS);
523: }

525: PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
526: {
527:   PetscFunctionBegin;
528:   PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL));
529:   if (F) *F = extop->MF;
530:   PetscFunctionReturn(PETSC_SUCCESS);
531: }

533: PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
534: {
535:   PetscFunctionBegin;
536:   PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL));
537:   if (J) *J = extop->MJ;
538:   PetscFunctionReturn(PETSC_SUCCESS);
539: }

541: PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
542: {
543:   NEP_DEF_MATSHELL  *matctx;
544:   NEP_DEF_FUN_SOLVE solve;
545:   PetscInt          i,j,n=extop->n;
546:   Vec               u,tu;
547:   Mat               F;
548:   PetscScalar       snone=-1.0,sone=1.0;
549:   PetscBLASInt      n_,szd_,ldh_,*p,info;
550:   Mat               Mshell;

552:   PetscFunctionBegin;
553:   solve = extop->solve;
554:   if (lambda!=solve->theta || n!=solve->n) {
555:     PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T));
556:     Mshell = (solve->sincf)?extop->MF:solve->T;
557:     PetscCall(MatShellGetContext(Mshell,&matctx));
558:     PetscCall(NEP_KSPSetOperators(solve->ksp,matctx->T,matctx->P));
559:     if (!extop->ref && n) {
560:       PetscCall(PetscBLASIntCast(n,&n_));
561:       PetscCall(PetscBLASIntCast(extop->szd,&szd_));
562:       PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
563:       if (!extop->simpU) {
564:         PetscCall(BVSetActiveColumns(solve->T_1U,0,n));
565:         for (i=0;i<n;i++) {
566:           PetscCall(BVGetColumn(matctx->U,i,&u));
567:           PetscCall(BVGetColumn(solve->T_1U,i,&tu));
568:           PetscCall(KSPSolve(solve->ksp,u,tu));
569:           PetscCall(BVRestoreColumn(solve->T_1U,i,&tu));
570:           PetscCall(BVRestoreColumn(matctx->U,i,&u));
571:         }
572:         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F));
573:         PetscCall(BVDot(solve->T_1U,extop->X,F));
574:         PetscCall(MatDestroy(&F));
575:       } else {
576:         for (j=0;j<n;j++)
577:           for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
578:         for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
579:         PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
580:         for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
581:       }
582:       PetscCall(PetscArraycpy(solve->M,matctx->B,extop->szd*extop->szd));
583:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
584:       PetscCall(PetscMalloc1(n,&p));
585:       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
586:       PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
587:       SlepcCheckLapackInfo("getrf",info);
588:       PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
589:       SlepcCheckLapackInfo("getri",info);
590:       PetscCall(PetscFPTrapPop());
591:       PetscCall(PetscFree(p));
592:     }
593:     solve->theta = lambda;
594:     solve->n = n;
595:   }
596:   PetscFunctionReturn(PETSC_SUCCESS);
597: }

599: PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
600: {
601:   Vec               b1,x1;
602:   PetscScalar       *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
603:   NEP_DEF_MATSHELL  *matctx;
604:   NEP_DEF_FUN_SOLVE solve=extop->solve;
605:   PetscBLASInt      one=1,szd_,n_,ldh_;
606:   PetscInt          nloc,i;
607:   PetscMPIInt       np,count;

609:   PetscFunctionBegin;
610:   if (extop->ref) PetscCall(VecZeroEntries(x));
611:   if (extop->szd) {
612:     x1 = solve->w[0]; b1 = solve->w[1];
613:     PetscCall(VecGetArray(x,&xx));
614:     PetscCall(VecPlaceArray(x1,xx));
615:     PetscCall(VecGetArray(b,&bb));
616:     PetscCall(VecPlaceArray(b1,bb));
617:   } else {
618:     b1 = b; x1 = x;
619:   }
620:   PetscCall(KSPSolve(extop->solve->ksp,b1,x1));
621:   if (!extop->ref && extop->n && extop->szd) {
622:     PetscCall(PetscBLASIntCast(extop->szd,&szd_));
623:     PetscCall(PetscBLASIntCast(extop->n,&n_));
624:     PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
625:     PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
626:     PetscCall(PetscMalloc2(extop->n,&b2,extop->n,&x2));
627:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)b),&np));
628:     for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np);
629:     w = solve->work; w2 = solve->work+extop->n;
630:     PetscCall(MatShellGetContext(solve->sincf?extop->MF:solve->T,&matctx));
631:     PetscCall(PetscArraycpy(w2,b2,extop->n));
632:     PetscCall(BVDotVec(extop->X,x1,w));
633:     PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
634:     PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
635:     if (extop->simpU) {
636:       for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
637:       for (i=0;i<extop->n;i++) w[i] = x2[i];
638:       PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
639:       for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
640:       PetscCall(BVMultVec(extop->X,-1.0,1.0,x1,w));
641:     } else PetscCall(BVMultVec(solve->T_1U,-1.0,1.0,x1,x2));
642:     for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
643:     PetscCall(PetscMPIIntCast(extop->n,&count));
644:     PetscCallMPI(MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b)));
645:   }
646:   if (extop->szd) {
647:     PetscCall(VecResetArray(x1));
648:     PetscCall(VecRestoreArray(x,&xx));
649:     PetscCall(VecResetArray(b1));
650:     PetscCall(VecRestoreArray(b,&bb));
651:     if (!extop->ref && extop->n) PetscCall(PetscFree2(b2,x2));
652:   }
653:   PetscFunctionReturn(PETSC_SUCCESS);
654: }

656: PetscErrorCode NEPDeflationSetRefine(NEP_EXT_OP extop,PetscBool ref)
657: {
658:   PetscFunctionBegin;
659:   extop->ref = ref;
660:   PetscFunctionReturn(PETSC_SUCCESS);
661: }

663: PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
664: {
665:   PetscInt          j;
666:   NEP_DEF_FUN_SOLVE solve;

668:   PetscFunctionBegin;
669:   if (!extop) PetscFunctionReturn(PETSC_SUCCESS);
670:   PetscCall(PetscFree(extop->H));
671:   PetscCall(BVDestroy(&extop->X));
672:   if (extop->szd) {
673:     PetscCall(VecDestroy(&extop->w));
674:     PetscCall(PetscFree3(extop->Hj,extop->XpX,extop->bc));
675:     PetscCall(BVDestroy(&extop->W));
676:   }
677:   PetscCall(MatDestroy(&extop->MF));
678:   PetscCall(MatDestroy(&extop->MJ));
679:   if (extop->solve) {
680:     solve = extop->solve;
681:     if (extop->szd) {
682:       if (!extop->simpU) PetscCall(BVDestroy(&solve->T_1U));
683:       PetscCall(PetscFree2(solve->M,solve->work));
684:       PetscCall(VecDestroy(&solve->w[0]));
685:       PetscCall(VecDestroy(&solve->w[1]));
686:     }
687:     if (!solve->sincf) PetscCall(MatDestroy(&solve->T));
688:     PetscCall(PetscFree(extop->solve));
689:   }
690:   if (extop->proj) {
691:     if (extop->szd) {
692:       for (j=0;j<extop->nep->nt;j++) PetscCall(MatDestroy(&extop->proj->V1pApX[j]));
693:       PetscCall(MatDestroy(&extop->proj->XpV1));
694:       PetscCall(PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work));
695:       PetscCall(VecDestroy(&extop->proj->w));
696:       PetscCall(BVDestroy(&extop->proj->V1));
697:     }
698:     PetscCall(PetscFree(extop->proj));
699:   }
700:   PetscCall(PetscFree(extop));
701:   PetscFunctionReturn(PETSC_SUCCESS);
702: }

704: PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
705: {
706:   NEP_EXT_OP        op;
707:   NEP_DEF_FUN_SOLVE solve;
708:   PetscInt          szd;
709:   Vec               x;

711:   PetscFunctionBegin;
712:   PetscCall(NEPDeflationReset(*extop));
713:   PetscCall(PetscNew(&op));
714:   *extop  = op;
715:   op->nep = nep;
716:   op->n   = 0;
717:   op->szd = szd = sz-1;
718:   op->max_midx = PetscMin(MAX_MINIDX,szd);
719:   op->X = X;
720:   if (!X) PetscCall(BVDuplicateResize(nep->V,sz,&op->X));
721:   else PetscCall(PetscObjectReference((PetscObject)X));
722:   PetscCall(PetscCalloc1(sz*sz,&(op)->H));
723:   if (op->szd) {
724:     PetscCall(BVGetColumn(op->X,0,&x));
725:     PetscCall(VecDuplicate(x,&op->w));
726:     PetscCall(BVRestoreColumn(op->X,0,&x));
727:     op->simpU = PETSC_FALSE;
728:     if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
729:       /* undocumented option to use the simple expression for U = T*X*inv(lambda-H) */
730:       PetscCall(PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL));
731:     } else {
732:       op->simpU = PETSC_TRUE;
733:     }
734:     PetscCall(PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc));
735:     PetscCall(BVDuplicateResize(op->X,op->szd,&op->W));
736:   }
737:   if (ksp) {
738:     PetscCall(PetscNew(&solve));
739:     op->solve    = solve;
740:     solve->ksp   = ksp;
741:     solve->sincf = sincfun;
742:     solve->n     = -1;
743:     if (op->szd) {
744:       if (!op->simpU) PetscCall(BVDuplicateResize(nep->V,szd,&solve->T_1U));
745:       PetscCall(PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work));
746:       PetscCall(BVCreateVec(nep->V,&solve->w[0]));
747:       PetscCall(VecDuplicate(solve->w[0],&solve->w[1]));
748:     }
749:   }
750:   PetscFunctionReturn(PETSC_SUCCESS);
751: }

753: static PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
754: {
755:   Mat               A,Ei;
756:   PetscScalar       *T,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
757:   PetscScalar       alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
758:   const PetscScalar *E;
759:   PetscInt          i,ldds,nwork=0,szd,nv,j,k,n;
760:   PetscBLASInt      inc=1,nv_,ldds_,dim_,szdk,szd_,n_,ldh_;
761:   PetscMPIInt       np;
762:   NEP_DEF_PROJECT   proj=(NEP_DEF_PROJECT)ctx;
763:   NEP_EXT_OP        extop=proj->extop;
764:   NEP               nep=extop->nep;

766:   PetscFunctionBegin;
767:   PetscCall(DSGetDimensions(ds,&nv,NULL,NULL,NULL));
768:   PetscCall(DSGetLeadingDimension(ds,&ldds));
769:   PetscCall(DSGetMat(ds,mat,&A));
770:   PetscCall(MatZeroEntries(A));
771:   /* mat = V1^*T(lambda)V1 */
772:   for (i=0;i<nep->nt;i++) {
773:     if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
774:     else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
775:     PetscCall(DSGetMat(ds,DSMatExtra[i],&Ei));
776:     PetscCall(MatAXPY(A,alpha,Ei,SAME_NONZERO_PATTERN));
777:     PetscCall(DSRestoreMat(ds,DSMatExtra[i],&Ei));
778:   }
779:   PetscCall(DSRestoreMat(ds,mat,&A));
780:   if (!extop->ref && extop->n) {
781:     PetscCall(DSGetArray(ds,mat,&T));
782:     n = extop->n;
783:     szd = extop->szd;
784:     PetscCall(PetscArrayzero(proj->work,proj->lwork));
785:     PetscCall(PetscBLASIntCast(nv,&nv_));
786:     PetscCall(PetscBLASIntCast(n,&n_));
787:     PetscCall(PetscBLASIntCast(ldds,&ldds_));
788:     PetscCall(PetscBLASIntCast(szd,&szd_));
789:     PetscCall(PetscBLASIntCast(proj->dim,&dim_));
790:     PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
791:     w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
792:     nwork += 2*proj->dim*proj->dim;

794:     /* mat = mat + V1^*U(lambda)V2 */
795:     for (i=0;i<nep->nt;i++) {
796:       if (extop->simpU) {
797:         if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
798:         else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
799:         ww = w1; w = w2;
800:         PetscCall(PetscArraycpy(ww,proj->V2,szd*nv));
801:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
802:         for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
803:         for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
804:         alpha = -alpha;
805:         PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
806:         if (deriv) {
807:           PetscCall(PetscBLASIntCast(szd*nv,&szdk));
808:           PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha2));
809:           PetscCall(PetscArraycpy(w,proj->V2,szd*nv));
810:           for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
811:           alpha2 = -alpha2;
812:           PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
813:           alpha2 = 1.0;
814:           PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
815:           PetscCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
816:         }
817:         for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
818:       } else {
819:         PetscCall(NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd));
820:         w = deriv?w2:w1; ww = deriv?w1:w2;
821:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
822:         s = PetscSqrtReal(np);
823:         PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
824:       }
825:       PetscCall(MatDenseGetArrayRead(proj->V1pApX[i],&E));
826:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
827:       PetscCall(MatDenseRestoreArrayRead(proj->V1pApX[i],&E));
828:     }

830:     /* mat = mat + V2^*A(lambda)V1 */
831:     basisv = proj->work+nwork; nwork += szd;
832:     hH     = proj->work+nwork; nwork += szd*szd;
833:     hHprev = proj->work+nwork; nwork += szd*szd;
834:     AB     = proj->work+nwork;
835:     PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv));
836:     if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
837:     for (j=0;j<n;j++)
838:       for (i=0;i<n;i++)
839:         for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
840:     PetscCall(MatDenseGetArrayRead(proj->XpV1,&E));
841:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
842:     PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
843:     PetscCall(MatDenseRestoreArrayRead(proj->XpV1,&E));

845:     /* mat = mat + V2^*B(lambda)V2 */
846:     PetscCall(PetscArrayzero(AB,szd*szd));
847:     for (i=1;i<extop->midx;i++) {
848:       PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
849:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
850:       PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
851:       pts = hHprev; hHprev = hH; hH = pts;
852:     }
853:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
854:     PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
855:     PetscCall(DSRestoreArray(ds,mat,&T));
856:   }
857:   PetscFunctionReturn(PETSC_SUCCESS);
858: }

860: PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
861: {
862:   PetscInt        k,j,n=extop->n,dim;
863:   Vec             v,ve;
864:   BV              V1;
865:   Mat             G;
866:   NEP             nep=extop->nep;
867:   NEP_DEF_PROJECT proj;

869:   PetscFunctionBegin;
870:   NEPCheckSplit(extop->nep,1);
871:   proj = extop->proj;
872:   if (!proj) {
873:     /* Initialize the projection data structure */
874:     PetscCall(PetscNew(&proj));
875:     extop->proj = proj;
876:     proj->extop = extop;
877:     PetscCall(BVGetSizes(Vext,NULL,NULL,&dim));
878:     proj->dim = dim;
879:     if (extop->szd) {
880:       proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
881:       PetscCall(PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work));
882:       for (j=0;j<nep->nt;j++) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]));
883:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1));
884:       PetscCall(BVCreateVec(extop->X,&proj->w));
885:       PetscCall(BVDuplicateResize(extop->X,proj->dim,&proj->V1));
886:     }
887:     PetscCall(DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj));
888:   }

890:   /* Split Vext in V1 and V2 */
891:   if (extop->szd) {
892:     for (j=j0;j<j1;j++) {
893:       PetscCall(BVGetColumn(Vext,j,&ve));
894:       PetscCall(BVGetColumn(proj->V1,j,&v));
895:       PetscCall(NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE));
896:       PetscCall(BVRestoreColumn(proj->V1,j,&v));
897:       PetscCall(BVRestoreColumn(Vext,j,&ve));
898:     }
899:     V1 = proj->V1;
900:   } else V1 = Vext;

902:   /* Compute matrices V1^* A_i V1 */
903:   PetscCall(BVSetActiveColumns(V1,j0,j1));
904:   for (k=0;k<nep->nt;k++) {
905:     PetscCall(DSGetMat(ds,DSMatExtra[k],&G));
906:     PetscCall(BVMatProject(V1,nep->A[k],V1,G));
907:     PetscCall(DSRestoreMat(ds,DSMatExtra[k],&G));
908:   }

910:   if (extop->n) {
911:     if (extop->szd) {
912:       /* Compute matrices V1^* A_i X  and V1^* X */
913:       PetscCall(BVSetActiveColumns(extop->W,0,n));
914:       for (k=0;k<nep->nt;k++) {
915:         PetscCall(BVMatMult(extop->X,nep->A[k],extop->W));
916:         PetscCall(BVDot(extop->W,V1,proj->V1pApX[k]));
917:       }
918:       PetscCall(BVDot(V1,extop->X,proj->XpV1));
919:     }
920:   }
921:   PetscFunctionReturn(PETSC_SUCCESS);
922: }