Actual source code: nepdefl.c
slepc-3.21.1 2024-04-26
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/nepimpl.h>
12: #include <slepcblaslapack.h>
13: #include "nepdefl.h"
15: PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
16: {
17: PetscFunctionBegin;
18: if (X) *X = extop->X;
19: if (H) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H));
20: PetscFunctionReturn(PETSC_SUCCESS);
21: }
23: static PetscErrorCode NEPDeflationExtendInvariantPair(NEP_EXT_OP extop,Vec u,PetscScalar lambda,PetscInt k)
24: {
25: Vec uu;
26: PetscInt ld,i;
27: PetscMPIInt np;
28: PetscReal norm;
30: PetscFunctionBegin;
31: PetscCall(BVGetColumn(extop->X,k,&uu));
32: ld = extop->szd+1;
33: PetscCall(NEPDeflationCopyToExtendedVec(extop,uu,extop->H+k*ld,u,PETSC_TRUE));
34: PetscCall(BVRestoreColumn(extop->X,k,&uu));
35: PetscCall(BVNormColumn(extop->X,k,NORM_2,&norm));
36: PetscCall(BVScaleColumn(extop->X,k,1.0/norm));
37: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)u),&np));
38: for (i=0;i<k;i++) extop->H[k*ld+i] *= PetscSqrtReal(np)/norm;
39: extop->H[k*(ld+1)] = lambda;
40: PetscFunctionReturn(PETSC_SUCCESS);
41: }
43: PetscErrorCode NEPDeflationExtractEigenpair(NEP_EXT_OP extop,PetscInt k,Vec u,PetscScalar lambda,DS ds)
44: {
45: Mat A,H;
46: PetscInt ldh=extop->szd+1,ldds,k1=k+1;
47: PetscScalar *eigr,*eigi,*t,*Z;
48: Vec x;
50: PetscFunctionBegin;
51: PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,k));
52: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k1,k1,extop->H,&H));
53: PetscCall(MatDenseSetLDA(H,ldh));
54: PetscCall(PetscCalloc3(k1,&eigr,k1,&eigi,extop->szd,&t));
55: PetscCall(DSReset(ds));
56: PetscCall(DSSetType(ds,DSNHEP));
57: PetscCall(DSAllocate(ds,ldh));
58: PetscCall(DSGetLeadingDimension(ds,&ldds));
59: PetscCall(DSSetDimensions(ds,k1,0,k1));
60: PetscCall(DSGetMat(ds,DS_MAT_A,&A));
61: PetscCall(MatCopy(H,A,SAME_NONZERO_PATTERN));
62: PetscCall(DSRestoreMat(ds,DS_MAT_A,&A));
63: PetscCall(MatDestroy(&H));
64: PetscCall(DSSolve(ds,eigr,eigi));
65: PetscCall(DSVectors(ds,DS_MAT_X,&k,NULL));
66: PetscCall(DSGetArray(ds,DS_MAT_X,&Z));
67: PetscCall(BVMultColumn(extop->X,1.0,Z[k*ldds+k],k,Z+k*ldds));
68: PetscCall(DSRestoreArray(ds,DS_MAT_X,&Z));
69: PetscCall(BVGetColumn(extop->X,k,&x));
70: PetscCall(NEPDeflationCopyToExtendedVec(extop,x,t,u,PETSC_FALSE));
71: PetscCall(BVRestoreColumn(extop->X,k,&x));
72: PetscCall(PetscFree3(eigr,eigi,t));
73: PetscFunctionReturn(PETSC_SUCCESS);
74: }
76: PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
77: {
78: PetscMPIInt np,rk,count;
79: PetscScalar *array1,*array2;
80: PetscInt nloc;
82: PetscFunctionBegin;
83: if (extop->szd) {
84: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)vex),&rk));
85: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)vex),&np));
86: PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
87: if (v) {
88: PetscCall(VecGetArray(v,&array1));
89: PetscCall(VecGetArray(vex,&array2));
90: if (back) PetscCall(PetscArraycpy(array1,array2,nloc));
91: else PetscCall(PetscArraycpy(array2,array1,nloc));
92: PetscCall(VecRestoreArray(v,&array1));
93: PetscCall(VecRestoreArray(vex,&array2));
94: }
95: if (a) {
96: PetscCall(VecGetArray(vex,&array2));
97: if (back) {
98: PetscCall(PetscArraycpy(a,array2+nloc,extop->szd));
99: PetscCall(PetscMPIIntCast(extop->szd,&count));
100: PetscCallMPI(MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
101: } else {
102: PetscCall(PetscArraycpy(array2+nloc,a,extop->szd));
103: PetscCall(PetscMPIIntCast(extop->szd,&count));
104: PetscCallMPI(MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
105: }
106: PetscCall(VecRestoreArray(vex,&array2));
107: }
108: } else {
109: if (back) PetscCall(VecCopy(vex,v));
110: else PetscCall(VecCopy(v,vex));
111: }
112: PetscFunctionReturn(PETSC_SUCCESS);
113: }
115: PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
116: {
117: PetscInt nloc;
118: Vec u;
119: VecType type;
121: PetscFunctionBegin;
122: if (extop->szd) {
123: PetscCall(BVGetColumn(extop->nep->V,0,&u));
124: PetscCall(VecGetType(u,&type));
125: PetscCall(BVRestoreColumn(extop->nep->V,0,&u));
126: PetscCall(VecCreate(PetscObjectComm((PetscObject)extop->nep),v));
127: PetscCall(VecSetType(*v,type));
128: PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
129: nloc += extop->szd;
130: PetscCall(VecSetSizes(*v,nloc,PETSC_DECIDE));
131: } else PetscCall(BVCreateVec(extop->nep->V,v));
132: PetscFunctionReturn(PETSC_SUCCESS);
133: }
135: PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
136: {
137: PetscInt nloc;
138: BVType type;
139: BVOrthogType otype;
140: BVOrthogRefineType oref;
141: PetscReal oeta;
142: BVOrthogBlockType oblock;
143: NEP nep=extop->nep;
144: VecType vtype;
146: PetscFunctionBegin;
147: if (extop->szd) {
148: PetscCall(BVGetSizes(nep->V,&nloc,NULL,NULL));
149: PetscCall(BVCreate(PetscObjectComm((PetscObject)nep),V));
150: PetscCall(BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz));
151: PetscCall(BVGetType(nep->V,&type));
152: PetscCall(BVSetType(*V,type));
153: PetscCall(BVGetVecType(nep->V,&vtype));
154: PetscCall(BVSetVecType(*V,vtype));
155: PetscCall(BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock));
156: PetscCall(BVSetOrthogonalization(*V,otype,oref,oeta,oblock));
157: PetscCall(PetscObjectStateIncrease((PetscObject)*V));
158: } else PetscCall(BVDuplicateResize(nep->V,sz,V));
159: PetscFunctionReturn(PETSC_SUCCESS);
160: }
162: PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
163: {
164: PetscInt n,next,i;
165: PetscRandom rand;
166: PetscScalar *array;
167: PetscMPIInt nn,np;
169: PetscFunctionBegin;
170: PetscCall(BVGetRandomContext(extop->nep->V,&rand));
171: PetscCall(VecSetRandom(v,rand));
172: if (extop->szd) {
173: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v),&np));
174: PetscCall(BVGetSizes(extop->nep->V,&n,NULL,NULL));
175: PetscCall(VecGetLocalSize(v,&next));
176: PetscCall(VecGetArray(v,&array));
177: for (i=n+extop->n;i<next;i++) array[i] = 0.0;
178: for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
179: PetscCall(PetscMPIIntCast(extop->n,&nn));
180: PetscCallMPI(MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PetscObjectComm((PetscObject)v)));
181: PetscCall(VecRestoreArray(v,&array));
182: }
183: PetscFunctionReturn(PETSC_SUCCESS);
184: }
186: static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
187: {
188: PetscInt i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
189: PetscScalar sone=1.0,zero=0.0;
190: PetscBLASInt ldh_,ldhj_,n_;
192: PetscFunctionBegin;
193: i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
194: PetscCall(PetscArrayzero(Hj,i));
195: PetscCall(PetscBLASIntCast(ldhj+1,&ldh_));
196: PetscCall(PetscBLASIntCast(ldhj,&ldhj_));
197: PetscCall(PetscBLASIntCast(n,&n_));
198: if (idx<1) {
199: if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
200: else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
201: } else {
202: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
203: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
204: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
205: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
206: }
207: if (idx<0) {
208: idx = -idx;
209: for (k=1;k<idx;k++) {
210: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
211: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
212: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
213: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
214: }
215: }
216: PetscFunctionReturn(PETSC_SUCCESS);
217: }
219: PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
220: {
221: PetscInt i;
223: PetscFunctionBegin;
224: PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,extop->n));
225: extop->n++;
226: PetscCall(BVSetActiveColumns(extop->X,0,extop->n));
227: if (extop->n <= extop->szd) {
228: /* update XpX */
229: PetscCall(BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd));
230: extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
231: for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
232: /* determine minimality index */
233: extop->midx = PetscMin(extop->max_midx,extop->n);
234: /* polynominal basis coefficients */
235: for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
236: /* evaluate the polynomial basis in H */
237: PetscCall(NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL));
238: }
239: PetscFunctionReturn(PETSC_SUCCESS);
240: }
242: static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
243: {
244: PetscInt i,j,k,off,ini,fin,sz,ldh,n=extop->n;
245: Mat A,B;
246: PetscScalar *array;
247: const PetscScalar *barray;
249: PetscFunctionBegin;
250: if (idx<0) {ini = 0; fin = extop->nep->nt;}
251: else {ini = idx; fin = idx+1;}
252: if (y) sz = hfjp?n+2:n+1;
253: else sz = hfjp?3*n:2*n;
254: ldh = extop->szd+1;
255: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A));
256: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B));
257: PetscCall(MatDenseGetArray(A,&array));
258: for (j=0;j<n;j++)
259: for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
260: PetscCall(MatDenseRestoreArrayWrite(A,&array));
261: if (y) {
262: PetscCall(MatDenseGetArray(A,&array));
263: array[extop->n*(sz+1)] = lambda;
264: if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
265: for (i=0;i<n;i++) array[n*sz+i] = y[i];
266: PetscCall(MatDenseRestoreArrayWrite(A,&array));
267: for (j=ini;j<fin;j++) {
268: PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
269: PetscCall(MatDenseGetArrayRead(B,&barray));
270: for (i=0;i<n;i++) hfj[j*ld+i] = barray[n*sz+i];
271: if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = barray[(n+1)*sz+i];
272: PetscCall(MatDenseRestoreArrayRead(B,&barray));
273: }
274: } else {
275: off = idx<0?ld*n:0;
276: PetscCall(MatDenseGetArray(A,&array));
277: for (k=0;k<n;k++) {
278: array[(n+k)*sz+k] = 1.0;
279: array[(n+k)*sz+n+k] = lambda;
280: }
281: if (hfjp) for (k=0;k<n;k++) {
282: array[(2*n+k)*sz+n+k] = 1.0;
283: array[(2*n+k)*sz+2*n+k] = lambda;
284: }
285: PetscCall(MatDenseRestoreArray(A,&array));
286: for (j=ini;j<fin;j++) {
287: PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
288: PetscCall(MatDenseGetArrayRead(B,&barray));
289: for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = barray[n*sz+i*sz+k];
290: if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = barray[2*n*sz+i*sz+k];
291: PetscCall(MatDenseRestoreArrayRead(B,&barray));
292: }
293: }
294: PetscCall(MatDestroy(&A));
295: PetscCall(MatDestroy(&B));
296: PetscFunctionReturn(PETSC_SUCCESS);
297: }
299: static PetscErrorCode MatMult_NEPDeflation(Mat M,Vec x,Vec y)
300: {
301: NEP_DEF_MATSHELL *matctx;
302: NEP_EXT_OP extop;
303: Vec x1,y1;
304: PetscScalar *yy,sone=1.0,zero=0.0;
305: const PetscScalar *xx;
306: PetscInt nloc,i;
307: PetscMPIInt np;
308: PetscBLASInt n_,one=1,szd_;
310: PetscFunctionBegin;
311: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&np));
312: PetscCall(MatShellGetContext(M,&matctx));
313: extop = matctx->extop;
314: if (extop->ref) PetscCall(VecZeroEntries(y));
315: if (extop->szd) {
316: x1 = matctx->w[0]; y1 = matctx->w[1];
317: PetscCall(VecGetArrayRead(x,&xx));
318: PetscCall(VecPlaceArray(x1,xx));
319: PetscCall(VecGetArray(y,&yy));
320: PetscCall(VecPlaceArray(y1,yy));
321: PetscCall(MatMult(matctx->T,x1,y1));
322: if (!extop->ref && extop->n) {
323: PetscCall(VecGetLocalSize(x1,&nloc));
324: /* copy for avoiding warning of constant array xx */
325: for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
326: PetscCall(BVMultVec(matctx->U,1.0,1.0,y1,matctx->work));
327: PetscCall(BVDotVec(extop->X,x1,matctx->work));
328: PetscCall(PetscBLASIntCast(extop->n,&n_));
329: PetscCall(PetscBLASIntCast(extop->szd,&szd_));
330: PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
331: PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
332: for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
333: }
334: PetscCall(VecResetArray(x1));
335: PetscCall(VecRestoreArrayRead(x,&xx));
336: PetscCall(VecResetArray(y1));
337: PetscCall(VecRestoreArray(y,&yy));
338: } else PetscCall(MatMult(matctx->T,x,y));
339: PetscFunctionReturn(PETSC_SUCCESS);
340: }
342: static PetscErrorCode MatCreateVecs_NEPDeflation(Mat M,Vec *right,Vec *left)
343: {
344: NEP_DEF_MATSHELL *matctx;
346: PetscFunctionBegin;
347: PetscCall(MatShellGetContext(M,&matctx));
348: if (right) PetscCall(VecDuplicate(matctx->w[0],right));
349: if (left) PetscCall(VecDuplicate(matctx->w[0],left));
350: PetscFunctionReturn(PETSC_SUCCESS);
351: }
353: static PetscErrorCode MatDestroy_NEPDeflation(Mat M)
354: {
355: NEP_DEF_MATSHELL *matctx;
357: PetscFunctionBegin;
358: PetscCall(MatShellGetContext(M,&matctx));
359: if (matctx->extop->szd) {
360: PetscCall(BVDestroy(&matctx->U));
361: PetscCall(PetscFree2(matctx->hfj,matctx->work));
362: PetscCall(PetscFree2(matctx->A,matctx->B));
363: PetscCall(VecDestroy(&matctx->w[0]));
364: PetscCall(VecDestroy(&matctx->w[1]));
365: }
366: if (matctx->P != matctx->T) PetscCall(MatDestroy(&matctx->P));
367: PetscCall(MatDestroy(&matctx->T));
368: PetscCall(PetscFree(matctx));
369: PetscFunctionReturn(PETSC_SUCCESS);
370: }
372: static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
373: {
374: PetscScalar p;
375: PetscInt i;
377: PetscFunctionBegin;
378: if (!jacobian) {
379: val[0] = 1.0;
380: for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
381: } else {
382: val[0] = 0.0;
383: p = 1.0;
384: for (i=1;i<extop->n;i++) {
385: val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
386: p *= (lambda-extop->bc[i-1]);
387: }
388: }
389: PetscFunctionReturn(PETSC_SUCCESS);
390: }
392: static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
393: {
394: NEP_DEF_MATSHELL *matctx,*matctxC;
395: PetscInt nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
396: Mat F,Mshell,Mcomp;
397: PetscBool ini=PETSC_FALSE;
398: PetscScalar *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
399: PetscBLASInt n_,info,szd_;
401: PetscFunctionBegin;
402: if (!M) Mshell = jacobian?extop->MJ:extop->MF;
403: else Mshell = *M;
404: Mcomp = jacobian?extop->MF:extop->MJ;
405: if (!Mshell) {
406: ini = PETSC_TRUE;
407: PetscCall(PetscNew(&matctx));
408: PetscCall(MatGetLocalSize(extop->nep->function,&mloc,&nloc));
409: nloc += szd; mloc += szd;
410: PetscCall(MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell));
411: PetscCall(MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))MatMult_NEPDeflation));
412: PetscCall(MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))MatCreateVecs_NEPDeflation));
413: PetscCall(MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))MatDestroy_NEPDeflation));
414: matctx->nep = extop->nep;
415: matctx->extop = extop;
416: if (!M) {
417: if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
418: else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
419: PetscCall(PetscObjectReference((PetscObject)matctx->T));
420: if (!jacobian) {
421: if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) {
422: matctx->P = extop->nep->function_pre;
423: PetscCall(PetscObjectReference((PetscObject)matctx->P));
424: } else matctx->P = matctx->T;
425: }
426: } else {
427: matctx->jacob = jacobian;
428: PetscCall(MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES,&matctx->T));
429: *M = Mshell;
430: if (!jacobian) {
431: if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) PetscCall(MatDuplicate(extop->nep->function_pre,MAT_DO_NOT_COPY_VALUES,&matctx->P));
432: else matctx->P = matctx->T;
433: }
434: }
435: if (szd) {
436: PetscCall(BVCreateVec(extop->nep->V,matctx->w));
437: PetscCall(VecDuplicate(matctx->w[0],matctx->w+1));
438: PetscCall(BVDuplicateResize(extop->nep->V,szd,&matctx->U));
439: PetscCall(PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work));
440: PetscCall(PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B));
441: }
442: } else PetscCall(MatShellGetContext(Mshell,&matctx));
443: if (ini || matctx->theta != lambda || matctx->n != extop->n) {
444: if (ini || matctx->theta != lambda) {
445: if (jacobian) PetscCall(NEPComputeJacobian(extop->nep,lambda,matctx->T));
446: else PetscCall(NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->P));
447: }
448: if (n) {
449: matctx->hfjset = PETSC_FALSE;
450: if (!extop->simpU) {
451: /* likely hfjp has been already computed */
452: if (Mcomp) {
453: PetscCall(MatShellGetContext(Mcomp,&matctxC));
454: if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
455: PetscCall(PetscArraycpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt));
456: matctx->hfjset = PETSC_TRUE;
457: }
458: }
459: hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
460: if (!matctx->hfjset) {
461: PetscCall(NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n));
462: matctx->hfjset = PETSC_TRUE;
463: }
464: PetscCall(BVSetActiveColumns(matctx->U,0,n));
465: hf = jacobian?hfjp:hfj;
466: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F));
467: PetscCall(BVMatMult(extop->X,extop->nep->A[0],matctx->U));
468: PetscCall(BVMultInPlace(matctx->U,F,0,n));
469: PetscCall(BVSetActiveColumns(extop->W,0,extop->n));
470: for (j=1;j<extop->nep->nt;j++) {
471: PetscCall(BVMatMult(extop->X,extop->nep->A[j],extop->W));
472: PetscCall(MatDensePlaceArray(F,hf+j*n*n));
473: PetscCall(BVMult(matctx->U,1.0,1.0,extop->W,F));
474: PetscCall(MatDenseResetArray(F));
475: }
476: PetscCall(MatDestroy(&F));
477: } else {
478: hfj = matctx->hfj;
479: PetscCall(BVSetActiveColumns(matctx->U,0,n));
480: PetscCall(BVMatMult(extop->X,matctx->T,matctx->U));
481: for (j=0;j<n;j++) {
482: for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
483: hfj[j*(n+1)] += lambda;
484: }
485: PetscCall(PetscBLASIntCast(n,&n_));
486: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
487: PetscCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
488: PetscCall(PetscFPTrapPop());
489: SlepcCheckLapackInfo("trtri",info);
490: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F));
491: PetscCall(BVMultInPlace(matctx->U,F,0,n));
492: if (jacobian) {
493: PetscCall(NEPDeflationComputeFunction(extop,lambda,NULL));
494: PetscCall(MatShellGetContext(extop->MF,&matctxC));
495: PetscCall(BVMult(matctx->U,-1.0,1.0,matctxC->U,F));
496: }
497: PetscCall(MatDestroy(&F));
498: }
499: PetscCall(PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev));
500: PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian));
501: A = matctx->A;
502: PetscCall(PetscArrayzero(A,szd*szd));
503: if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
504: for (j=0;j<n;j++)
505: for (i=0;i<n;i++)
506: for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
507: PetscCall(PetscBLASIntCast(n,&n_));
508: PetscCall(PetscBLASIntCast(szd,&szd_));
509: B = matctx->B;
510: PetscCall(PetscArrayzero(B,szd*szd));
511: for (i=1;i<extop->midx;i++) {
512: PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
513: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
514: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
515: pts = hHprev; hHprev = hH; hH = pts;
516: }
517: PetscCall(PetscFree3(basisv,hH,hHprev));
518: }
519: matctx->theta = lambda;
520: matctx->n = extop->n;
521: }
522: PetscFunctionReturn(PETSC_SUCCESS);
523: }
525: PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
526: {
527: PetscFunctionBegin;
528: PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL));
529: if (F) *F = extop->MF;
530: PetscFunctionReturn(PETSC_SUCCESS);
531: }
533: PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
534: {
535: PetscFunctionBegin;
536: PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL));
537: if (J) *J = extop->MJ;
538: PetscFunctionReturn(PETSC_SUCCESS);
539: }
541: PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
542: {
543: NEP_DEF_MATSHELL *matctx;
544: NEP_DEF_FUN_SOLVE solve;
545: PetscInt i,j,n=extop->n;
546: Vec u,tu;
547: Mat F;
548: PetscScalar snone=-1.0,sone=1.0;
549: PetscBLASInt n_,szd_,ldh_,*p,info;
550: Mat Mshell;
552: PetscFunctionBegin;
553: solve = extop->solve;
554: if (lambda!=solve->theta || n!=solve->n) {
555: PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T));
556: Mshell = (solve->sincf)?extop->MF:solve->T;
557: PetscCall(MatShellGetContext(Mshell,&matctx));
558: PetscCall(NEP_KSPSetOperators(solve->ksp,matctx->T,matctx->P));
559: if (!extop->ref && n) {
560: PetscCall(PetscBLASIntCast(n,&n_));
561: PetscCall(PetscBLASIntCast(extop->szd,&szd_));
562: PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
563: if (!extop->simpU) {
564: PetscCall(BVSetActiveColumns(solve->T_1U,0,n));
565: for (i=0;i<n;i++) {
566: PetscCall(BVGetColumn(matctx->U,i,&u));
567: PetscCall(BVGetColumn(solve->T_1U,i,&tu));
568: PetscCall(KSPSolve(solve->ksp,u,tu));
569: PetscCall(BVRestoreColumn(solve->T_1U,i,&tu));
570: PetscCall(BVRestoreColumn(matctx->U,i,&u));
571: }
572: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F));
573: PetscCall(BVDot(solve->T_1U,extop->X,F));
574: PetscCall(MatDestroy(&F));
575: } else {
576: for (j=0;j<n;j++)
577: for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
578: for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
579: PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
580: for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
581: }
582: PetscCall(PetscArraycpy(solve->M,matctx->B,extop->szd*extop->szd));
583: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
584: PetscCall(PetscMalloc1(n,&p));
585: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
586: PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
587: SlepcCheckLapackInfo("getrf",info);
588: PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
589: SlepcCheckLapackInfo("getri",info);
590: PetscCall(PetscFPTrapPop());
591: PetscCall(PetscFree(p));
592: }
593: solve->theta = lambda;
594: solve->n = n;
595: }
596: PetscFunctionReturn(PETSC_SUCCESS);
597: }
599: PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
600: {
601: Vec b1,x1;
602: PetscScalar *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
603: NEP_DEF_MATSHELL *matctx;
604: NEP_DEF_FUN_SOLVE solve=extop->solve;
605: PetscBLASInt one=1,szd_,n_,ldh_;
606: PetscInt nloc,i;
607: PetscMPIInt np,count;
609: PetscFunctionBegin;
610: if (extop->ref) PetscCall(VecZeroEntries(x));
611: if (extop->szd) {
612: x1 = solve->w[0]; b1 = solve->w[1];
613: PetscCall(VecGetArray(x,&xx));
614: PetscCall(VecPlaceArray(x1,xx));
615: PetscCall(VecGetArray(b,&bb));
616: PetscCall(VecPlaceArray(b1,bb));
617: } else {
618: b1 = b; x1 = x;
619: }
620: PetscCall(KSPSolve(extop->solve->ksp,b1,x1));
621: if (!extop->ref && extop->n && extop->szd) {
622: PetscCall(PetscBLASIntCast(extop->szd,&szd_));
623: PetscCall(PetscBLASIntCast(extop->n,&n_));
624: PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
625: PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
626: PetscCall(PetscMalloc2(extop->n,&b2,extop->n,&x2));
627: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)b),&np));
628: for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np);
629: w = solve->work; w2 = solve->work+extop->n;
630: PetscCall(MatShellGetContext(solve->sincf?extop->MF:solve->T,&matctx));
631: PetscCall(PetscArraycpy(w2,b2,extop->n));
632: PetscCall(BVDotVec(extop->X,x1,w));
633: PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
634: PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
635: if (extop->simpU) {
636: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
637: for (i=0;i<extop->n;i++) w[i] = x2[i];
638: PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
639: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
640: PetscCall(BVMultVec(extop->X,-1.0,1.0,x1,w));
641: } else PetscCall(BVMultVec(solve->T_1U,-1.0,1.0,x1,x2));
642: for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
643: PetscCall(PetscMPIIntCast(extop->n,&count));
644: PetscCallMPI(MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b)));
645: }
646: if (extop->szd) {
647: PetscCall(VecResetArray(x1));
648: PetscCall(VecRestoreArray(x,&xx));
649: PetscCall(VecResetArray(b1));
650: PetscCall(VecRestoreArray(b,&bb));
651: if (!extop->ref && extop->n) PetscCall(PetscFree2(b2,x2));
652: }
653: PetscFunctionReturn(PETSC_SUCCESS);
654: }
656: PetscErrorCode NEPDeflationSetRefine(NEP_EXT_OP extop,PetscBool ref)
657: {
658: PetscFunctionBegin;
659: extop->ref = ref;
660: PetscFunctionReturn(PETSC_SUCCESS);
661: }
663: PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
664: {
665: PetscInt j;
666: NEP_DEF_FUN_SOLVE solve;
668: PetscFunctionBegin;
669: if (!extop) PetscFunctionReturn(PETSC_SUCCESS);
670: PetscCall(PetscFree(extop->H));
671: PetscCall(BVDestroy(&extop->X));
672: if (extop->szd) {
673: PetscCall(VecDestroy(&extop->w));
674: PetscCall(PetscFree3(extop->Hj,extop->XpX,extop->bc));
675: PetscCall(BVDestroy(&extop->W));
676: }
677: PetscCall(MatDestroy(&extop->MF));
678: PetscCall(MatDestroy(&extop->MJ));
679: if (extop->solve) {
680: solve = extop->solve;
681: if (extop->szd) {
682: if (!extop->simpU) PetscCall(BVDestroy(&solve->T_1U));
683: PetscCall(PetscFree2(solve->M,solve->work));
684: PetscCall(VecDestroy(&solve->w[0]));
685: PetscCall(VecDestroy(&solve->w[1]));
686: }
687: if (!solve->sincf) PetscCall(MatDestroy(&solve->T));
688: PetscCall(PetscFree(extop->solve));
689: }
690: if (extop->proj) {
691: if (extop->szd) {
692: for (j=0;j<extop->nep->nt;j++) PetscCall(MatDestroy(&extop->proj->V1pApX[j]));
693: PetscCall(MatDestroy(&extop->proj->XpV1));
694: PetscCall(PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work));
695: PetscCall(VecDestroy(&extop->proj->w));
696: PetscCall(BVDestroy(&extop->proj->V1));
697: }
698: PetscCall(PetscFree(extop->proj));
699: }
700: PetscCall(PetscFree(extop));
701: PetscFunctionReturn(PETSC_SUCCESS);
702: }
704: PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
705: {
706: NEP_EXT_OP op;
707: NEP_DEF_FUN_SOLVE solve;
708: PetscInt szd;
709: Vec x;
711: PetscFunctionBegin;
712: PetscCall(NEPDeflationReset(*extop));
713: PetscCall(PetscNew(&op));
714: *extop = op;
715: op->nep = nep;
716: op->n = 0;
717: op->szd = szd = sz-1;
718: op->max_midx = PetscMin(MAX_MINIDX,szd);
719: op->X = X;
720: if (!X) PetscCall(BVDuplicateResize(nep->V,sz,&op->X));
721: else PetscCall(PetscObjectReference((PetscObject)X));
722: PetscCall(PetscCalloc1(sz*sz,&(op)->H));
723: if (op->szd) {
724: PetscCall(BVGetColumn(op->X,0,&x));
725: PetscCall(VecDuplicate(x,&op->w));
726: PetscCall(BVRestoreColumn(op->X,0,&x));
727: op->simpU = PETSC_FALSE;
728: if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
729: /* undocumented option to use the simple expression for U = T*X*inv(lambda-H) */
730: PetscCall(PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL));
731: } else {
732: op->simpU = PETSC_TRUE;
733: }
734: PetscCall(PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc));
735: PetscCall(BVDuplicateResize(op->X,op->szd,&op->W));
736: }
737: if (ksp) {
738: PetscCall(PetscNew(&solve));
739: op->solve = solve;
740: solve->ksp = ksp;
741: solve->sincf = sincfun;
742: solve->n = -1;
743: if (op->szd) {
744: if (!op->simpU) PetscCall(BVDuplicateResize(nep->V,szd,&solve->T_1U));
745: PetscCall(PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work));
746: PetscCall(BVCreateVec(nep->V,&solve->w[0]));
747: PetscCall(VecDuplicate(solve->w[0],&solve->w[1]));
748: }
749: }
750: PetscFunctionReturn(PETSC_SUCCESS);
751: }
753: static PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
754: {
755: Mat A,Ei;
756: PetscScalar *T,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
757: PetscScalar alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
758: const PetscScalar *E;
759: PetscInt i,ldds,nwork=0,szd,nv,j,k,n;
760: PetscBLASInt inc=1,nv_,ldds_,dim_,szdk,szd_,n_,ldh_;
761: PetscMPIInt np;
762: NEP_DEF_PROJECT proj=(NEP_DEF_PROJECT)ctx;
763: NEP_EXT_OP extop=proj->extop;
764: NEP nep=extop->nep;
766: PetscFunctionBegin;
767: PetscCall(DSGetDimensions(ds,&nv,NULL,NULL,NULL));
768: PetscCall(DSGetLeadingDimension(ds,&ldds));
769: PetscCall(DSGetMat(ds,mat,&A));
770: PetscCall(MatZeroEntries(A));
771: /* mat = V1^*T(lambda)V1 */
772: for (i=0;i<nep->nt;i++) {
773: if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
774: else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
775: PetscCall(DSGetMat(ds,DSMatExtra[i],&Ei));
776: PetscCall(MatAXPY(A,alpha,Ei,SAME_NONZERO_PATTERN));
777: PetscCall(DSRestoreMat(ds,DSMatExtra[i],&Ei));
778: }
779: PetscCall(DSRestoreMat(ds,mat,&A));
780: if (!extop->ref && extop->n) {
781: PetscCall(DSGetArray(ds,mat,&T));
782: n = extop->n;
783: szd = extop->szd;
784: PetscCall(PetscArrayzero(proj->work,proj->lwork));
785: PetscCall(PetscBLASIntCast(nv,&nv_));
786: PetscCall(PetscBLASIntCast(n,&n_));
787: PetscCall(PetscBLASIntCast(ldds,&ldds_));
788: PetscCall(PetscBLASIntCast(szd,&szd_));
789: PetscCall(PetscBLASIntCast(proj->dim,&dim_));
790: PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
791: w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
792: nwork += 2*proj->dim*proj->dim;
794: /* mat = mat + V1^*U(lambda)V2 */
795: for (i=0;i<nep->nt;i++) {
796: if (extop->simpU) {
797: if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
798: else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
799: ww = w1; w = w2;
800: PetscCall(PetscArraycpy(ww,proj->V2,szd*nv));
801: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
802: for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
803: for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
804: alpha = -alpha;
805: PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
806: if (deriv) {
807: PetscCall(PetscBLASIntCast(szd*nv,&szdk));
808: PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha2));
809: PetscCall(PetscArraycpy(w,proj->V2,szd*nv));
810: for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
811: alpha2 = -alpha2;
812: PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
813: alpha2 = 1.0;
814: PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
815: PetscCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
816: }
817: for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
818: } else {
819: PetscCall(NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd));
820: w = deriv?w2:w1; ww = deriv?w1:w2;
821: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
822: s = PetscSqrtReal(np);
823: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
824: }
825: PetscCall(MatDenseGetArrayRead(proj->V1pApX[i],&E));
826: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
827: PetscCall(MatDenseRestoreArrayRead(proj->V1pApX[i],&E));
828: }
830: /* mat = mat + V2^*A(lambda)V1 */
831: basisv = proj->work+nwork; nwork += szd;
832: hH = proj->work+nwork; nwork += szd*szd;
833: hHprev = proj->work+nwork; nwork += szd*szd;
834: AB = proj->work+nwork;
835: PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv));
836: if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
837: for (j=0;j<n;j++)
838: for (i=0;i<n;i++)
839: for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
840: PetscCall(MatDenseGetArrayRead(proj->XpV1,&E));
841: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
842: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
843: PetscCall(MatDenseRestoreArrayRead(proj->XpV1,&E));
845: /* mat = mat + V2^*B(lambda)V2 */
846: PetscCall(PetscArrayzero(AB,szd*szd));
847: for (i=1;i<extop->midx;i++) {
848: PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
849: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
850: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
851: pts = hHprev; hHprev = hH; hH = pts;
852: }
853: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
854: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
855: PetscCall(DSRestoreArray(ds,mat,&T));
856: }
857: PetscFunctionReturn(PETSC_SUCCESS);
858: }
860: PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
861: {
862: PetscInt k,j,n=extop->n,dim;
863: Vec v,ve;
864: BV V1;
865: Mat G;
866: NEP nep=extop->nep;
867: NEP_DEF_PROJECT proj;
869: PetscFunctionBegin;
870: NEPCheckSplit(extop->nep,1);
871: proj = extop->proj;
872: if (!proj) {
873: /* Initialize the projection data structure */
874: PetscCall(PetscNew(&proj));
875: extop->proj = proj;
876: proj->extop = extop;
877: PetscCall(BVGetSizes(Vext,NULL,NULL,&dim));
878: proj->dim = dim;
879: if (extop->szd) {
880: proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
881: PetscCall(PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work));
882: for (j=0;j<nep->nt;j++) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]));
883: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1));
884: PetscCall(BVCreateVec(extop->X,&proj->w));
885: PetscCall(BVDuplicateResize(extop->X,proj->dim,&proj->V1));
886: }
887: PetscCall(DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj));
888: }
890: /* Split Vext in V1 and V2 */
891: if (extop->szd) {
892: for (j=j0;j<j1;j++) {
893: PetscCall(BVGetColumn(Vext,j,&ve));
894: PetscCall(BVGetColumn(proj->V1,j,&v));
895: PetscCall(NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE));
896: PetscCall(BVRestoreColumn(proj->V1,j,&v));
897: PetscCall(BVRestoreColumn(Vext,j,&ve));
898: }
899: V1 = proj->V1;
900: } else V1 = Vext;
902: /* Compute matrices V1^* A_i V1 */
903: PetscCall(BVSetActiveColumns(V1,j0,j1));
904: for (k=0;k<nep->nt;k++) {
905: PetscCall(DSGetMat(ds,DSMatExtra[k],&G));
906: PetscCall(BVMatProject(V1,nep->A[k],V1,G));
907: PetscCall(DSRestoreMat(ds,DSMatExtra[k],&G));
908: }
910: if (extop->n) {
911: if (extop->szd) {
912: /* Compute matrices V1^* A_i X and V1^* X */
913: PetscCall(BVSetActiveColumns(extop->W,0,n));
914: for (k=0;k<nep->nt;k++) {
915: PetscCall(BVMatMult(extop->X,nep->A[k],extop->W));
916: PetscCall(BVDot(extop->W,V1,proj->V1pApX[k]));
917: }
918: PetscCall(BVDot(V1,extop->X,proj->XpV1));
919: }
920: }
921: PetscFunctionReturn(PETSC_SUCCESS);
922: }