Line data Source code
1 : /*
2 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 : SLEPc - Scalable Library for Eigenvalue Problem Computations
4 : Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5 :
6 : This file is part of SLEPc.
7 : SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 : */
10 :
11 : #include <slepc/private/nepimpl.h> /*I "slepcnep.h" I*/
12 : #include <slepcblaslapack.h>
13 : #include "nepdefl.h"
14 :
15 65 : PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
16 : {
17 65 : PetscFunctionBegin;
18 65 : if (X) *X = extop->X;
19 65 : if (H) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H));
20 65 : PetscFunctionReturn(PETSC_SUCCESS);
21 : }
22 :
23 118 : static PetscErrorCode NEPDeflationExtendInvariantPair(NEP_EXT_OP extop,Vec u,PetscScalar lambda,PetscInt k)
24 : {
25 118 : Vec uu;
26 118 : PetscInt ld,i;
27 118 : PetscMPIInt np;
28 118 : PetscReal norm;
29 :
30 118 : PetscFunctionBegin;
31 118 : PetscCall(BVGetColumn(extop->X,k,&uu));
32 118 : ld = extop->szd+1;
33 118 : PetscCall(NEPDeflationCopyToExtendedVec(extop,uu,extop->H+k*ld,u,PETSC_TRUE));
34 118 : PetscCall(BVRestoreColumn(extop->X,k,&uu));
35 118 : PetscCall(BVNormColumn(extop->X,k,NORM_2,&norm));
36 118 : PetscCall(BVScaleColumn(extop->X,k,1.0/norm));
37 118 : PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)u),&np));
38 197 : for (i=0;i<k;i++) extop->H[k*ld+i] *= PetscSqrtReal(np)/norm;
39 118 : extop->H[k*(ld+1)] = lambda;
40 118 : PetscFunctionReturn(PETSC_SUCCESS);
41 : }
42 :
43 20 : PetscErrorCode NEPDeflationExtractEigenpair(NEP_EXT_OP extop,PetscInt k,Vec u,PetscScalar lambda,DS ds)
44 : {
45 20 : Mat A,H;
46 20 : PetscInt ldh=extop->szd+1,ldds,k1=k+1;
47 20 : PetscScalar *eigr,*eigi,*t,*Z;
48 20 : Vec x;
49 :
50 20 : PetscFunctionBegin;
51 20 : PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,k));
52 20 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k1,k1,extop->H,&H));
53 20 : PetscCall(MatDenseSetLDA(H,ldh));
54 20 : PetscCall(PetscCalloc3(k1,&eigr,k1,&eigi,extop->szd,&t));
55 20 : PetscCall(DSReset(ds));
56 20 : PetscCall(DSSetType(ds,DSNHEP));
57 20 : PetscCall(DSAllocate(ds,ldh));
58 20 : PetscCall(DSGetLeadingDimension(ds,&ldds));
59 20 : PetscCall(DSSetDimensions(ds,k1,0,k1));
60 20 : PetscCall(DSGetMat(ds,DS_MAT_A,&A));
61 20 : PetscCall(MatCopy(H,A,SAME_NONZERO_PATTERN));
62 20 : PetscCall(DSRestoreMat(ds,DS_MAT_A,&A));
63 20 : PetscCall(MatDestroy(&H));
64 20 : PetscCall(DSSolve(ds,eigr,eigi));
65 20 : PetscCall(DSVectors(ds,DS_MAT_X,&k,NULL));
66 20 : PetscCall(DSGetArray(ds,DS_MAT_X,&Z));
67 20 : PetscCall(BVMultColumn(extop->X,1.0,Z[k*ldds+k],k,Z+k*ldds));
68 20 : PetscCall(DSRestoreArray(ds,DS_MAT_X,&Z));
69 20 : PetscCall(BVGetColumn(extop->X,k,&x));
70 20 : PetscCall(NEPDeflationCopyToExtendedVec(extop,x,t,u,PETSC_FALSE));
71 20 : PetscCall(BVRestoreColumn(extop->X,k,&x));
72 20 : PetscCall(PetscFree3(eigr,eigi,t));
73 20 : PetscFunctionReturn(PETSC_SUCCESS);
74 : }
75 :
76 264 : PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
77 : {
78 264 : PetscMPIInt np,rk,count;
79 264 : PetscScalar *array1,*array2;
80 264 : PetscInt nloc;
81 :
82 264 : PetscFunctionBegin;
83 264 : if (extop->szd) {
84 170 : PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)vex),&rk));
85 170 : PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)vex),&np));
86 170 : PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
87 170 : if (v) {
88 170 : PetscCall(VecGetArray(v,&array1));
89 170 : PetscCall(VecGetArray(vex,&array2));
90 170 : if (back) PetscCall(PetscArraycpy(array1,array2,nloc));
91 38 : else PetscCall(PetscArraycpy(array2,array1,nloc));
92 170 : PetscCall(VecRestoreArray(v,&array1));
93 170 : PetscCall(VecRestoreArray(vex,&array2));
94 : }
95 170 : if (a) {
96 152 : PetscCall(VecGetArray(vex,&array2));
97 152 : if (back) {
98 132 : PetscCall(PetscArraycpy(a,array2+nloc,extop->szd));
99 132 : PetscCall(PetscMPIIntCast(extop->szd,&count));
100 264 : PetscCallMPI(MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
101 : } else {
102 20 : PetscCall(PetscArraycpy(array2+nloc,a,extop->szd));
103 20 : PetscCall(PetscMPIIntCast(extop->szd,&count));
104 40 : PetscCallMPI(MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
105 : }
106 152 : PetscCall(VecRestoreArray(vex,&array2));
107 : }
108 : } else {
109 94 : if (back) PetscCall(VecCopy(vex,v));
110 47 : else PetscCall(VecCopy(v,vex));
111 : }
112 264 : PetscFunctionReturn(PETSC_SUCCESS);
113 : }
114 :
115 72 : PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
116 : {
117 72 : PetscInt nloc;
118 72 : Vec u;
119 72 : VecType type;
120 :
121 72 : PetscFunctionBegin;
122 72 : if (extop->szd) {
123 24 : PetscCall(BVGetColumn(extop->nep->V,0,&u));
124 24 : PetscCall(VecGetType(u,&type));
125 24 : PetscCall(BVRestoreColumn(extop->nep->V,0,&u));
126 24 : PetscCall(VecCreate(PetscObjectComm((PetscObject)extop->nep),v));
127 24 : PetscCall(VecSetType(*v,type));
128 24 : PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
129 24 : nloc += extop->szd;
130 24 : PetscCall(VecSetSizes(*v,nloc,PETSC_DECIDE));
131 48 : } else PetscCall(BVCreateVec(extop->nep->V,v));
132 72 : PetscFunctionReturn(PETSC_SUCCESS);
133 : }
134 :
135 17 : PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
136 : {
137 17 : PetscInt nloc;
138 17 : BVType type;
139 17 : BVOrthogType otype;
140 17 : BVOrthogRefineType oref;
141 17 : PetscReal oeta;
142 17 : BVOrthogBlockType oblock;
143 17 : NEP nep=extop->nep;
144 17 : VecType vtype;
145 :
146 17 : PetscFunctionBegin;
147 17 : if (extop->szd) {
148 3 : PetscCall(BVGetSizes(nep->V,&nloc,NULL,NULL));
149 3 : PetscCall(BVCreate(PetscObjectComm((PetscObject)nep),V));
150 3 : PetscCall(BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz));
151 3 : PetscCall(BVGetType(nep->V,&type));
152 3 : PetscCall(BVSetType(*V,type));
153 3 : PetscCall(BVGetVecType(nep->V,&vtype));
154 3 : PetscCall(BVSetVecType(*V,vtype));
155 3 : PetscCall(BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock));
156 3 : PetscCall(BVSetOrthogonalization(*V,otype,oref,oeta,oblock));
157 3 : PetscCall(PetscObjectStateIncrease((PetscObject)*V));
158 14 : } else PetscCall(BVDuplicateResize(nep->V,sz,V));
159 17 : PetscFunctionReturn(PETSC_SUCCESS);
160 : }
161 :
162 17 : PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
163 : {
164 17 : PetscInt n,next,i;
165 17 : PetscRandom rand;
166 17 : PetscScalar *array;
167 17 : PetscMPIInt nn,np;
168 :
169 17 : PetscFunctionBegin;
170 17 : PetscCall(BVGetRandomContext(extop->nep->V,&rand));
171 17 : PetscCall(VecSetRandom(v,rand));
172 17 : if (extop->szd) {
173 17 : PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v),&np));
174 17 : PetscCall(BVGetSizes(extop->nep->V,&n,NULL,NULL));
175 17 : PetscCall(VecGetLocalSize(v,&next));
176 17 : PetscCall(VecGetArray(v,&array));
177 25 : for (i=n+extop->n;i<next;i++) array[i] = 0.0;
178 42 : for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
179 17 : PetscCall(PetscMPIIntCast(extop->n,&nn));
180 34 : PetscCallMPI(MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PetscObjectComm((PetscObject)v)));
181 17 : PetscCall(VecRestoreArray(v,&array));
182 : }
183 17 : PetscFunctionReturn(PETSC_SUCCESS);
184 : }
185 :
186 33 : static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
187 : {
188 33 : PetscInt i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
189 33 : PetscScalar sone=1.0,zero=0.0;
190 33 : PetscBLASInt ldh_,ldhj_,n_;
191 :
192 33 : PetscFunctionBegin;
193 33 : i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
194 33 : PetscCall(PetscArrayzero(Hj,i));
195 33 : PetscCall(PetscBLASIntCast(ldhj+1,&ldh_));
196 33 : PetscCall(PetscBLASIntCast(ldhj,&ldhj_));
197 33 : PetscCall(PetscBLASIntCast(n,&n_));
198 33 : if (idx<1) {
199 82 : if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
200 0 : else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
201 : } else {
202 0 : for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
203 0 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
204 0 : for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
205 0 : if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
206 : }
207 33 : if (idx<0) {
208 33 : idx = -idx;
209 33 : for (k=1;k<idx;k++) {
210 0 : for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
211 0 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
212 0 : for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
213 0 : if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
214 : }
215 : }
216 33 : PetscFunctionReturn(PETSC_SUCCESS);
217 : }
218 :
219 98 : PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
220 : {
221 98 : PetscInt i;
222 :
223 98 : PetscFunctionBegin;
224 98 : PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,extop->n));
225 98 : extop->n++;
226 98 : PetscCall(BVSetActiveColumns(extop->X,0,extop->n));
227 98 : if (extop->n <= extop->szd) {
228 : /* update XpX */
229 33 : PetscCall(BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd));
230 33 : extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
231 49 : for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
232 : /* determine minimality index */
233 33 : extop->midx = PetscMin(extop->max_midx,extop->n);
234 : /* polynominal basis coefficients */
235 66 : for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
236 : /* evaluate the polynomial basis in H */
237 33 : PetscCall(NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL));
238 : }
239 98 : PetscFunctionReturn(PETSC_SUCCESS);
240 : }
241 :
242 780 : static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
243 : {
244 780 : PetscInt i,j,k,off,ini,fin,sz,ldh,n=extop->n;
245 780 : Mat A,B;
246 780 : PetscScalar *array;
247 780 : const PetscScalar *barray;
248 :
249 780 : PetscFunctionBegin;
250 780 : if (idx<0) {ini = 0; fin = extop->nep->nt;}
251 471 : else {ini = idx; fin = idx+1;}
252 780 : if (y) sz = hfjp?n+2:n+1;
253 780 : else sz = hfjp?3*n:2*n;
254 780 : ldh = extop->szd+1;
255 780 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A));
256 780 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B));
257 780 : PetscCall(MatDenseGetArray(A,&array));
258 1937 : for (j=0;j<n;j++)
259 3074 : for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
260 780 : PetscCall(MatDenseRestoreArrayWrite(A,&array));
261 780 : if (y) {
262 0 : PetscCall(MatDenseGetArray(A,&array));
263 0 : array[extop->n*(sz+1)] = lambda;
264 0 : if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
265 0 : for (i=0;i<n;i++) array[n*sz+i] = y[i];
266 0 : PetscCall(MatDenseRestoreArrayWrite(A,&array));
267 0 : for (j=ini;j<fin;j++) {
268 0 : PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
269 0 : PetscCall(MatDenseGetArrayRead(B,&barray));
270 0 : for (i=0;i<n;i++) hfj[j*ld+i] = barray[n*sz+i];
271 0 : if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = barray[(n+1)*sz+i];
272 0 : PetscCall(MatDenseRestoreArrayRead(B,&barray));
273 : }
274 : } else {
275 780 : off = idx<0?ld*n:0;
276 780 : PetscCall(MatDenseGetArray(A,&array));
277 1937 : for (k=0;k<n;k++) {
278 1157 : array[(n+k)*sz+k] = 1.0;
279 1157 : array[(n+k)*sz+n+k] = lambda;
280 : }
281 1937 : if (hfjp) for (k=0;k<n;k++) {
282 1157 : array[(2*n+k)*sz+n+k] = 1.0;
283 1157 : array[(2*n+k)*sz+2*n+k] = lambda;
284 : }
285 780 : PetscCall(MatDenseRestoreArray(A,&array));
286 2175 : for (j=ini;j<fin;j++) {
287 1395 : PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
288 1395 : PetscCall(MatDenseGetArrayRead(B,&barray));
289 6759 : for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = barray[n*sz+i*sz+k];
290 6759 : if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = barray[2*n*sz+i*sz+k];
291 1395 : PetscCall(MatDenseRestoreArrayRead(B,&barray));
292 : }
293 : }
294 780 : PetscCall(MatDestroy(&A));
295 780 : PetscCall(MatDestroy(&B));
296 780 : PetscFunctionReturn(PETSC_SUCCESS);
297 : }
298 :
299 3943 : static PetscErrorCode MatMult_NEPDeflation(Mat M,Vec x,Vec y)
300 : {
301 3943 : NEP_DEF_MATSHELL *matctx;
302 3943 : NEP_EXT_OP extop;
303 3943 : Vec x1,y1;
304 3943 : PetscScalar *yy,sone=1.0,zero=0.0;
305 3943 : const PetscScalar *xx;
306 3943 : PetscInt nloc,i;
307 3943 : PetscMPIInt np;
308 3943 : PetscBLASInt n_,one=1,szd_;
309 :
310 3943 : PetscFunctionBegin;
311 3943 : PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&np));
312 3943 : PetscCall(MatShellGetContext(M,&matctx));
313 3943 : extop = matctx->extop;
314 3943 : if (extop->ref) PetscCall(VecZeroEntries(y));
315 3943 : if (extop->szd) {
316 2172 : x1 = matctx->w[0]; y1 = matctx->w[1];
317 2172 : PetscCall(VecGetArrayRead(x,&xx));
318 2172 : PetscCall(VecPlaceArray(x1,xx));
319 2172 : PetscCall(VecGetArray(y,&yy));
320 2172 : PetscCall(VecPlaceArray(y1,yy));
321 2172 : PetscCall(MatMult(matctx->T,x1,y1));
322 2172 : if (!extop->ref && extop->n) {
323 1113 : PetscCall(VecGetLocalSize(x1,&nloc));
324 : /* copy for avoiding warning of constant array xx */
325 2631 : for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
326 1113 : PetscCall(BVMultVec(matctx->U,1.0,1.0,y1,matctx->work));
327 1113 : PetscCall(BVDotVec(extop->X,x1,matctx->work));
328 1113 : PetscCall(PetscBLASIntCast(extop->n,&n_));
329 1113 : PetscCall(PetscBLASIntCast(extop->szd,&szd_));
330 1113 : PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
331 1113 : PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
332 2631 : for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
333 : }
334 2172 : PetscCall(VecResetArray(x1));
335 2172 : PetscCall(VecRestoreArrayRead(x,&xx));
336 2172 : PetscCall(VecResetArray(y1));
337 2172 : PetscCall(VecRestoreArray(y,&yy));
338 1771 : } else PetscCall(MatMult(matctx->T,x,y));
339 3943 : PetscFunctionReturn(PETSC_SUCCESS);
340 : }
341 :
342 0 : static PetscErrorCode MatCreateVecs_NEPDeflation(Mat M,Vec *right,Vec *left)
343 : {
344 0 : NEP_DEF_MATSHELL *matctx;
345 :
346 0 : PetscFunctionBegin;
347 0 : PetscCall(MatShellGetContext(M,&matctx));
348 0 : if (right) PetscCall(VecDuplicate(matctx->w[0],right));
349 0 : if (left) PetscCall(VecDuplicate(matctx->w[0],left));
350 0 : PetscFunctionReturn(PETSC_SUCCESS);
351 : }
352 :
353 154 : static PetscErrorCode MatDestroy_NEPDeflation(Mat M)
354 : {
355 154 : NEP_DEF_MATSHELL *matctx;
356 :
357 154 : PetscFunctionBegin;
358 154 : PetscCall(MatShellGetContext(M,&matctx));
359 154 : if (matctx->extop->szd) {
360 42 : PetscCall(BVDestroy(&matctx->U));
361 42 : PetscCall(PetscFree4(matctx->hfj,matctx->work,matctx->A,matctx->B));
362 42 : PetscCall(VecDestroy(&matctx->w[0]));
363 42 : PetscCall(VecDestroy(&matctx->w[1]));
364 : }
365 154 : if (matctx->P != matctx->T) PetscCall(MatDestroy(&matctx->P));
366 154 : PetscCall(MatDestroy(&matctx->T));
367 154 : PetscCall(PetscFree(matctx));
368 154 : PetscFunctionReturn(PETSC_SUCCESS);
369 : }
370 :
371 971 : static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
372 : {
373 971 : PetscScalar p;
374 971 : PetscInt i;
375 :
376 971 : PetscFunctionBegin;
377 971 : if (!jacobian) {
378 560 : val[0] = 1.0;
379 788 : for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
380 : } else {
381 411 : val[0] = 0.0;
382 411 : p = 1.0;
383 565 : for (i=1;i<extop->n;i++) {
384 154 : val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
385 154 : p *= (lambda-extop->bc[i-1]);
386 : }
387 : }
388 971 : PetscFunctionReturn(PETSC_SUCCESS);
389 : }
390 :
391 2890 : static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
392 : {
393 2890 : NEP_DEF_MATSHELL *matctx,*matctxC;
394 2890 : PetscInt nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
395 2890 : Mat F,Mshell,Mcomp;
396 2890 : PetscBool ini=PETSC_FALSE;
397 2890 : PetscScalar *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
398 2890 : PetscBLASInt n_,info,szd_;
399 2890 : size_t len;
400 :
401 2890 : PetscFunctionBegin;
402 2890 : if (!M) Mshell = jacobian?extop->MJ:extop->MF;
403 97 : else Mshell = *M;
404 2890 : Mcomp = jacobian?extop->MF:extop->MJ;
405 2890 : if (!Mshell) {
406 154 : ini = PETSC_TRUE;
407 154 : PetscCall(PetscNew(&matctx));
408 154 : PetscCall(MatGetLocalSize(extop->nep->function,&mloc,&nloc));
409 154 : nloc += szd; mloc += szd;
410 154 : PetscCall(MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell));
411 154 : PetscCall(MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))MatMult_NEPDeflation));
412 154 : PetscCall(MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))MatCreateVecs_NEPDeflation));
413 154 : PetscCall(MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))MatDestroy_NEPDeflation));
414 154 : matctx->nep = extop->nep;
415 154 : matctx->extop = extop;
416 154 : if (!M) {
417 113 : if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
418 65 : else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
419 113 : PetscCall(PetscObjectReference((PetscObject)matctx->T));
420 113 : if (!jacobian) {
421 65 : if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) {
422 2 : matctx->P = extop->nep->function_pre;
423 2 : PetscCall(PetscObjectReference((PetscObject)matctx->P));
424 63 : } else matctx->P = matctx->T;
425 : }
426 : } else {
427 41 : matctx->jacob = jacobian;
428 41 : PetscCall(MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES,&matctx->T));
429 41 : *M = Mshell;
430 41 : if (!jacobian) {
431 41 : if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) PetscCall(MatDuplicate(extop->nep->function_pre,MAT_DO_NOT_COPY_VALUES,&matctx->P));
432 41 : else matctx->P = matctx->T;
433 : }
434 : }
435 154 : if (szd) {
436 42 : PetscCall(BVCreateVec(extop->nep->V,matctx->w));
437 42 : PetscCall(VecDuplicate(matctx->w[0],matctx->w+1));
438 42 : PetscCall(BVDuplicateResize(extop->nep->V,szd,&matctx->U));
439 42 : len = extop->simpU? (size_t)2*szd*szd: (size_t)2*szd*szd*extop->nep->nt;
440 42 : PetscCall(PetscMalloc4(len,&matctx->hfj,szd,&matctx->work,szd*szd,&matctx->A,szd*szd,&matctx->B));
441 : }
442 2736 : } else PetscCall(MatShellGetContext(Mshell,&matctx));
443 2736 : if (ini || matctx->theta != lambda || matctx->n != extop->n) {
444 2324 : if (ini || matctx->theta != lambda) {
445 2318 : if (jacobian) PetscCall(NEPComputeJacobian(extop->nep,lambda,matctx->T));
446 1322 : else PetscCall(NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->P));
447 : }
448 2324 : if (n) {
449 739 : matctx->hfjset = PETSC_FALSE;
450 739 : if (!extop->simpU) {
451 : /* likely hfjp has been already computed */
452 542 : if (Mcomp) {
453 505 : PetscCall(MatShellGetContext(Mcomp,&matctxC));
454 505 : if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
455 233 : PetscCall(PetscArraycpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt));
456 233 : matctx->hfjset = PETSC_TRUE;
457 : }
458 : }
459 542 : hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
460 542 : if (!matctx->hfjset) {
461 309 : PetscCall(NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n));
462 309 : matctx->hfjset = PETSC_TRUE;
463 : }
464 542 : PetscCall(BVSetActiveColumns(matctx->U,0,n));
465 542 : hf = jacobian?hfjp:hfj;
466 542 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F));
467 542 : PetscCall(BVMatMult(extop->X,extop->nep->A[0],matctx->U));
468 542 : PetscCall(BVMultInPlace(matctx->U,F,0,n));
469 542 : PetscCall(BVSetActiveColumns(extop->W,0,extop->n));
470 1621 : for (j=1;j<extop->nep->nt;j++) {
471 1079 : PetscCall(BVMatMult(extop->X,extop->nep->A[j],extop->W));
472 1079 : PetscCall(MatDensePlaceArray(F,hf+j*n*n));
473 1079 : PetscCall(BVMult(matctx->U,1.0,1.0,extop->W,F));
474 1079 : PetscCall(MatDenseResetArray(F));
475 : }
476 542 : PetscCall(MatDestroy(&F));
477 : } else {
478 197 : hfj = matctx->hfj;
479 197 : PetscCall(BVSetActiveColumns(matctx->U,0,n));
480 197 : PetscCall(BVMatMult(extop->X,matctx->T,matctx->U));
481 437 : for (j=0;j<n;j++) {
482 566 : for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
483 240 : hfj[j*(n+1)] += lambda;
484 : }
485 197 : PetscCall(PetscBLASIntCast(n,&n_));
486 197 : PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
487 197 : PetscCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
488 197 : PetscCall(PetscFPTrapPop());
489 197 : SlepcCheckLapackInfo("trtri",info);
490 197 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F));
491 197 : PetscCall(BVMultInPlace(matctx->U,F,0,n));
492 197 : if (jacobian) {
493 83 : PetscCall(NEPDeflationComputeFunction(extop,lambda,NULL));
494 83 : PetscCall(MatShellGetContext(extop->MF,&matctxC));
495 83 : PetscCall(BVMult(matctx->U,-1.0,1.0,matctxC->U,F));
496 : }
497 197 : PetscCall(MatDestroy(&F));
498 : }
499 739 : PetscCall(PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev));
500 739 : PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian));
501 739 : A = matctx->A;
502 739 : PetscCall(PetscArrayzero(A,szd*szd));
503 1321 : if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
504 1743 : for (j=0;j<n;j++)
505 2548 : for (i=0;i<n;i++)
506 1544 : for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
507 739 : PetscCall(PetscBLASIntCast(n,&n_));
508 739 : PetscCall(PetscBLASIntCast(szd,&szd_));
509 739 : B = matctx->B;
510 739 : PetscCall(PetscArrayzero(B,szd*szd));
511 739 : for (i=1;i<extop->midx;i++) {
512 0 : PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
513 0 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
514 0 : PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
515 0 : pts = hHprev; hHprev = hH; hH = pts;
516 : }
517 739 : PetscCall(PetscFree3(basisv,hH,hHprev));
518 : }
519 2324 : matctx->theta = lambda;
520 2324 : matctx->n = extop->n;
521 : }
522 2890 : PetscFunctionReturn(PETSC_SUCCESS);
523 : }
524 :
525 1717 : PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
526 : {
527 1717 : PetscFunctionBegin;
528 1717 : PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL));
529 1717 : if (F) *F = extop->MF;
530 1717 : PetscFunctionReturn(PETSC_SUCCESS);
531 : }
532 :
533 996 : PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
534 : {
535 996 : PetscFunctionBegin;
536 996 : PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL));
537 996 : if (J) *J = extop->MJ;
538 996 : PetscFunctionReturn(PETSC_SUCCESS);
539 : }
540 :
541 177 : PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
542 : {
543 177 : NEP_DEF_MATSHELL *matctx;
544 177 : NEP_DEF_FUN_SOLVE solve;
545 177 : PetscInt i,j,n=extop->n;
546 177 : Vec u,tu;
547 177 : Mat F;
548 177 : PetscScalar snone=-1.0,sone=1.0;
549 177 : PetscBLASInt n_,szd_,ldh_,*p,info;
550 177 : Mat Mshell;
551 :
552 177 : PetscFunctionBegin;
553 177 : solve = extop->solve;
554 177 : if (lambda!=solve->theta || n!=solve->n) {
555 177 : PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T));
556 177 : Mshell = (solve->sincf)?extop->MF:solve->T;
557 177 : PetscCall(MatShellGetContext(Mshell,&matctx));
558 177 : PetscCall(NEP_KSPSetOperators(solve->ksp,matctx->T,matctx->P));
559 177 : if (!extop->ref && n) {
560 62 : PetscCall(PetscBLASIntCast(n,&n_));
561 62 : PetscCall(PetscBLASIntCast(extop->szd,&szd_));
562 62 : PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
563 62 : if (!extop->simpU) {
564 49 : PetscCall(BVSetActiveColumns(solve->T_1U,0,n));
565 124 : for (i=0;i<n;i++) {
566 75 : PetscCall(BVGetColumn(matctx->U,i,&u));
567 75 : PetscCall(BVGetColumn(solve->T_1U,i,&tu));
568 75 : PetscCall(KSPSolve(solve->ksp,u,tu));
569 75 : PetscCall(BVRestoreColumn(solve->T_1U,i,&tu));
570 75 : PetscCall(BVRestoreColumn(matctx->U,i,&u));
571 : }
572 49 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F));
573 49 : PetscCall(BVDot(solve->T_1U,extop->X,F));
574 49 : PetscCall(MatDestroy(&F));
575 : } else {
576 31 : for (j=0;j<n;j++)
577 46 : for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
578 31 : for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
579 13 : PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
580 31 : for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
581 : }
582 62 : PetscCall(PetscArraycpy(solve->M,matctx->B,extop->szd*extop->szd));
583 62 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
584 62 : PetscCall(PetscMalloc1(n,&p));
585 62 : PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
586 62 : PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
587 62 : SlepcCheckLapackInfo("getrf",info);
588 62 : PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
589 62 : SlepcCheckLapackInfo("getri",info);
590 62 : PetscCall(PetscFPTrapPop());
591 62 : PetscCall(PetscFree(p));
592 : }
593 177 : solve->theta = lambda;
594 177 : solve->n = n;
595 : }
596 177 : PetscFunctionReturn(PETSC_SUCCESS);
597 : }
598 :
599 3235 : PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
600 : {
601 3235 : Vec b1,x1;
602 3235 : PetscScalar *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
603 3235 : NEP_DEF_MATSHELL *matctx;
604 3235 : NEP_DEF_FUN_SOLVE solve=extop->solve;
605 3235 : PetscBLASInt one=1,szd_,n_,ldh_;
606 3235 : PetscInt nloc,i;
607 3235 : PetscMPIInt np,count;
608 :
609 3235 : PetscFunctionBegin;
610 3235 : if (extop->ref) PetscCall(VecZeroEntries(x));
611 3235 : if (extop->szd) {
612 2082 : x1 = solve->w[0]; b1 = solve->w[1];
613 2082 : PetscCall(VecGetArray(x,&xx));
614 2082 : PetscCall(VecPlaceArray(x1,xx));
615 2082 : PetscCall(VecGetArray(b,&bb));
616 2082 : PetscCall(VecPlaceArray(b1,bb));
617 : } else {
618 : b1 = b; x1 = x;
619 : }
620 3235 : PetscCall(KSPSolve(extop->solve->ksp,b1,x1));
621 3235 : if (!extop->ref && extop->n && extop->szd) {
622 1073 : PetscCall(PetscBLASIntCast(extop->szd,&szd_));
623 1073 : PetscCall(PetscBLASIntCast(extop->n,&n_));
624 1073 : PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
625 1073 : PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
626 1073 : PetscCall(PetscMalloc2(extop->n,&b2,extop->n,&x2));
627 1073 : PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)b),&np));
628 2531 : for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np);
629 1073 : w = solve->work; w2 = solve->work+extop->n;
630 1073 : PetscCall(MatShellGetContext(solve->sincf?extop->MF:solve->T,&matctx));
631 1073 : PetscCall(PetscArraycpy(w2,b2,extop->n));
632 1073 : PetscCall(BVDotVec(extop->X,x1,w));
633 1073 : PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
634 1073 : PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
635 1073 : if (extop->simpU) {
636 577 : for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
637 577 : for (i=0;i<extop->n;i++) w[i] = x2[i];
638 260 : PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
639 577 : for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
640 260 : PetscCall(BVMultVec(extop->X,-1.0,1.0,x1,w));
641 813 : } else PetscCall(BVMultVec(solve->T_1U,-1.0,1.0,x1,x2));
642 2531 : for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
643 1073 : PetscCall(PetscMPIIntCast(extop->n,&count));
644 2146 : PetscCallMPI(MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b)));
645 : }
646 3235 : if (extop->szd) {
647 2082 : PetscCall(VecResetArray(x1));
648 2082 : PetscCall(VecRestoreArray(x,&xx));
649 2082 : PetscCall(VecResetArray(b1));
650 2082 : PetscCall(VecRestoreArray(b,&bb));
651 2082 : if (!extop->ref && extop->n) PetscCall(PetscFree2(b2,x2));
652 : }
653 3235 : PetscFunctionReturn(PETSC_SUCCESS);
654 : }
655 :
656 95 : PetscErrorCode NEPDeflationSetRefine(NEP_EXT_OP extop,PetscBool ref)
657 : {
658 95 : PetscFunctionBegin;
659 95 : extop->ref = ref;
660 95 : PetscFunctionReturn(PETSC_SUCCESS);
661 : }
662 :
663 130 : PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
664 : {
665 130 : PetscInt j;
666 130 : NEP_DEF_FUN_SOLVE solve;
667 :
668 130 : PetscFunctionBegin;
669 130 : if (!extop) PetscFunctionReturn(PETSC_SUCCESS);
670 65 : PetscCall(PetscFree(extop->H));
671 65 : PetscCall(BVDestroy(&extop->X));
672 65 : if (extop->szd) {
673 18 : PetscCall(VecDestroy(&extop->w));
674 18 : PetscCall(PetscFree3(extop->Hj,extop->XpX,extop->bc));
675 18 : PetscCall(BVDestroy(&extop->W));
676 : }
677 65 : PetscCall(MatDestroy(&extop->MF));
678 65 : PetscCall(MatDestroy(&extop->MJ));
679 65 : if (extop->solve) {
680 65 : solve = extop->solve;
681 65 : if (extop->szd) {
682 18 : if (!extop->simpU) PetscCall(BVDestroy(&solve->T_1U));
683 18 : PetscCall(PetscFree2(solve->M,solve->work));
684 18 : PetscCall(VecDestroy(&solve->w[0]));
685 18 : PetscCall(VecDestroy(&solve->w[1]));
686 : }
687 65 : if (!solve->sincf) PetscCall(MatDestroy(&solve->T));
688 65 : PetscCall(PetscFree(extop->solve));
689 : }
690 65 : if (extop->proj) {
691 17 : if (extop->szd) {
692 12 : for (j=0;j<extop->nep->nt;j++) PetscCall(MatDestroy(&extop->proj->V1pApX[j]));
693 3 : PetscCall(MatDestroy(&extop->proj->XpV1));
694 3 : PetscCall(PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work));
695 3 : PetscCall(VecDestroy(&extop->proj->w));
696 3 : PetscCall(BVDestroy(&extop->proj->V1));
697 : }
698 17 : PetscCall(PetscFree(extop->proj));
699 : }
700 65 : PetscCall(PetscFree(extop));
701 65 : PetscFunctionReturn(PETSC_SUCCESS);
702 : }
703 :
704 65 : PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
705 : {
706 65 : NEP_EXT_OP op;
707 65 : NEP_DEF_FUN_SOLVE solve;
708 65 : PetscInt szd;
709 65 : Vec x;
710 :
711 65 : PetscFunctionBegin;
712 65 : PetscCall(NEPDeflationReset(*extop));
713 65 : PetscCall(PetscNew(&op));
714 65 : *extop = op;
715 65 : op->nep = nep;
716 65 : op->n = 0;
717 65 : op->szd = szd = sz-1;
718 65 : op->max_midx = PetscMin(MAX_MINIDX,szd);
719 65 : op->X = X;
720 65 : if (!X) PetscCall(BVDuplicateResize(nep->V,sz,&op->X));
721 65 : else PetscCall(PetscObjectReference((PetscObject)X));
722 65 : PetscCall(PetscCalloc1(sz*sz,&(op)->H));
723 65 : if (op->szd) {
724 18 : PetscCall(BVGetColumn(op->X,0,&x));
725 18 : PetscCall(VecDuplicate(x,&op->w));
726 18 : PetscCall(BVRestoreColumn(op->X,0,&x));
727 18 : op->simpU = PETSC_FALSE;
728 18 : if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
729 : /* undocumented option to use the simple expression for U = T*X*inv(lambda-H) */
730 16 : PetscCall(PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL));
731 : } else {
732 2 : op->simpU = PETSC_TRUE;
733 : }
734 18 : PetscCall(PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc));
735 18 : PetscCall(BVDuplicateResize(op->X,op->szd,&op->W));
736 : }
737 65 : if (ksp) {
738 65 : PetscCall(PetscNew(&solve));
739 65 : op->solve = solve;
740 65 : solve->ksp = ksp;
741 65 : solve->sincf = sincfun;
742 65 : solve->n = -1;
743 65 : if (op->szd) {
744 18 : if (!op->simpU) PetscCall(BVDuplicateResize(nep->V,szd,&solve->T_1U));
745 18 : PetscCall(PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work));
746 18 : PetscCall(BVCreateVec(nep->V,&solve->w[0]));
747 18 : PetscCall(VecDuplicate(solve->w[0],&solve->w[1]));
748 : }
749 : }
750 65 : PetscFunctionReturn(PETSC_SUCCESS);
751 : }
752 :
753 850 : static PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
754 : {
755 850 : Mat A,Ei;
756 850 : PetscScalar *T,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
757 850 : PetscScalar alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
758 850 : const PetscScalar *E;
759 850 : PetscInt i,ldds,nwork=0,szd,nv,j,k,n;
760 850 : PetscBLASInt inc=1,nv_,ldds_,dim_,szdk,szd_,n_,ldh_;
761 850 : PetscMPIInt np;
762 850 : NEP_DEF_PROJECT proj=(NEP_DEF_PROJECT)ctx;
763 850 : NEP_EXT_OP extop=proj->extop;
764 850 : NEP nep=extop->nep;
765 :
766 850 : PetscFunctionBegin;
767 850 : PetscCall(DSGetDimensions(ds,&nv,NULL,NULL,NULL));
768 850 : PetscCall(DSGetLeadingDimension(ds,&ldds));
769 850 : PetscCall(DSGetMat(ds,mat,&A));
770 850 : PetscCall(MatZeroEntries(A));
771 : /* mat = V1^*T(lambda)V1 */
772 3400 : for (i=0;i<nep->nt;i++) {
773 2550 : if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
774 1515 : else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
775 2550 : PetscCall(DSGetMat(ds,DSMatExtra[i],&Ei));
776 2550 : PetscCall(MatAXPY(A,alpha,Ei,SAME_NONZERO_PATTERN));
777 2550 : PetscCall(DSRestoreMat(ds,DSMatExtra[i],&Ei));
778 : }
779 850 : PetscCall(DSRestoreMat(ds,mat,&A));
780 850 : if (!extop->ref && extop->n) {
781 232 : PetscCall(DSGetArray(ds,mat,&T));
782 232 : n = extop->n;
783 232 : szd = extop->szd;
784 232 : PetscCall(PetscArrayzero(proj->work,proj->lwork));
785 232 : PetscCall(PetscBLASIntCast(nv,&nv_));
786 232 : PetscCall(PetscBLASIntCast(n,&n_));
787 232 : PetscCall(PetscBLASIntCast(ldds,&ldds_));
788 232 : PetscCall(PetscBLASIntCast(szd,&szd_));
789 232 : PetscCall(PetscBLASIntCast(proj->dim,&dim_));
790 232 : PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
791 232 : w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
792 232 : nwork += 2*proj->dim*proj->dim;
793 :
794 : /* mat = mat + V1^*U(lambda)V2 */
795 928 : for (i=0;i<nep->nt;i++) {
796 696 : if (extop->simpU) {
797 225 : if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
798 135 : else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
799 225 : ww = w1; w = w2;
800 225 : PetscCall(PetscArraycpy(ww,proj->V2,szd*nv));
801 225 : PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
802 2145 : for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
803 555 : for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
804 225 : alpha = -alpha;
805 225 : PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
806 225 : if (deriv) {
807 90 : PetscCall(PetscBLASIntCast(szd*nv,&szdk));
808 90 : PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha2));
809 90 : PetscCall(PetscArraycpy(w,proj->V2,szd*nv));
810 858 : for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
811 90 : alpha2 = -alpha2;
812 90 : PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
813 90 : alpha2 = 1.0;
814 90 : PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
815 90 : PetscCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
816 : }
817 555 : for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
818 : } else {
819 471 : PetscCall(NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd));
820 747 : w = deriv?w2:w1; ww = deriv?w1:w2;
821 471 : PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
822 471 : s = PetscSqrtReal(np);
823 471 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
824 : }
825 696 : PetscCall(MatDenseGetArrayRead(proj->V1pApX[i],&E));
826 696 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
827 696 : PetscCall(MatDenseRestoreArrayRead(proj->V1pApX[i],&E));
828 : }
829 :
830 : /* mat = mat + V2^*A(lambda)V1 */
831 232 : basisv = proj->work+nwork; nwork += szd;
832 232 : hH = proj->work+nwork; nwork += szd*szd;
833 232 : hHprev = proj->work+nwork; nwork += szd*szd;
834 232 : AB = proj->work+nwork;
835 232 : PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv));
836 438 : if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
837 581 : for (j=0;j<n;j++)
838 932 : for (i=0;i<n;i++)
839 583 : for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
840 232 : PetscCall(MatDenseGetArrayRead(proj->XpV1,&E));
841 232 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
842 232 : PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
843 232 : PetscCall(MatDenseRestoreArrayRead(proj->XpV1,&E));
844 :
845 : /* mat = mat + V2^*B(lambda)V2 */
846 232 : PetscCall(PetscArrayzero(AB,szd*szd));
847 232 : for (i=1;i<extop->midx;i++) {
848 0 : PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
849 0 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
850 0 : PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
851 0 : pts = hHprev; hHprev = hH; hH = pts;
852 : }
853 232 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
854 232 : PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
855 232 : PetscCall(DSRestoreArray(ds,mat,&T));
856 : }
857 850 : PetscFunctionReturn(PETSC_SUCCESS);
858 : }
859 :
860 160 : PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
861 : {
862 160 : PetscInt k,j,n=extop->n,dim;
863 160 : Vec v,ve;
864 160 : BV V1;
865 160 : Mat G;
866 160 : NEP nep=extop->nep;
867 160 : NEP_DEF_PROJECT proj;
868 :
869 160 : PetscFunctionBegin;
870 160 : NEPCheckSplit(extop->nep,1);
871 160 : proj = extop->proj;
872 160 : if (!proj) {
873 : /* Initialize the projection data structure */
874 17 : PetscCall(PetscNew(&proj));
875 17 : extop->proj = proj;
876 17 : proj->extop = extop;
877 17 : PetscCall(BVGetSizes(Vext,NULL,NULL,&dim));
878 17 : proj->dim = dim;
879 17 : if (extop->szd) {
880 3 : proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
881 3 : PetscCall(PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work));
882 12 : for (j=0;j<nep->nt;j++) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]));
883 3 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1));
884 3 : PetscCall(BVCreateVec(extop->X,&proj->w));
885 3 : PetscCall(BVDuplicateResize(extop->X,proj->dim,&proj->V1));
886 : }
887 17 : PetscCall(DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj));
888 : }
889 :
890 : /* Split Vext in V1 and V2 */
891 160 : if (extop->szd) {
892 122 : for (j=j0;j<j1;j++) {
893 61 : PetscCall(BVGetColumn(Vext,j,&ve));
894 61 : PetscCall(BVGetColumn(proj->V1,j,&v));
895 61 : PetscCall(NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE));
896 61 : PetscCall(BVRestoreColumn(proj->V1,j,&v));
897 61 : PetscCall(BVRestoreColumn(Vext,j,&ve));
898 : }
899 61 : V1 = proj->V1;
900 : } else V1 = Vext;
901 :
902 : /* Compute matrices V1^* A_i V1 */
903 160 : PetscCall(BVSetActiveColumns(V1,j0,j1));
904 640 : for (k=0;k<nep->nt;k++) {
905 480 : PetscCall(DSGetMat(ds,DSMatExtra[k],&G));
906 480 : PetscCall(BVMatProject(V1,nep->A[k],V1,G));
907 480 : PetscCall(DSRestoreMat(ds,DSMatExtra[k],&G));
908 : }
909 :
910 160 : if (extop->n) {
911 42 : if (extop->szd) {
912 : /* Compute matrices V1^* A_i X and V1^* X */
913 42 : PetscCall(BVSetActiveColumns(extop->W,0,n));
914 168 : for (k=0;k<nep->nt;k++) {
915 126 : PetscCall(BVMatMult(extop->X,nep->A[k],extop->W));
916 126 : PetscCall(BVDot(extop->W,V1,proj->V1pApX[k]));
917 : }
918 42 : PetscCall(BVDot(V1,extop->X,proj->XpV1));
919 : }
920 : }
921 160 : PetscFunctionReturn(PETSC_SUCCESS);
922 : }
|