Line data Source code
1 : /*
2 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 : SLEPc - Scalable Library for Eigenvalue Problem Computations
4 : Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5 :
6 : This file is part of SLEPc.
7 : SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 : */
10 :
11 : #include <slepc/private/nepimpl.h> /*I "slepcnep.h" I*/
12 : #include <slepcblaslapack.h>
13 : #include "nepdefl.h"
14 :
15 66 : PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
16 : {
17 66 : PetscFunctionBegin;
18 66 : if (X) *X = extop->X;
19 66 : if (H) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H));
20 66 : PetscFunctionReturn(PETSC_SUCCESS);
21 : }
22 :
23 125 : static PetscErrorCode NEPDeflationExtendInvariantPair(NEP_EXT_OP extop,Vec u,PetscScalar lambda,PetscInt k)
24 : {
25 125 : Vec uu;
26 125 : PetscInt ld,i;
27 125 : PetscMPIInt np;
28 125 : PetscReal norm;
29 :
30 125 : PetscFunctionBegin;
31 125 : PetscCall(BVGetColumn(extop->X,k,&uu));
32 125 : ld = extop->szd+1;
33 125 : PetscCall(NEPDeflationCopyToExtendedVec(extop,uu,extop->H+k*ld,u,PETSC_TRUE));
34 125 : PetscCall(BVRestoreColumn(extop->X,k,&uu));
35 125 : PetscCall(BVNormColumn(extop->X,k,NORM_2,&norm));
36 125 : PetscCall(BVScaleColumn(extop->X,k,1.0/norm));
37 125 : PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)u),&np));
38 216 : for (i=0;i<k;i++) extop->H[k*ld+i] *= PetscSqrtReal(np)/norm;
39 125 : extop->H[k*(ld+1)] = lambda;
40 125 : PetscFunctionReturn(PETSC_SUCCESS);
41 : }
42 :
43 23 : PetscErrorCode NEPDeflationExtractEigenpair(NEP_EXT_OP extop,PetscInt k,Vec u,PetscScalar lambda,DS ds)
44 : {
45 23 : Mat A,H;
46 23 : PetscInt ldh=extop->szd+1,ldds,k1=k+1;
47 23 : PetscScalar *eigr,*eigi,*t,*Z;
48 23 : Vec x;
49 :
50 23 : PetscFunctionBegin;
51 23 : PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,k));
52 23 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k1,k1,extop->H,&H));
53 23 : PetscCall(MatDenseSetLDA(H,ldh));
54 23 : PetscCall(PetscCalloc3(k1,&eigr,k1,&eigi,extop->szd,&t));
55 23 : PetscCall(DSReset(ds));
56 23 : PetscCall(DSSetType(ds,DSNHEP));
57 23 : PetscCall(DSAllocate(ds,ldh));
58 23 : PetscCall(DSGetLeadingDimension(ds,&ldds));
59 23 : PetscCall(DSSetDimensions(ds,k1,0,k1));
60 23 : PetscCall(DSGetMat(ds,DS_MAT_A,&A));
61 23 : PetscCall(MatCopy(H,A,SAME_NONZERO_PATTERN));
62 23 : PetscCall(DSRestoreMat(ds,DS_MAT_A,&A));
63 23 : PetscCall(MatDestroy(&H));
64 23 : PetscCall(DSSolve(ds,eigr,eigi));
65 23 : PetscCall(DSVectors(ds,DS_MAT_X,&k,NULL));
66 23 : PetscCall(DSGetArray(ds,DS_MAT_X,&Z));
67 23 : PetscCall(BVMultColumn(extop->X,1.0,Z[k*ldds+k],k,Z+k*ldds));
68 23 : PetscCall(DSRestoreArray(ds,DS_MAT_X,&Z));
69 23 : PetscCall(BVGetColumn(extop->X,k,&x));
70 23 : PetscCall(NEPDeflationCopyToExtendedVec(extop,x,t,u,PETSC_FALSE));
71 23 : PetscCall(BVRestoreColumn(extop->X,k,&x));
72 23 : PetscCall(PetscFree3(eigr,eigi,t));
73 23 : PetscFunctionReturn(PETSC_SUCCESS);
74 : }
75 :
76 275 : PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
77 : {
78 275 : PetscMPIInt np,rk,count;
79 275 : PetscScalar *array1,*array2;
80 275 : PetscInt nloc;
81 :
82 275 : PetscFunctionBegin;
83 275 : if (extop->szd) {
84 181 : PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)vex),&rk));
85 181 : PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)vex),&np));
86 181 : PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
87 181 : if (v) {
88 181 : PetscCall(VecGetArray(v,&array1));
89 181 : PetscCall(VecGetArray(vex,&array2));
90 181 : if (back) PetscCall(PetscArraycpy(array1,array2,nloc));
91 42 : else PetscCall(PetscArraycpy(array2,array1,nloc));
92 181 : PetscCall(VecRestoreArray(v,&array1));
93 181 : PetscCall(VecRestoreArray(vex,&array2));
94 : }
95 181 : if (a) {
96 162 : PetscCall(VecGetArray(vex,&array2));
97 162 : if (back) {
98 139 : PetscCall(PetscArraycpy(a,array2+nloc,extop->szd));
99 139 : PetscCall(PetscMPIIntCast(extop->szd,&count));
100 278 : PetscCallMPI(MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
101 : } else {
102 23 : PetscCall(PetscArraycpy(array2+nloc,a,extop->szd));
103 23 : PetscCall(PetscMPIIntCast(extop->szd,&count));
104 46 : PetscCallMPI(MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
105 : }
106 162 : PetscCall(VecRestoreArray(vex,&array2));
107 : }
108 : } else {
109 94 : if (back) PetscCall(VecCopy(vex,v));
110 47 : else PetscCall(VecCopy(v,vex));
111 : }
112 275 : PetscFunctionReturn(PETSC_SUCCESS);
113 : }
114 :
115 74 : PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
116 : {
117 74 : PetscInt nloc;
118 74 : Vec u;
119 74 : VecType type;
120 :
121 74 : PetscFunctionBegin;
122 74 : if (extop->szd) {
123 26 : PetscCall(BVGetColumn(extop->nep->V,0,&u));
124 26 : PetscCall(VecGetType(u,&type));
125 26 : PetscCall(BVRestoreColumn(extop->nep->V,0,&u));
126 26 : PetscCall(VecCreate(PetscObjectComm((PetscObject)extop->nep),v));
127 26 : PetscCall(VecSetType(*v,type));
128 26 : PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
129 26 : nloc += extop->szd;
130 26 : PetscCall(VecSetSizes(*v,nloc,PETSC_DECIDE));
131 48 : } else PetscCall(BVCreateVec(extop->nep->V,v));
132 74 : PetscFunctionReturn(PETSC_SUCCESS);
133 : }
134 :
135 17 : PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
136 : {
137 17 : PetscInt nloc;
138 17 : BVType type;
139 17 : BVOrthogType otype;
140 17 : BVOrthogRefineType oref;
141 17 : PetscReal oeta;
142 17 : BVOrthogBlockType oblock;
143 17 : NEP nep=extop->nep;
144 17 : VecType vtype;
145 :
146 17 : PetscFunctionBegin;
147 17 : if (extop->szd) {
148 3 : PetscCall(BVGetSizes(nep->V,&nloc,NULL,NULL));
149 3 : PetscCall(BVCreate(PetscObjectComm((PetscObject)nep),V));
150 3 : PetscCall(BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz));
151 3 : PetscCall(BVGetType(nep->V,&type));
152 3 : PetscCall(BVSetType(*V,type));
153 3 : PetscCall(BVGetVecType(nep->V,&vtype));
154 3 : PetscCall(BVSetVecType(*V,vtype));
155 3 : PetscCall(BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock));
156 3 : PetscCall(BVSetOrthogonalization(*V,otype,oref,oeta,oblock));
157 3 : PetscCall(PetscObjectStateIncrease((PetscObject)*V));
158 14 : } else PetscCall(BVDuplicateResize(nep->V,sz,V));
159 17 : PetscFunctionReturn(PETSC_SUCCESS);
160 : }
161 :
162 17 : PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
163 : {
164 17 : PetscInt n,next,i;
165 17 : PetscRandom rand;
166 17 : PetscScalar *array;
167 17 : PetscMPIInt nn,np;
168 :
169 17 : PetscFunctionBegin;
170 17 : PetscCall(BVGetRandomContext(extop->nep->V,&rand));
171 17 : PetscCall(VecSetRandom(v,rand));
172 17 : if (extop->szd) {
173 17 : PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v),&np));
174 17 : PetscCall(BVGetSizes(extop->nep->V,&n,NULL,NULL));
175 17 : PetscCall(VecGetLocalSize(v,&next));
176 17 : PetscCall(VecGetArray(v,&array));
177 25 : for (i=n+extop->n;i<next;i++) array[i] = 0.0;
178 42 : for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
179 17 : PetscCall(PetscMPIIntCast(extop->n,&nn));
180 34 : PetscCallMPI(MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PetscObjectComm((PetscObject)v)));
181 17 : PetscCall(VecRestoreArray(v,&array));
182 : }
183 17 : PetscFunctionReturn(PETSC_SUCCESS);
184 : }
185 :
186 36 : static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
187 : {
188 36 : PetscInt i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
189 36 : PetscScalar sone=1.0,zero=0.0;
190 36 : PetscBLASInt ldh_,ldhj_,n_;
191 :
192 36 : PetscFunctionBegin;
193 36 : i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
194 36 : PetscCall(PetscArrayzero(Hj,i));
195 36 : PetscCall(PetscBLASIntCast(ldhj+1,&ldh_));
196 36 : PetscCall(PetscBLASIntCast(ldhj,&ldhj_));
197 36 : PetscCall(PetscBLASIntCast(n,&n_));
198 36 : if (idx<1) {
199 91 : if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
200 0 : else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
201 : } else {
202 0 : for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
203 0 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
204 0 : for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
205 0 : if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
206 : }
207 36 : if (idx<0) {
208 36 : idx = -idx;
209 36 : for (k=1;k<idx;k++) {
210 0 : for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
211 0 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
212 0 : for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
213 0 : if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
214 : }
215 : }
216 36 : PetscFunctionReturn(PETSC_SUCCESS);
217 : }
218 :
219 102 : PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
220 : {
221 102 : PetscInt i;
222 :
223 102 : PetscFunctionBegin;
224 102 : PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,extop->n));
225 102 : extop->n++;
226 102 : PetscCall(BVSetActiveColumns(extop->X,0,extop->n));
227 102 : if (extop->n <= extop->szd) {
228 : /* update XpX */
229 36 : PetscCall(BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd));
230 36 : extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
231 55 : for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
232 : /* determine minimality index */
233 36 : extop->midx = PetscMin(extop->max_midx,extop->n);
234 : /* polynominal basis coefficients */
235 72 : for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
236 : /* evaluate the polynomial basis in H */
237 36 : PetscCall(NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL));
238 : }
239 102 : PetscFunctionReturn(PETSC_SUCCESS);
240 : }
241 :
242 791 : static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
243 : {
244 791 : PetscInt i,j,k,off,ini,fin,sz,ldh,n=extop->n;
245 791 : Mat A,B;
246 791 : PetscScalar *array;
247 791 : const PetscScalar *barray;
248 :
249 791 : PetscFunctionBegin;
250 791 : if (idx<0) {ini = 0; fin = extop->nep->nt;}
251 471 : else {ini = idx; fin = idx+1;}
252 791 : if (y) sz = hfjp?n+2:n+1;
253 791 : else sz = hfjp?3*n:2*n;
254 791 : ldh = extop->szd+1;
255 791 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A));
256 791 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B));
257 791 : PetscCall(MatDenseGetArray(A,&array));
258 1969 : for (j=0;j<n;j++)
259 3142 : for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
260 791 : PetscCall(MatDenseRestoreArrayWrite(A,&array));
261 791 : if (y) {
262 0 : PetscCall(MatDenseGetArray(A,&array));
263 0 : array[extop->n*(sz+1)] = lambda;
264 0 : if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
265 0 : for (i=0;i<n;i++) array[n*sz+i] = y[i];
266 0 : PetscCall(MatDenseRestoreArrayWrite(A,&array));
267 0 : for (j=ini;j<fin;j++) {
268 0 : PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
269 0 : PetscCall(MatDenseGetArrayRead(B,&barray));
270 0 : for (i=0;i<n;i++) hfj[j*ld+i] = barray[n*sz+i];
271 0 : if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = barray[(n+1)*sz+i];
272 0 : PetscCall(MatDenseRestoreArrayRead(B,&barray));
273 : }
274 : } else {
275 791 : off = idx<0?ld*n:0;
276 791 : PetscCall(MatDenseGetArray(A,&array));
277 1969 : for (k=0;k<n;k++) {
278 1178 : array[(n+k)*sz+k] = 1.0;
279 1178 : array[(n+k)*sz+n+k] = lambda;
280 : }
281 1969 : if (hfjp) for (k=0;k<n;k++) {
282 1178 : array[(2*n+k)*sz+n+k] = 1.0;
283 1178 : array[(2*n+k)*sz+2*n+k] = lambda;
284 : }
285 791 : PetscCall(MatDenseRestoreArray(A,&array));
286 2219 : for (j=ini;j<fin;j++) {
287 1428 : PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
288 1428 : PetscCall(MatDenseGetArrayRead(B,&barray));
289 6996 : for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = barray[n*sz+i*sz+k];
290 6996 : if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = barray[2*n*sz+i*sz+k];
291 1428 : PetscCall(MatDenseRestoreArrayRead(B,&barray));
292 : }
293 : }
294 791 : PetscCall(MatDestroy(&A));
295 791 : PetscCall(MatDestroy(&B));
296 791 : PetscFunctionReturn(PETSC_SUCCESS);
297 : }
298 :
299 4042 : static PetscErrorCode MatMult_NEPDeflation(Mat M,Vec x,Vec y)
300 : {
301 4042 : NEP_DEF_MATSHELL *matctx;
302 4042 : NEP_EXT_OP extop;
303 4042 : Vec x1,y1;
304 4042 : PetscScalar *yy,sone=1.0,zero=0.0;
305 4042 : const PetscScalar *xx;
306 4042 : PetscInt nloc,i;
307 4042 : PetscMPIInt np;
308 4042 : PetscBLASInt n_,one=1,szd_;
309 :
310 4042 : PetscFunctionBegin;
311 4042 : PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&np));
312 4042 : PetscCall(MatShellGetContext(M,&matctx));
313 4042 : extop = matctx->extop;
314 4042 : if (extop->ref) PetscCall(VecZeroEntries(y));
315 4042 : if (extop->szd) {
316 2271 : x1 = matctx->w[0]; y1 = matctx->w[1];
317 2271 : PetscCall(VecGetArrayRead(x,&xx));
318 2271 : PetscCall(VecPlaceArray(x1,xx));
319 2271 : PetscCall(VecGetArray(y,&yy));
320 2271 : PetscCall(VecPlaceArray(y1,yy));
321 2271 : PetscCall(MatMult(matctx->T,x1,y1));
322 2271 : if (!extop->ref && extop->n) {
323 1187 : PetscCall(VecGetLocalSize(x1,&nloc));
324 : /* copy for avoiding warning of constant array xx */
325 2849 : for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
326 1187 : PetscCall(BVMultVec(matctx->U,1.0,1.0,y1,matctx->work));
327 1187 : PetscCall(BVDotVec(extop->X,x1,matctx->work));
328 1187 : PetscCall(PetscBLASIntCast(extop->n,&n_));
329 1187 : PetscCall(PetscBLASIntCast(extop->szd,&szd_));
330 1187 : PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
331 1187 : PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
332 2849 : for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
333 : }
334 2271 : PetscCall(VecResetArray(x1));
335 2271 : PetscCall(VecRestoreArrayRead(x,&xx));
336 2271 : PetscCall(VecResetArray(y1));
337 2271 : PetscCall(VecRestoreArray(y,&yy));
338 1771 : } else PetscCall(MatMult(matctx->T,x,y));
339 4042 : PetscFunctionReturn(PETSC_SUCCESS);
340 : }
341 :
342 0 : static PetscErrorCode MatCreateVecs_NEPDeflation(Mat M,Vec *right,Vec *left)
343 : {
344 0 : NEP_DEF_MATSHELL *matctx;
345 :
346 0 : PetscFunctionBegin;
347 0 : PetscCall(MatShellGetContext(M,&matctx));
348 0 : if (right) PetscCall(VecDuplicate(matctx->w[0],right));
349 0 : if (left) PetscCall(VecDuplicate(matctx->w[0],left));
350 0 : PetscFunctionReturn(PETSC_SUCCESS);
351 : }
352 :
353 156 : static PetscErrorCode MatDestroy_NEPDeflation(Mat M)
354 : {
355 156 : NEP_DEF_MATSHELL *matctx;
356 :
357 156 : PetscFunctionBegin;
358 156 : PetscCall(MatShellGetContext(M,&matctx));
359 156 : if (matctx->extop->szd) {
360 44 : PetscCall(BVDestroy(&matctx->U));
361 44 : PetscCall(PetscFree2(matctx->hfj,matctx->work));
362 44 : PetscCall(PetscFree2(matctx->A,matctx->B));
363 44 : PetscCall(VecDestroy(&matctx->w[0]));
364 44 : PetscCall(VecDestroy(&matctx->w[1]));
365 : }
366 156 : if (matctx->P != matctx->T) PetscCall(MatDestroy(&matctx->P));
367 156 : PetscCall(MatDestroy(&matctx->T));
368 156 : PetscCall(PetscFree(matctx));
369 156 : PetscFunctionReturn(PETSC_SUCCESS);
370 : }
371 :
372 990 : static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
373 : {
374 990 : PetscScalar p;
375 990 : PetscInt i;
376 :
377 990 : PetscFunctionBegin;
378 990 : if (!jacobian) {
379 571 : val[0] = 1.0;
380 809 : for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
381 : } else {
382 419 : val[0] = 0.0;
383 419 : p = 1.0;
384 580 : for (i=1;i<extop->n;i++) {
385 161 : val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
386 161 : p *= (lambda-extop->bc[i-1]);
387 : }
388 : }
389 990 : PetscFunctionReturn(PETSC_SUCCESS);
390 : }
391 :
392 2927 : static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
393 : {
394 2927 : NEP_DEF_MATSHELL *matctx,*matctxC;
395 2927 : PetscInt nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
396 2927 : Mat F,Mshell,Mcomp;
397 2927 : PetscBool ini=PETSC_FALSE;
398 2927 : PetscScalar *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
399 2927 : PetscBLASInt n_,info,szd_;
400 :
401 2927 : PetscFunctionBegin;
402 2927 : if (!M) Mshell = jacobian?extop->MJ:extop->MF;
403 97 : else Mshell = *M;
404 2927 : Mcomp = jacobian?extop->MF:extop->MJ;
405 2927 : if (!Mshell) {
406 156 : ini = PETSC_TRUE;
407 156 : PetscCall(PetscNew(&matctx));
408 156 : PetscCall(MatGetLocalSize(extop->nep->function,&mloc,&nloc));
409 156 : nloc += szd; mloc += szd;
410 156 : PetscCall(MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell));
411 156 : PetscCall(MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))MatMult_NEPDeflation));
412 156 : PetscCall(MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))MatCreateVecs_NEPDeflation));
413 156 : PetscCall(MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))MatDestroy_NEPDeflation));
414 156 : matctx->nep = extop->nep;
415 156 : matctx->extop = extop;
416 156 : if (!M) {
417 115 : if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
418 66 : else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
419 115 : PetscCall(PetscObjectReference((PetscObject)matctx->T));
420 115 : if (!jacobian) {
421 66 : if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) {
422 2 : matctx->P = extop->nep->function_pre;
423 2 : PetscCall(PetscObjectReference((PetscObject)matctx->P));
424 64 : } else matctx->P = matctx->T;
425 : }
426 : } else {
427 41 : matctx->jacob = jacobian;
428 41 : PetscCall(MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES,&matctx->T));
429 41 : *M = Mshell;
430 41 : if (!jacobian) {
431 41 : if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) PetscCall(MatDuplicate(extop->nep->function_pre,MAT_DO_NOT_COPY_VALUES,&matctx->P));
432 41 : else matctx->P = matctx->T;
433 : }
434 : }
435 156 : if (szd) {
436 44 : PetscCall(BVCreateVec(extop->nep->V,matctx->w));
437 44 : PetscCall(VecDuplicate(matctx->w[0],matctx->w+1));
438 44 : PetscCall(BVDuplicateResize(extop->nep->V,szd,&matctx->U));
439 44 : PetscCall(PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work));
440 44 : PetscCall(PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B));
441 : }
442 2771 : } else PetscCall(MatShellGetContext(Mshell,&matctx));
443 2771 : if (ini || matctx->theta != lambda || matctx->n != extop->n) {
444 2238 : if (ini || matctx->theta != lambda) {
445 2344 : if (jacobian) PetscCall(NEPComputeJacobian(extop->nep,lambda,matctx->T));
446 1337 : else PetscCall(NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->P));
447 : }
448 2350 : if (n) {
449 758 : matctx->hfjset = PETSC_FALSE;
450 758 : if (!extop->simpU) {
451 : /* likely hfjp has been already computed */
452 561 : if (Mcomp) {
453 524 : PetscCall(MatShellGetContext(Mcomp,&matctxC));
454 524 : if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
455 241 : PetscCall(PetscArraycpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt));
456 241 : matctx->hfjset = PETSC_TRUE;
457 : }
458 : }
459 561 : hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
460 561 : if (!matctx->hfjset) {
461 320 : PetscCall(NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n));
462 320 : matctx->hfjset = PETSC_TRUE;
463 : }
464 561 : PetscCall(BVSetActiveColumns(matctx->U,0,n));
465 561 : hf = jacobian?hfjp:hfj;
466 561 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F));
467 561 : PetscCall(BVMatMult(extop->X,extop->nep->A[0],matctx->U));
468 561 : PetscCall(BVMultInPlace(matctx->U,F,0,n));
469 561 : PetscCall(BVSetActiveColumns(extop->W,0,extop->n));
470 1678 : for (j=1;j<extop->nep->nt;j++) {
471 1117 : PetscCall(BVMatMult(extop->X,extop->nep->A[j],extop->W));
472 1117 : PetscCall(MatDensePlaceArray(F,hf+j*n*n));
473 1117 : PetscCall(BVMult(matctx->U,1.0,1.0,extop->W,F));
474 1117 : PetscCall(MatDenseResetArray(F));
475 : }
476 561 : PetscCall(MatDestroy(&F));
477 : } else {
478 197 : hfj = matctx->hfj;
479 197 : PetscCall(BVSetActiveColumns(matctx->U,0,n));
480 197 : PetscCall(BVMatMult(extop->X,matctx->T,matctx->U));
481 437 : for (j=0;j<n;j++) {
482 566 : for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
483 240 : hfj[j*(n+1)] += lambda;
484 : }
485 197 : PetscCall(PetscBLASIntCast(n,&n_));
486 197 : PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
487 197 : PetscCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
488 197 : PetscCall(PetscFPTrapPop());
489 197 : SlepcCheckLapackInfo("trtri",info);
490 197 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F));
491 197 : PetscCall(BVMultInPlace(matctx->U,F,0,n));
492 197 : if (jacobian) {
493 83 : PetscCall(NEPDeflationComputeFunction(extop,lambda,NULL));
494 83 : PetscCall(MatShellGetContext(extop->MF,&matctxC));
495 83 : PetscCall(BVMult(matctx->U,-1.0,1.0,matctxC->U,F));
496 : }
497 197 : PetscCall(MatDestroy(&F));
498 : }
499 758 : PetscCall(PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev));
500 758 : PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian));
501 758 : A = matctx->A;
502 758 : PetscCall(PetscArrayzero(A,szd*szd));
503 1361 : if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
504 1798 : for (j=0;j<n;j++)
505 2664 : for (i=0;i<n;i++)
506 1624 : for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
507 758 : PetscCall(PetscBLASIntCast(n,&n_));
508 758 : PetscCall(PetscBLASIntCast(szd,&szd_));
509 758 : B = matctx->B;
510 758 : PetscCall(PetscArrayzero(B,szd*szd));
511 758 : for (i=1;i<extop->midx;i++) {
512 0 : PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
513 0 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
514 0 : PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
515 0 : pts = hHprev; hHprev = hH; hH = pts;
516 : }
517 758 : PetscCall(PetscFree3(basisv,hH,hHprev));
518 : }
519 2350 : matctx->theta = lambda;
520 2350 : matctx->n = extop->n;
521 : }
522 2927 : PetscFunctionReturn(PETSC_SUCCESS);
523 : }
524 :
525 1732 : PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
526 : {
527 1732 : PetscFunctionBegin;
528 1732 : PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL));
529 1732 : if (F) *F = extop->MF;
530 1732 : PetscFunctionReturn(PETSC_SUCCESS);
531 : }
532 :
533 1007 : PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
534 : {
535 1007 : PetscFunctionBegin;
536 1007 : PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL));
537 1007 : if (J) *J = extop->MJ;
538 1007 : PetscFunctionReturn(PETSC_SUCCESS);
539 : }
540 :
541 188 : PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
542 : {
543 188 : NEP_DEF_MATSHELL *matctx;
544 188 : NEP_DEF_FUN_SOLVE solve;
545 188 : PetscInt i,j,n=extop->n;
546 188 : Vec u,tu;
547 188 : Mat F;
548 188 : PetscScalar snone=-1.0,sone=1.0;
549 188 : PetscBLASInt n_,szd_,ldh_,*p,info;
550 188 : Mat Mshell;
551 :
552 188 : PetscFunctionBegin;
553 188 : solve = extop->solve;
554 188 : if (lambda!=solve->theta || n!=solve->n) {
555 188 : PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T));
556 188 : Mshell = (solve->sincf)?extop->MF:solve->T;
557 188 : PetscCall(MatShellGetContext(Mshell,&matctx));
558 188 : PetscCall(NEP_KSPSetOperators(solve->ksp,matctx->T,matctx->P));
559 188 : if (!extop->ref && n) {
560 70 : PetscCall(PetscBLASIntCast(n,&n_));
561 70 : PetscCall(PetscBLASIntCast(extop->szd,&szd_));
562 70 : PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
563 70 : if (!extop->simpU) {
564 57 : PetscCall(BVSetActiveColumns(solve->T_1U,0,n));
565 147 : for (i=0;i<n;i++) {
566 90 : PetscCall(BVGetColumn(matctx->U,i,&u));
567 90 : PetscCall(BVGetColumn(solve->T_1U,i,&tu));
568 90 : PetscCall(KSPSolve(solve->ksp,u,tu));
569 90 : PetscCall(BVRestoreColumn(solve->T_1U,i,&tu));
570 90 : PetscCall(BVRestoreColumn(matctx->U,i,&u));
571 : }
572 57 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F));
573 57 : PetscCall(BVDot(solve->T_1U,extop->X,F));
574 57 : PetscCall(MatDestroy(&F));
575 : } else {
576 31 : for (j=0;j<n;j++)
577 46 : for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
578 31 : for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
579 13 : PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
580 31 : for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
581 : }
582 70 : PetscCall(PetscArraycpy(solve->M,matctx->B,extop->szd*extop->szd));
583 70 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
584 70 : PetscCall(PetscMalloc1(n,&p));
585 70 : PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
586 70 : PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
587 70 : SlepcCheckLapackInfo("getrf",info);
588 70 : PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
589 70 : SlepcCheckLapackInfo("getri",info);
590 70 : PetscCall(PetscFPTrapPop());
591 70 : PetscCall(PetscFree(p));
592 : }
593 188 : solve->theta = lambda;
594 188 : solve->n = n;
595 : }
596 188 : PetscFunctionReturn(PETSC_SUCCESS);
597 : }
598 :
599 3316 : PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
600 : {
601 3316 : Vec b1,x1;
602 3316 : PetscScalar *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
603 3316 : NEP_DEF_MATSHELL *matctx;
604 3316 : NEP_DEF_FUN_SOLVE solve=extop->solve;
605 3316 : PetscBLASInt one=1,szd_,n_,ldh_;
606 3316 : PetscInt nloc,i;
607 3316 : PetscMPIInt np,count;
608 :
609 3316 : PetscFunctionBegin;
610 3316 : if (extop->ref) PetscCall(VecZeroEntries(x));
611 3316 : if (extop->szd) {
612 2163 : x1 = solve->w[0]; b1 = solve->w[1];
613 2163 : PetscCall(VecGetArray(x,&xx));
614 2163 : PetscCall(VecPlaceArray(x1,xx));
615 2163 : PetscCall(VecGetArray(b,&bb));
616 2163 : PetscCall(VecPlaceArray(b1,bb));
617 : } else {
618 : b1 = b; x1 = x;
619 : }
620 3316 : PetscCall(KSPSolve(extop->solve->ksp,b1,x1));
621 3316 : if (!extop->ref && extop->n && extop->szd) {
622 1136 : PetscCall(PetscBLASIntCast(extop->szd,&szd_));
623 1136 : PetscCall(PetscBLASIntCast(extop->n,&n_));
624 1136 : PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
625 1136 : PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
626 1136 : PetscCall(PetscMalloc2(extop->n,&b2,extop->n,&x2));
627 1136 : PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)b),&np));
628 2717 : for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np);
629 1136 : w = solve->work; w2 = solve->work+extop->n;
630 1136 : PetscCall(MatShellGetContext(solve->sincf?extop->MF:solve->T,&matctx));
631 1136 : PetscCall(PetscArraycpy(w2,b2,extop->n));
632 1136 : PetscCall(BVDotVec(extop->X,x1,w));
633 1136 : PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
634 1136 : PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
635 1136 : if (extop->simpU) {
636 577 : for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
637 577 : for (i=0;i<extop->n;i++) w[i] = x2[i];
638 260 : PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
639 577 : for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
640 260 : PetscCall(BVMultVec(extop->X,-1.0,1.0,x1,w));
641 876 : } else PetscCall(BVMultVec(solve->T_1U,-1.0,1.0,x1,x2));
642 2717 : for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
643 1136 : PetscCall(PetscMPIIntCast(extop->n,&count));
644 2272 : PetscCallMPI(MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b)));
645 : }
646 3316 : if (extop->szd) {
647 2163 : PetscCall(VecResetArray(x1));
648 2163 : PetscCall(VecRestoreArray(x,&xx));
649 2163 : PetscCall(VecResetArray(b1));
650 2163 : PetscCall(VecRestoreArray(b,&bb));
651 2163 : if (!extop->ref && extop->n) PetscCall(PetscFree2(b2,x2));
652 : }
653 3316 : PetscFunctionReturn(PETSC_SUCCESS);
654 : }
655 :
656 102 : PetscErrorCode NEPDeflationSetRefine(NEP_EXT_OP extop,PetscBool ref)
657 : {
658 102 : PetscFunctionBegin;
659 102 : extop->ref = ref;
660 102 : PetscFunctionReturn(PETSC_SUCCESS);
661 : }
662 :
663 132 : PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
664 : {
665 132 : PetscInt j;
666 132 : NEP_DEF_FUN_SOLVE solve;
667 :
668 132 : PetscFunctionBegin;
669 132 : if (!extop) PetscFunctionReturn(PETSC_SUCCESS);
670 66 : PetscCall(PetscFree(extop->H));
671 66 : PetscCall(BVDestroy(&extop->X));
672 66 : if (extop->szd) {
673 19 : PetscCall(VecDestroy(&extop->w));
674 19 : PetscCall(PetscFree3(extop->Hj,extop->XpX,extop->bc));
675 19 : PetscCall(BVDestroy(&extop->W));
676 : }
677 66 : PetscCall(MatDestroy(&extop->MF));
678 66 : PetscCall(MatDestroy(&extop->MJ));
679 66 : if (extop->solve) {
680 66 : solve = extop->solve;
681 66 : if (extop->szd) {
682 19 : if (!extop->simpU) PetscCall(BVDestroy(&solve->T_1U));
683 19 : PetscCall(PetscFree2(solve->M,solve->work));
684 19 : PetscCall(VecDestroy(&solve->w[0]));
685 19 : PetscCall(VecDestroy(&solve->w[1]));
686 : }
687 66 : if (!solve->sincf) PetscCall(MatDestroy(&solve->T));
688 66 : PetscCall(PetscFree(extop->solve));
689 : }
690 66 : if (extop->proj) {
691 17 : if (extop->szd) {
692 12 : for (j=0;j<extop->nep->nt;j++) PetscCall(MatDestroy(&extop->proj->V1pApX[j]));
693 3 : PetscCall(MatDestroy(&extop->proj->XpV1));
694 3 : PetscCall(PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work));
695 3 : PetscCall(VecDestroy(&extop->proj->w));
696 3 : PetscCall(BVDestroy(&extop->proj->V1));
697 : }
698 17 : PetscCall(PetscFree(extop->proj));
699 : }
700 66 : PetscCall(PetscFree(extop));
701 66 : PetscFunctionReturn(PETSC_SUCCESS);
702 : }
703 :
704 66 : PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
705 : {
706 66 : NEP_EXT_OP op;
707 66 : NEP_DEF_FUN_SOLVE solve;
708 66 : PetscInt szd;
709 66 : Vec x;
710 :
711 66 : PetscFunctionBegin;
712 66 : PetscCall(NEPDeflationReset(*extop));
713 66 : PetscCall(PetscNew(&op));
714 66 : *extop = op;
715 66 : op->nep = nep;
716 66 : op->n = 0;
717 66 : op->szd = szd = sz-1;
718 66 : op->max_midx = PetscMin(MAX_MINIDX,szd);
719 66 : op->X = X;
720 66 : if (!X) PetscCall(BVDuplicateResize(nep->V,sz,&op->X));
721 66 : else PetscCall(PetscObjectReference((PetscObject)X));
722 66 : PetscCall(PetscCalloc1(sz*sz,&(op)->H));
723 66 : if (op->szd) {
724 19 : PetscCall(BVGetColumn(op->X,0,&x));
725 19 : PetscCall(VecDuplicate(x,&op->w));
726 19 : PetscCall(BVRestoreColumn(op->X,0,&x));
727 19 : op->simpU = PETSC_FALSE;
728 19 : if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
729 : /* undocumented option to use the simple expression for U = T*X*inv(lambda-H) */
730 17 : PetscCall(PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL));
731 : } else {
732 2 : op->simpU = PETSC_TRUE;
733 : }
734 19 : PetscCall(PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc));
735 19 : PetscCall(BVDuplicateResize(op->X,op->szd,&op->W));
736 : }
737 66 : if (ksp) {
738 66 : PetscCall(PetscNew(&solve));
739 66 : op->solve = solve;
740 66 : solve->ksp = ksp;
741 66 : solve->sincf = sincfun;
742 66 : solve->n = -1;
743 66 : if (op->szd) {
744 19 : if (!op->simpU) PetscCall(BVDuplicateResize(nep->V,szd,&solve->T_1U));
745 19 : PetscCall(PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work));
746 19 : PetscCall(BVCreateVec(nep->V,&solve->w[0]));
747 19 : PetscCall(VecDuplicate(solve->w[0],&solve->w[1]));
748 : }
749 : }
750 66 : PetscFunctionReturn(PETSC_SUCCESS);
751 : }
752 :
753 850 : static PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
754 : {
755 850 : Mat A,Ei;
756 850 : PetscScalar *T,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
757 850 : PetscScalar alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
758 850 : const PetscScalar *E;
759 850 : PetscInt i,ldds,nwork=0,szd,nv,j,k,n;
760 850 : PetscBLASInt inc=1,nv_,ldds_,dim_,szdk,szd_,n_,ldh_;
761 850 : PetscMPIInt np;
762 850 : NEP_DEF_PROJECT proj=(NEP_DEF_PROJECT)ctx;
763 850 : NEP_EXT_OP extop=proj->extop;
764 850 : NEP nep=extop->nep;
765 :
766 850 : PetscFunctionBegin;
767 850 : PetscCall(DSGetDimensions(ds,&nv,NULL,NULL,NULL));
768 850 : PetscCall(DSGetLeadingDimension(ds,&ldds));
769 850 : PetscCall(DSGetMat(ds,mat,&A));
770 850 : PetscCall(MatZeroEntries(A));
771 : /* mat = V1^*T(lambda)V1 */
772 3400 : for (i=0;i<nep->nt;i++) {
773 2550 : if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
774 1515 : else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
775 2550 : PetscCall(DSGetMat(ds,DSMatExtra[i],&Ei));
776 2550 : PetscCall(MatAXPY(A,alpha,Ei,SAME_NONZERO_PATTERN));
777 2550 : PetscCall(DSRestoreMat(ds,DSMatExtra[i],&Ei));
778 : }
779 850 : PetscCall(DSRestoreMat(ds,mat,&A));
780 850 : if (!extop->ref && extop->n) {
781 232 : PetscCall(DSGetArray(ds,mat,&T));
782 232 : n = extop->n;
783 232 : szd = extop->szd;
784 232 : PetscCall(PetscArrayzero(proj->work,proj->lwork));
785 232 : PetscCall(PetscBLASIntCast(nv,&nv_));
786 232 : PetscCall(PetscBLASIntCast(n,&n_));
787 232 : PetscCall(PetscBLASIntCast(ldds,&ldds_));
788 232 : PetscCall(PetscBLASIntCast(szd,&szd_));
789 232 : PetscCall(PetscBLASIntCast(proj->dim,&dim_));
790 232 : PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
791 232 : w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
792 232 : nwork += 2*proj->dim*proj->dim;
793 :
794 : /* mat = mat + V1^*U(lambda)V2 */
795 928 : for (i=0;i<nep->nt;i++) {
796 696 : if (extop->simpU) {
797 225 : if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
798 135 : else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
799 225 : ww = w1; w = w2;
800 225 : PetscCall(PetscArraycpy(ww,proj->V2,szd*nv));
801 225 : PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
802 2145 : for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
803 555 : for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
804 225 : alpha = -alpha;
805 225 : PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
806 225 : if (deriv) {
807 90 : PetscCall(PetscBLASIntCast(szd*nv,&szdk));
808 90 : PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha2));
809 90 : PetscCall(PetscArraycpy(w,proj->V2,szd*nv));
810 858 : for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
811 90 : alpha2 = -alpha2;
812 90 : PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
813 90 : alpha2 = 1.0;
814 90 : PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
815 90 : PetscCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
816 : }
817 555 : for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
818 : } else {
819 471 : PetscCall(NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd));
820 471 : w = deriv?w2:w1; ww = deriv?w1:w2;
821 471 : PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
822 471 : s = PetscSqrtReal(np);
823 471 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
824 : }
825 696 : PetscCall(MatDenseGetArrayRead(proj->V1pApX[i],&E));
826 696 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
827 696 : PetscCall(MatDenseRestoreArrayRead(proj->V1pApX[i],&E));
828 : }
829 :
830 : /* mat = mat + V2^*A(lambda)V1 */
831 232 : basisv = proj->work+nwork; nwork += szd;
832 232 : hH = proj->work+nwork; nwork += szd*szd;
833 232 : hHprev = proj->work+nwork; nwork += szd*szd;
834 232 : AB = proj->work+nwork;
835 232 : PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv));
836 438 : if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
837 581 : for (j=0;j<n;j++)
838 932 : for (i=0;i<n;i++)
839 583 : for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
840 232 : PetscCall(MatDenseGetArrayRead(proj->XpV1,&E));
841 232 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
842 232 : PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
843 232 : PetscCall(MatDenseRestoreArrayRead(proj->XpV1,&E));
844 :
845 : /* mat = mat + V2^*B(lambda)V2 */
846 232 : PetscCall(PetscArrayzero(AB,szd*szd));
847 232 : for (i=1;i<extop->midx;i++) {
848 0 : PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
849 0 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
850 0 : PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
851 0 : pts = hHprev; hHprev = hH; hH = pts;
852 : }
853 232 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
854 232 : PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
855 232 : PetscCall(DSRestoreArray(ds,mat,&T));
856 : }
857 850 : PetscFunctionReturn(PETSC_SUCCESS);
858 : }
859 :
860 160 : PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
861 : {
862 160 : PetscInt k,j,n=extop->n,dim;
863 160 : Vec v,ve;
864 160 : BV V1;
865 160 : Mat G;
866 160 : NEP nep=extop->nep;
867 160 : NEP_DEF_PROJECT proj;
868 :
869 160 : PetscFunctionBegin;
870 160 : NEPCheckSplit(extop->nep,1);
871 160 : proj = extop->proj;
872 160 : if (!proj) {
873 : /* Initialize the projection data structure */
874 17 : PetscCall(PetscNew(&proj));
875 17 : extop->proj = proj;
876 17 : proj->extop = extop;
877 17 : PetscCall(BVGetSizes(Vext,NULL,NULL,&dim));
878 17 : proj->dim = dim;
879 17 : if (extop->szd) {
880 3 : proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
881 3 : PetscCall(PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work));
882 12 : for (j=0;j<nep->nt;j++) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]));
883 3 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1));
884 3 : PetscCall(BVCreateVec(extop->X,&proj->w));
885 3 : PetscCall(BVDuplicateResize(extop->X,proj->dim,&proj->V1));
886 : }
887 17 : PetscCall(DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj));
888 : }
889 :
890 : /* Split Vext in V1 and V2 */
891 160 : if (extop->szd) {
892 122 : for (j=j0;j<j1;j++) {
893 61 : PetscCall(BVGetColumn(Vext,j,&ve));
894 61 : PetscCall(BVGetColumn(proj->V1,j,&v));
895 61 : PetscCall(NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE));
896 61 : PetscCall(BVRestoreColumn(proj->V1,j,&v));
897 61 : PetscCall(BVRestoreColumn(Vext,j,&ve));
898 : }
899 61 : V1 = proj->V1;
900 : } else V1 = Vext;
901 :
902 : /* Compute matrices V1^* A_i V1 */
903 160 : PetscCall(BVSetActiveColumns(V1,j0,j1));
904 640 : for (k=0;k<nep->nt;k++) {
905 480 : PetscCall(DSGetMat(ds,DSMatExtra[k],&G));
906 480 : PetscCall(BVMatProject(V1,nep->A[k],V1,G));
907 480 : PetscCall(DSRestoreMat(ds,DSMatExtra[k],&G));
908 : }
909 :
910 160 : if (extop->n) {
911 42 : if (extop->szd) {
912 : /* Compute matrices V1^* A_i X and V1^* X */
913 42 : PetscCall(BVSetActiveColumns(extop->W,0,n));
914 168 : for (k=0;k<nep->nt;k++) {
915 126 : PetscCall(BVMatMult(extop->X,nep->A[k],extop->W));
916 126 : PetscCall(BVDot(extop->W,V1,proj->V1pApX[k]));
917 : }
918 42 : PetscCall(BVDot(V1,extop->X,proj->XpV1));
919 : }
920 : }
921 160 : PetscFunctionReturn(PETSC_SUCCESS);
922 : }
|