Line data Source code
1 : /*
2 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 : SLEPc - Scalable Library for Eigenvalue Problem Computations
4 : Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5 :
6 : This file is part of SLEPc.
7 : SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 : */
10 : /*
11 : BV developer functions needed in contour integral methods
12 : */
13 :
14 : #include <slepc/private/bvimpl.h> /*I "slepcbv.h" I*/
15 : #include <slepcblaslapack.h>
16 :
17 : #define p_id(i) (i*subcomm->n + subcomm->color)
18 :
19 : /*@
20 : BVScatter - Scatters the columns of a BV to another BV created in a
21 : subcommunicator.
22 :
23 : Collective
24 :
25 : Input Parameters:
26 : + Vin - input basis vectors (defined on the whole communicator)
27 : . scat - VecScatter object that contains the info for the communication
28 : - xdup - an auxiliary vector
29 :
30 : Output Parameter:
31 : . Vout - output basis vectors (defined on the subcommunicator)
32 :
33 : Notes:
34 : Currently implemented as a loop for each the active column, where each
35 : column is scattered independently. The vector xdup is defined on the
36 : contiguous parent communicator and have enough space to store one
37 : duplicate of the original vector per each subcommunicator.
38 :
39 : Level: developer
40 :
41 : .seealso: BVGetColumn()
42 : @*/
43 30 : PetscErrorCode BVScatter(BV Vin,BV Vout,VecScatter scat,Vec xdup)
44 : {
45 30 : PetscInt i;
46 30 : Vec v,z;
47 30 : const PetscScalar *array;
48 :
49 30 : PetscFunctionBegin;
50 30 : PetscValidHeaderSpecific(Vin,BV_CLASSID,1);
51 30 : PetscValidHeaderSpecific(Vout,BV_CLASSID,2);
52 30 : PetscValidHeaderSpecific(scat,PETSCSF_CLASSID,3);
53 30 : PetscValidHeaderSpecific(xdup,VEC_CLASSID,4);
54 30 : PetscCall(BVCreateVec(Vout,&z));
55 424 : for (i=Vin->l;i<Vin->k;i++) {
56 394 : PetscCall(BVGetColumn(Vin,i,&v));
57 394 : PetscCall(VecScatterBegin(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
58 394 : PetscCall(VecScatterEnd(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
59 394 : PetscCall(BVRestoreColumn(Vin,i,&v));
60 394 : PetscCall(VecGetArrayRead(xdup,&array));
61 394 : PetscCall(VecPlaceArray(z,array));
62 394 : PetscCall(BVInsertVec(Vout,i,z));
63 394 : PetscCall(VecResetArray(z));
64 394 : PetscCall(VecRestoreArrayRead(xdup,&array));
65 : }
66 30 : PetscCall(VecDestroy(&z));
67 30 : PetscFunctionReturn(PETSC_SUCCESS);
68 : }
69 :
70 : /*@
71 : BVSumQuadrature - Computes the sum of terms required in the quadrature
72 : rule to approximate the contour integral.
73 :
74 : Collective
75 :
76 : Input Parameters:
77 : + Y - input basis vectors
78 : . M - number of moments
79 : . L - block size
80 : . L_max - maximum block size
81 : . w - quadrature weights
82 : . zn - normalized quadrature points
83 : . scat - (optional) VecScatter object to communicate between subcommunicators
84 : . subcomm - subcommunicator layout
85 : . npoints - number of points to process by the subcommunicator
86 : - useconj - whether conjugate points can be used or not
87 :
88 : Output Parameter:
89 : . S - output basis vectors
90 :
91 : Notes:
92 : This is a generalization of BVMult(). The resulting matrix S consists of M
93 : panels of L columns, and the following formula is computed for each panel
94 : S_k = sum_j w_j*zn_j^k*Y_j, where Y_j is the j-th panel of Y containing
95 : the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
96 : the width of the panels in Y.
97 :
98 : When using subcommunicators, Y is stored in the subcommunicators for a subset
99 : of integration points. In that case, the computation is done in the subcomm
100 : and then scattered to the whole communicator in S using the VecScatter scat.
101 : The value npoints is the number of points to be processed in this subcomm
102 : and the flag useconj indicates whether symmetric points can be reused.
103 :
104 : Level: developer
105 :
106 : .seealso: BVMult(), BVScatter(), BVDotQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
107 : @*/
108 88 : PetscErrorCode BVSumQuadrature(BV S,BV Y,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
109 : {
110 88 : PetscInt i,j,k,nloc;
111 88 : Vec v,sj;
112 88 : PetscScalar *ppk,*pv,one=1.0;
113 :
114 88 : PetscFunctionBegin;
115 88 : PetscValidHeaderSpecific(S,BV_CLASSID,1);
116 88 : PetscValidHeaderSpecific(Y,BV_CLASSID,2);
117 88 : if (scat) PetscValidHeaderSpecific(scat,PETSCSF_CLASSID,8);
118 :
119 88 : PetscCall(BVGetSizes(Y,&nloc,NULL,NULL));
120 88 : PetscCall(PetscMalloc1(npoints,&ppk));
121 2116 : for (i=0;i<npoints;i++) ppk[i] = 1.0;
122 88 : PetscCall(BVCreateVec(Y,&v));
123 716 : for (k=0;k<M;k++) {
124 8480 : for (j=0;j<L;j++) {
125 7852 : PetscCall(VecSet(v,0.0));
126 195852 : for (i=0;i<npoints;i++) {
127 188000 : PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
128 188000 : PetscCall(BVMultVec(Y,ppk[i]*w[p_id(i)],1.0,v,&one));
129 : }
130 7852 : if (PetscUnlikely(useconj)) {
131 1120 : PetscCall(VecGetArray(v,&pv));
132 674656 : for (i=0;i<nloc;i++) pv[i] = 2.0*PetscRealPart(pv[i]);
133 1120 : PetscCall(VecRestoreArray(v,&pv));
134 : }
135 7852 : PetscCall(BVGetColumn(S,k*L+j,&sj));
136 7852 : if (PetscUnlikely(scat)) {
137 2768 : PetscCall(VecScatterBegin(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
138 2768 : PetscCall(VecScatterEnd(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
139 5084 : } else PetscCall(VecCopy(v,sj));
140 7852 : PetscCall(BVRestoreColumn(S,k*L+j,&sj));
141 : }
142 15108 : for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
143 : }
144 88 : PetscCall(PetscFree(ppk));
145 88 : PetscCall(VecDestroy(&v));
146 88 : PetscFunctionReturn(PETSC_SUCCESS);
147 : }
148 :
149 : /*@
150 : BVDotQuadrature - Computes the projection terms required in the quadrature
151 : rule to approximate the contour integral.
152 :
153 : Collective
154 :
155 : Input Parameters:
156 : + Y - first basis vectors
157 : . V - second basis vectors
158 : . M - number of moments
159 : . L - block size
160 : . L_max - maximum block size
161 : . w - quadrature weights
162 : . zn - normalized quadrature points
163 : . subcomm - subcommunicator layout
164 : . npoints - number of points to process by the subcommunicator
165 : - useconj - whether conjugate points can be used or not
166 :
167 : Output Parameter:
168 : . Mu - computed result
169 :
170 : Notes:
171 : This is a generalization of BVDot(). The resulting matrix Mu consists of M
172 : blocks of size LxL (placed horizontally), each of them computed as
173 : Mu_k = sum_j w_j*zn_j^k*V'*Y_j, where Y_j is the j-th panel of Y containing
174 : the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
175 : the width of the panels in Y.
176 :
177 : When using subcommunicators, Y is stored in the subcommunicators for a subset
178 : of integration points. In that case, the computation is done in the subcomm
179 : and then the final result is combined via reduction.
180 : The value npoints is the number of points to be processed in this subcomm
181 : and the flag useconj indicates whether symmetric points can be reused.
182 :
183 : Level: developer
184 :
185 : .seealso: BVDot(), BVScatter(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
186 : @*/
187 26 : PetscErrorCode BVDotQuadrature(BV Y,BV V,PetscScalar *Mu,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
188 : {
189 26 : PetscMPIInt sub_size,count;
190 26 : PetscInt i,j,k,s;
191 26 : PetscScalar *temp,*temp2,*ppk,alp;
192 26 : Mat H;
193 26 : const PetscScalar *pH;
194 26 : MPI_Comm child,parent;
195 :
196 26 : PetscFunctionBegin;
197 26 : PetscValidHeaderSpecific(Y,BV_CLASSID,1);
198 26 : PetscValidHeaderSpecific(V,BV_CLASSID,2);
199 :
200 26 : PetscCall(PetscSubcommGetChild(subcomm,&child));
201 26 : PetscCallMPI(MPI_Comm_size(child,&sub_size));
202 26 : PetscCall(PetscMalloc3(npoints*L*(L+1),&temp,2*M*L*L,&temp2,npoints,&ppk));
203 26 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,L,L_max*npoints,NULL,&H));
204 26 : PetscCall(PetscArrayzero(temp2,2*M*L*L));
205 26 : PetscCall(BVSetActiveColumns(Y,0,L_max*npoints));
206 26 : PetscCall(BVSetActiveColumns(V,0,L));
207 26 : PetscCall(BVDot(Y,V,H));
208 26 : PetscCall(MatDenseGetArrayRead(H,&pH));
209 718 : for (i=0;i<npoints;i++) {
210 8212 : for (j=0;j<L;j++) {
211 110144 : for (k=0;k<L;k++) {
212 102624 : temp[k+j*L+i*L*L] = pH[k+j*L+i*L*L_max];
213 : }
214 : }
215 : }
216 26 : PetscCall(MatDenseRestoreArrayRead(H,&pH));
217 718 : for (i=0;i<npoints;i++) ppk[i] = 1;
218 354 : for (k=0;k<2*M;k++) {
219 3344 : for (j=0;j<L;j++) {
220 94408 : for (i=0;i<npoints;i++) {
221 91392 : alp = ppk[i]*w[p_id(i)];
222 1327104 : for (s=0;s<L;s++) {
223 1235712 : if (!useconj) temp2[s+(j+k*L)*L] += alp*temp[s+(j+i*L)*L];
224 16896 : else temp2[s+(j+k*L)*L] += 2.0*PetscRealPart(alp*temp[s+(j+i*L)*L]);
225 : }
226 : }
227 : }
228 9096 : for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
229 : }
230 38802 : for (i=0;i<2*M*L*L;i++) temp2[i] /= sub_size;
231 26 : PetscCall(PetscMPIIntCast(2*M*L*L,&count));
232 26 : PetscCall(PetscSubcommGetParent(subcomm,&parent));
233 78 : PetscCallMPI(MPIU_Allreduce(temp2,Mu,count,MPIU_SCALAR,MPIU_SUM,parent));
234 26 : PetscCall(PetscFree3(temp,temp2,ppk));
235 26 : PetscCall(MatDestroy(&H));
236 26 : PetscFunctionReturn(PETSC_SUCCESS);
237 : }
238 :
239 : /*@
240 : BVTraceQuadrature - Computes an estimate of the number of eigenvalues
241 : inside a region via quantities computed in the quadrature rule of
242 : contour integral methods.
243 :
244 : Collective
245 :
246 : Input Parameters:
247 : + Y - first basis vectors
248 : . V - second basis vectors
249 : . L - block size
250 : . L_max - maximum block size
251 : . w - quadrature weights
252 : . scat - (optional) VecScatter object to communicate between subcommunicators
253 : . subcomm - subcommunicator layout
254 : . npoints - number of points to process by the subcommunicator
255 : - useconj - whether conjugate points can be used or not
256 :
257 : Output Parameter:
258 : . est_eig - estimated eigenvalue count
259 :
260 : Notes:
261 : This function returns an estimation of the number of eigenvalues in the
262 : region, computed as trace(V'*S_0), where S_0 is the first panel of S
263 : computed by BVSumQuadrature().
264 :
265 : When using subcommunicators, Y is stored in the subcommunicators for a subset
266 : of integration points. In that case, the computation is done in the subcomm
267 : and then scattered to the whole communicator in S using the VecScatter scat.
268 : The value npoints is the number of points to be processed in this subcomm
269 : and the flag useconj indicates whether symmetric points can be reused.
270 :
271 : Level: developer
272 :
273 : .seealso: BVScatter(), BVDotQuadrature(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
274 : @*/
275 73 : PetscErrorCode BVTraceQuadrature(BV Y,BV V,PetscInt L,PetscInt L_max,PetscScalar *w,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj,PetscReal *est_eig)
276 : {
277 73 : PetscInt i,j;
278 73 : Vec y,yall,vj;
279 73 : PetscScalar dot,sum=0.0,one=1.0;
280 :
281 73 : PetscFunctionBegin;
282 73 : PetscValidHeaderSpecific(Y,BV_CLASSID,1);
283 73 : PetscValidHeaderSpecific(V,BV_CLASSID,2);
284 73 : if (scat) PetscValidHeaderSpecific(scat,PETSCSF_CLASSID,6);
285 :
286 73 : PetscCall(BVCreateVec(Y,&y));
287 73 : PetscCall(BVCreateVec(V,&yall));
288 952 : for (j=0;j<L;j++) {
289 879 : PetscCall(VecSet(y,0.0));
290 21003 : for (i=0;i<npoints;i++) {
291 20124 : PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
292 20124 : PetscCall(BVMultVec(Y,w[p_id(i)],1.0,y,&one));
293 : }
294 879 : PetscCall(BVGetColumn(V,j,&vj));
295 879 : if (scat) {
296 394 : PetscCall(VecScatterBegin(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
297 394 : PetscCall(VecScatterEnd(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
298 394 : PetscCall(VecDot(vj,yall,&dot));
299 485 : } else PetscCall(VecDot(vj,y,&dot));
300 879 : PetscCall(BVRestoreColumn(V,j,&vj));
301 879 : if (useconj) sum += 2.0*PetscRealPart(dot);
302 755 : else sum += dot;
303 : }
304 73 : *est_eig = PetscAbsScalar(sum)/(PetscReal)L;
305 73 : PetscCall(VecDestroy(&y));
306 73 : PetscCall(VecDestroy(&yall));
307 73 : PetscFunctionReturn(PETSC_SUCCESS);
308 : }
309 :
310 28 : static PetscErrorCode BVSVDAndRank_Refine(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
311 : {
312 28 : PetscInt i,j,k,ml=S->k;
313 28 : PetscMPIInt len;
314 28 : PetscScalar *work,*B,*tempB,*sarray,*Q1,*Q2,*temp2,alpha=1.0,beta=0.0;
315 28 : PetscBLASInt l,m,n,lda,ldu,ldvt,lwork,info,ldb,ldc,lds;
316 : #if defined(PETSC_USE_COMPLEX)
317 28 : PetscReal *rwork;
318 : #endif
319 :
320 28 : PetscFunctionBegin;
321 28 : PetscCall(PetscBLASIntCast(S->ld,&lds));
322 28 : PetscCall(BVGetArray(S,&sarray));
323 28 : PetscCall(PetscMalloc6(ml*ml,&temp2,S->n*ml,&Q1,S->n*ml,&Q2,ml*ml,&B,ml*ml,&tempB,5*ml,&work));
324 : #if defined(PETSC_USE_COMPLEX)
325 28 : PetscCall(PetscMalloc1(5*ml,&rwork));
326 : #endif
327 28 : PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
328 :
329 28 : PetscCall(PetscArrayzero(B,ml*ml));
330 3104 : for (i=0;i<ml;i++) B[i*ml+i]=1;
331 :
332 84 : for (k=0;k<2;k++) {
333 56 : PetscCall(PetscBLASIntCast(S->n,&m));
334 56 : PetscCall(PetscBLASIntCast(ml,&l));
335 56 : n = l; lda = m; ldb = m; ldc = l;
336 56 : if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,sarray,&lds,sarray,&lds,&beta,pA,&ldc));
337 28 : else PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,Q1,&lda,Q1,&ldb,&beta,pA,&ldc));
338 56 : PetscCall(PetscArrayzero(temp2,ml*ml));
339 56 : PetscCall(PetscMPIIntCast(ml*ml,&len));
340 168 : PetscCallMPI(MPIU_Allreduce(pA,temp2,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)S)));
341 :
342 56 : PetscCall(PetscBLASIntCast(ml,&m));
343 56 : n = m; lda = m; lwork = 5*m, ldu = 1; ldvt = 1;
344 : #if defined(PETSC_USE_COMPLEX)
345 56 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
346 : #else
347 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
348 : #endif
349 56 : SlepcCheckLapackInfo("gesvd",info);
350 :
351 56 : PetscCall(PetscBLASIntCast(S->n,&l));
352 56 : PetscCall(PetscBLASIntCast(ml,&n));
353 56 : m = n; lda = l; ldb = m; ldc = l;
354 56 : if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,sarray,&lds,temp2,&ldb,&beta,Q1,&ldc));
355 28 : else PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,Q1,&lda,temp2,&ldb,&beta,Q2,&ldc));
356 :
357 56 : PetscCall(PetscBLASIntCast(ml,&l));
358 56 : m = l; n = l; lda = l; ldb = m; ldc = l;
359 56 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,B,&lda,temp2,&ldb,&beta,tempB,&ldc));
360 6208 : for (i=0;i<ml;i++) {
361 6152 : sigma[i] = PetscSqrtReal(sigma[i]);
362 2475624 : for (j=0;j<S->n;j++) {
363 2469472 : if (k%2) Q2[j+i*S->n] /= sigma[i];
364 1234736 : else Q1[j+i*S->n] /= sigma[i];
365 : }
366 753064 : for (j=0;j<ml;j++) B[j+i*ml] = tempB[j+i*ml]*sigma[i];
367 : }
368 : }
369 :
370 28 : PetscCall(PetscBLASIntCast(ml,&m));
371 28 : n = m; lda = m; ldu=1; ldvt=1;
372 : #if defined(PETSC_USE_COMPLEX)
373 28 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
374 : #else
375 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
376 : #endif
377 28 : SlepcCheckLapackInfo("gesvd",info);
378 :
379 28 : PetscCall(PetscBLASIntCast(S->n,&l));
380 28 : PetscCall(PetscBLASIntCast(ml,&n));
381 28 : m = n; lda = l; ldb = m;
382 28 : if (k%2) PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q1,&lda,B,&ldb,&beta,sarray,&lds));
383 28 : else PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q2,&lda,B,&ldb,&beta,sarray,&lds));
384 :
385 28 : PetscCall(PetscFPTrapPop());
386 28 : PetscCall(BVRestoreArray(S,&sarray));
387 :
388 28 : if (rank) {
389 28 : (*rank) = 0;
390 3104 : for (i=0;i<ml;i++) {
391 6152 : if (sigma[i]/PetscMax(sigma[0],1.0)>delta) (*rank)++;
392 : }
393 : }
394 28 : PetscCall(PetscFree6(temp2,Q1,Q2,B,tempB,work));
395 : #if defined(PETSC_USE_COMPLEX)
396 28 : PetscCall(PetscFree(rwork));
397 : #endif
398 28 : PetscFunctionReturn(PETSC_SUCCESS);
399 : }
400 :
401 34 : static PetscErrorCode BVSVDAndRank_QR(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
402 : {
403 34 : PetscInt i,n,ml=S->k;
404 34 : PetscBLASInt m,lda,lwork,info;
405 34 : PetscScalar *work;
406 34 : PetscReal *rwork;
407 34 : Mat A;
408 34 : Vec v;
409 :
410 34 : PetscFunctionBegin;
411 : /* Compute QR factorizaton of S */
412 34 : PetscCall(BVGetSizes(S,NULL,&n,NULL));
413 34 : n = PetscMin(n,ml);
414 34 : PetscCall(BVSetActiveColumns(S,0,n));
415 34 : PetscCall(PetscArrayzero(pA,ml*n));
416 34 : PetscCall(MatCreateDense(PETSC_COMM_SELF,n,n,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
417 34 : PetscCall(BVOrthogonalize(S,A));
418 34 : if (n<ml) {
419 : /* the rest of the factorization */
420 533 : for (i=n;i<ml;i++) {
421 522 : PetscCall(BVGetColumn(S,i,&v));
422 522 : PetscCall(BVOrthogonalizeVec(S,v,pA+i*n,NULL,NULL));
423 522 : PetscCall(BVRestoreColumn(S,i,&v));
424 : }
425 : }
426 34 : PetscCall(PetscBLASIntCast(n,&lda));
427 34 : PetscCall(PetscBLASIntCast(ml,&m));
428 34 : PetscCall(PetscMalloc2(5*ml,&work,5*ml,&rwork));
429 34 : lwork = 5*m;
430 34 : PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
431 : #if !defined (PETSC_USE_COMPLEX)
432 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,&info));
433 : #else
434 34 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,rwork,&info));
435 : #endif
436 34 : SlepcCheckLapackInfo("gesvd",info);
437 34 : PetscCall(PetscFPTrapPop());
438 34 : *rank = 0;
439 2848 : for (i=0;i<n;i++) {
440 5628 : if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
441 : }
442 : /* n first columns of A have the left singular vectors */
443 34 : PetscCall(BVMultInPlace(S,A,0,*rank));
444 34 : PetscCall(PetscFree2(work,rwork));
445 34 : PetscCall(MatDestroy(&A));
446 34 : PetscFunctionReturn(PETSC_SUCCESS);
447 : }
448 :
449 9 : static PetscErrorCode BVSVDAndRank_QR_CAA(BV S,PetscInt M,PetscInt L,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
450 : {
451 9 : PetscInt i,j,n,ml=S->k;
452 9 : PetscBLASInt m,k_,lda,lwork,info;
453 9 : PetscScalar *work,*T,*U,*R,sone=1.0,zero=0.0;
454 9 : PetscReal *rwork;
455 9 : Mat A;
456 :
457 9 : PetscFunctionBegin;
458 : /* Compute QR factorizaton of S */
459 9 : PetscCall(BVGetSizes(S,NULL,&n,NULL));
460 9 : PetscCheck(n>=ml,PetscObjectComm((PetscObject)S),PETSC_ERR_SUP,"The QR_CAA method does not support problem size n < m*L");
461 9 : PetscCall(BVSetActiveColumns(S,0,ml));
462 9 : PetscCall(PetscArrayzero(pA,ml*ml));
463 9 : PetscCall(MatCreateDense(PETSC_COMM_SELF,ml,ml,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
464 9 : PetscCall(BVOrthogonalize(S,A));
465 9 : PetscCall(MatDestroy(&A));
466 :
467 : /* SVD of first (M-1)*L diagonal block */
468 9 : PetscCall(PetscBLASIntCast((M-1)*L,&m));
469 9 : PetscCall(PetscMalloc5(m*m,&T,m*m,&R,m*m,&U,5*ml,&work,5*ml,&rwork));
470 305 : for (j=0;j<m;j++) PetscCall(PetscArraycpy(R+j*m,pA+j*ml,m));
471 9 : lwork = 5*m;
472 9 : PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
473 : #if !defined (PETSC_USE_COMPLEX)
474 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,&info));
475 : #else
476 9 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,rwork,&info));
477 : #endif
478 9 : SlepcCheckLapackInfo("gesvd",info);
479 9 : PetscCall(PetscFPTrapPop());
480 9 : *rank = 0;
481 305 : for (i=0;i<m;i++) {
482 592 : if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
483 : }
484 9 : PetscCall(MatCreateDense(PETSC_COMM_SELF,m,m,PETSC_DECIDE,PETSC_DECIDE,U,&A));
485 9 : PetscCall(BVSetActiveColumns(S,0,m));
486 9 : PetscCall(BVMultInPlace(S,A,0,*rank));
487 9 : PetscCall(MatDestroy(&A));
488 : /* Projected linear system */
489 : /* m first columns of A have the right singular vectors */
490 9 : PetscCall(PetscBLASIntCast(*rank,&k_));
491 9 : PetscCall(PetscBLASIntCast(ml,&lda));
492 9 : PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&m,&k_,&m,&sone,pA+L*lda,&lda,R,&m,&zero,T,&m));
493 9 : PetscCall(PetscArrayzero(pA,ml*ml));
494 9 : PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&m,&sone,U,&m,T,&m,&zero,pA,&k_));
495 793 : for (j=0;j<k_;j++) for (i=0;i<k_;i++) pA[j*k_+i] /= sigma[j];
496 9 : PetscCall(PetscFree5(T,R,U,work,rwork));
497 9 : PetscFunctionReturn(PETSC_SUCCESS);
498 : }
499 :
500 : /*@
501 : BVSVDAndRank - Compute the SVD (left singular vectors only, and singular
502 : values) and determine the numerical rank according to a tolerance.
503 :
504 : Collective
505 :
506 : Input Parameters:
507 : + S - the basis vectors
508 : . m - the moment degree
509 : . l - the block size
510 : . delta - the tolerance used to determine the rank
511 : - meth - the method to be used
512 :
513 : Output Parameters:
514 : + A - workspace, on output contains relevant values in the CAA method
515 : . sigma - computed singular values
516 : - rank - estimated rank (optional)
517 :
518 : Notes:
519 : This function computes [U,Sigma,V] = svd(S) and replaces S with U.
520 : The current implementation computes this via S'*S, and it may include
521 : some kind of iterative refinement to improve accuracy in some cases.
522 :
523 : The parameters m and l refer to the moment and block size of contour
524 : integral methods. All columns up to m*l are modified, and the active
525 : columns are set to 0..m*l.
526 :
527 : The method is one of BV_SVD_METHOD_REFINE, BV_SVD_METHOD_QR, BV_SVD_METHOD_QR_CAA.
528 :
529 : The A workspace should be m*l*m*l in size.
530 :
531 : Once the decomposition is computed, the numerical rank is estimated
532 : by counting the number of singular values that are larger than the
533 : tolerance delta, relative to the first singular value.
534 :
535 : Level: developer
536 :
537 : .seealso: BVSetActiveColumns()
538 : @*/
539 71 : PetscErrorCode BVSVDAndRank(BV S,PetscInt m,PetscInt l,PetscReal delta,BVSVDMethod meth,PetscScalar *A,PetscReal *sigma,PetscInt *rank)
540 : {
541 71 : PetscFunctionBegin;
542 71 : PetscValidHeaderSpecific(S,BV_CLASSID,1);
543 213 : PetscValidLogicalCollectiveInt(S,m,2);
544 213 : PetscValidLogicalCollectiveInt(S,l,3);
545 213 : PetscValidLogicalCollectiveReal(S,delta,4);
546 213 : PetscValidLogicalCollectiveEnum(S,meth,5);
547 71 : PetscAssertPointer(A,6);
548 71 : PetscAssertPointer(sigma,7);
549 71 : PetscAssertPointer(rank,8);
550 :
551 71 : PetscCall(PetscLogEventBegin(BV_SVDAndRank,S,0,0,0));
552 71 : PetscCall(BVSetActiveColumns(S,0,m*l));
553 71 : switch (meth) {
554 28 : case BV_SVD_METHOD_REFINE:
555 28 : PetscCall(BVSVDAndRank_Refine(S,delta,A,sigma,rank));
556 : break;
557 34 : case BV_SVD_METHOD_QR:
558 34 : PetscCall(BVSVDAndRank_QR(S,delta,A,sigma,rank));
559 : break;
560 9 : case BV_SVD_METHOD_QR_CAA:
561 9 : PetscCall(BVSVDAndRank_QR_CAA(S,m,l,delta,A,sigma,rank));
562 : break;
563 : }
564 71 : PetscCall(PetscLogEventEnd(BV_SVDAndRank,S,0,0,0));
565 71 : PetscFunctionReturn(PETSC_SUCCESS);
566 : }
567 :
568 : /*@
569 : BVCISSResizeBases - Resize the bases involved in CISS solvers when the L grows.
570 :
571 : Logically Collective
572 :
573 : Input Parameters:
574 : + S - basis of L*M columns
575 : . V - basis of L columns (may be associated to subcommunicators)
576 : . Y - basis of npoints*L columns
577 : . Lold - old value of L
578 : . Lnew - new value of L
579 : . M - the moment size
580 : - npoints - number of integration points
581 :
582 : Level: developer
583 :
584 : .seealso: BVResize()
585 : @*/
586 7 : PetscErrorCode BVCISSResizeBases(BV S,BV V,BV Y,PetscInt Lold,PetscInt Lnew,PetscInt M,PetscInt npoints)
587 : {
588 7 : PetscInt i,j;
589 :
590 7 : PetscFunctionBegin;
591 7 : PetscValidHeaderSpecific(S,BV_CLASSID,1);
592 7 : PetscValidHeaderSpecific(V,BV_CLASSID,2);
593 7 : PetscValidHeaderSpecific(Y,BV_CLASSID,3);
594 21 : PetscValidLogicalCollectiveInt(S,Lold,4);
595 21 : PetscValidLogicalCollectiveInt(S,Lnew,5);
596 21 : PetscValidLogicalCollectiveInt(S,M,6);
597 21 : PetscValidLogicalCollectiveInt(S,npoints,7);
598 :
599 7 : PetscCall(BVResize(S,Lnew*M,PETSC_FALSE));
600 7 : PetscCall(BVResize(V,Lnew,PETSC_TRUE));
601 7 : PetscCall(BVResize(Y,Lnew*npoints,PETSC_TRUE));
602 : /* columns of Y are interleaved */
603 215 : for (i=npoints-1;i>=0;i--) {
604 1600 : for (j=Lold-1;j>=0;j--) PetscCall(BVCopyColumn(Y,i*Lold+j,i*Lnew+j));
605 : }
606 7 : PetscFunctionReturn(PETSC_SUCCESS);
607 : }
|