Line data Source code
1 : /*
2 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 : SLEPc - Scalable Library for Eigenvalue Problem Computations
4 : Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5 :
6 : This file is part of SLEPc.
7 : SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 : */
10 : /*
11 : BV developer functions needed in contour integral methods
12 : */
13 :
14 : #include <slepc/private/bvimpl.h> /*I "slepcbv.h" I*/
15 : #include <slepcblaslapack.h>
16 :
17 : #define p_id(i) (i*subcomm->n + subcomm->color)
18 :
19 : /*@
20 : BVScatter - Scatters the columns of a BV to another BV created in a
21 : subcommunicator.
22 :
23 : Collective
24 :
25 : Input Parameters:
26 : + Vin - input basis vectors (defined on the whole communicator)
27 : . scat - VecScatter object that contains the info for the communication
28 : - xdup - an auxiliary vector
29 :
30 : Output Parameter:
31 : . Vout - output basis vectors (defined on the subcommunicator)
32 :
33 : Notes:
34 : Currently implemented as a loop for each the active column, where each
35 : column is scattered independently. The vector xdup is defined on the
36 : contiguous parent communicator and have enough space to store one
37 : duplicate of the original vector per each subcommunicator.
38 :
39 : Level: developer
40 :
41 : .seealso: BVGetColumn()
42 : @*/
43 8 : PetscErrorCode BVScatter(BV Vin,BV Vout,VecScatter scat,Vec xdup)
44 : {
45 8 : PetscInt i;
46 8 : Vec v,z;
47 8 : const PetscScalar *array;
48 :
49 8 : PetscFunctionBegin;
50 8 : PetscValidHeaderSpecific(Vin,BV_CLASSID,1);
51 8 : PetscValidHeaderSpecific(Vout,BV_CLASSID,2);
52 8 : PetscValidHeaderSpecific(scat,PETSCSF_CLASSID,3);
53 8 : PetscValidHeaderSpecific(xdup,VEC_CLASSID,4);
54 8 : PetscCall(BVCreateVec(Vout,&z));
55 128 : for (i=Vin->l;i<Vin->k;i++) {
56 120 : PetscCall(BVGetColumn(Vin,i,&v));
57 120 : PetscCall(VecScatterBegin(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
58 120 : PetscCall(VecScatterEnd(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
59 120 : PetscCall(BVRestoreColumn(Vin,i,&v));
60 120 : PetscCall(VecGetArrayRead(xdup,&array));
61 120 : PetscCall(VecPlaceArray(z,array));
62 120 : PetscCall(BVInsertVec(Vout,i,z));
63 120 : PetscCall(VecResetArray(z));
64 120 : PetscCall(VecRestoreArrayRead(xdup,&array));
65 : }
66 8 : PetscCall(VecDestroy(&z));
67 8 : PetscFunctionReturn(PETSC_SUCCESS);
68 : }
69 :
70 : /*@
71 : BVSumQuadrature - Computes the sum of terms required in the quadrature
72 : rule to approximate the contour integral.
73 :
74 : Collective
75 :
76 : Input Parameters:
77 : + Y - input basis vectors
78 : . M - number of moments
79 : . L - block size
80 : . L_max - maximum block size
81 : . w - quadrature weights
82 : . zn - normalized quadrature points
83 : . scat - (optional) VecScatter object to communicate between subcommunicators
84 : . subcomm - subcommunicator layout
85 : . npoints - number of points to process by the subcommunicator
86 : - useconj - whether conjugate points can be used or not
87 :
88 : Output Parameter:
89 : . S - output basis vectors
90 :
91 : Notes:
92 : This is a generalization of BVMult(). The resulting matrix S consists of M
93 : panels of L columns, and the following formula is computed for each panel
94 : S_k = sum_j w_j*zn_j^k*Y_j, where Y_j is the j-th panel of Y containing
95 : the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
96 : the width of the panels in Y.
97 :
98 : When using subcommunicators, Y is stored in the subcommunicators for a subset
99 : of integration points. In that case, the computation is done in the subcomm
100 : and then scattered to the whole communicator in S using the VecScatter scat.
101 : The value npoints is the number of points to be processed in this subcomm
102 : and the flag useconj indicates whether symmetric points can be reused.
103 :
104 : Level: developer
105 :
106 : .seealso: BVMult(), BVScatter(), BVDotQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
107 : @*/
108 26 : PetscErrorCode BVSumQuadrature(BV S,BV Y,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
109 : {
110 26 : PetscInt i,j,k,nloc;
111 26 : Vec v,sj;
112 26 : PetscScalar *ppk,*pv,one=1.0;
113 :
114 26 : PetscFunctionBegin;
115 26 : PetscValidHeaderSpecific(S,BV_CLASSID,1);
116 26 : PetscValidHeaderSpecific(Y,BV_CLASSID,2);
117 26 : if (scat) PetscValidHeaderSpecific(scat,PETSCSF_CLASSID,8);
118 :
119 26 : PetscCall(BVGetSizes(Y,&nloc,NULL,NULL));
120 26 : PetscCall(PetscMalloc1(npoints,&ppk));
121 678 : for (i=0;i<npoints;i++) ppk[i] = 1.0;
122 26 : PetscCall(BVCreateVec(Y,&v));
123 230 : for (k=0;k<M;k++) {
124 3128 : for (j=0;j<L;j++) {
125 2924 : PetscCall(VecSet(v,0.0));
126 74668 : for (i=0;i<npoints;i++) {
127 71744 : PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
128 71744 : PetscCall(BVMultVec(Y,ppk[i]*w[p_id(i)],1.0,v,&one));
129 : }
130 2924 : if (PetscUnlikely(useconj)) {
131 0 : PetscCall(VecGetArray(v,&pv));
132 0 : for (i=0;i<nloc;i++) pv[i] = 2.0*PetscRealPart(pv[i]);
133 0 : PetscCall(VecRestoreArray(v,&pv));
134 : }
135 2924 : PetscCall(BVGetColumn(S,k*L+j,&sj));
136 2924 : if (PetscUnlikely(scat)) {
137 960 : PetscCall(VecScatterBegin(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
138 960 : PetscCall(VecScatterEnd(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
139 1964 : } else PetscCall(VecCopy(v,sj));
140 2924 : PetscCall(BVRestoreColumn(S,k*L+j,&sj));
141 : }
142 5292 : for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
143 : }
144 26 : PetscCall(PetscFree(ppk));
145 26 : PetscCall(VecDestroy(&v));
146 26 : PetscFunctionReturn(PETSC_SUCCESS);
147 : }
148 :
149 : /*@
150 : BVDotQuadrature - Computes the projection terms required in the quadrature
151 : rule to approximate the contour integral.
152 :
153 : Collective
154 :
155 : Input Parameters:
156 : + Y - first basis vectors
157 : . V - second basis vectors
158 : . M - number of moments
159 : . L - block size
160 : . L_max - maximum block size
161 : . w - quadrature weights
162 : . zn - normalized quadrature points
163 : . subcomm - subcommunicator layout
164 : . npoints - number of points to process by the subcommunicator
165 : - useconj - whether conjugate points can be used or not
166 :
167 : Output Parameter:
168 : . Mu - computed result
169 :
170 : Notes:
171 : This is a generalization of BVDot(). The resulting matrix Mu consists of M
172 : blocks of size LxL (placed horizontally), each of them computed as
173 : Mu_k = sum_j w_j*zn_j^k*V'*Y_j, where Y_j is the j-th panel of Y containing
174 : the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
175 : the width of the panels in Y.
176 :
177 : When using subcommunicators, Y is stored in the subcommunicators for a subset
178 : of integration points. In that case, the computation is done in the subcomm
179 : and then the final result is combined via reduction.
180 : The value npoints is the number of points to be processed in this subcomm
181 : and the flag useconj indicates whether symmetric points can be reused.
182 :
183 : Level: developer
184 :
185 : .seealso: BVDot(), BVScatter(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
186 : @*/
187 6 : PetscErrorCode BVDotQuadrature(BV Y,BV V,PetscScalar *Mu,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
188 : {
189 6 : PetscMPIInt sub_size,count;
190 6 : PetscInt i,j,k,s;
191 6 : PetscScalar *temp,*temp2,*ppk,alp;
192 6 : Mat H;
193 6 : const PetscScalar *pH;
194 6 : MPI_Comm child,parent;
195 :
196 6 : PetscFunctionBegin;
197 6 : PetscValidHeaderSpecific(Y,BV_CLASSID,1);
198 6 : PetscValidHeaderSpecific(V,BV_CLASSID,2);
199 :
200 6 : PetscCall(PetscSubcommGetChild(subcomm,&child));
201 6 : PetscCallMPI(MPI_Comm_size(child,&sub_size));
202 6 : PetscCall(PetscMalloc3(npoints*L*(L+1),&temp,2*M*L*L,&temp2,npoints,&ppk));
203 6 : PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,L,L_max*npoints,NULL,&H));
204 6 : PetscCall(PetscArrayzero(temp2,2*M*L*L));
205 6 : PetscCall(BVSetActiveColumns(Y,0,L_max*npoints));
206 6 : PetscCall(BVSetActiveColumns(V,0,L));
207 6 : PetscCall(BVDot(Y,V,H));
208 6 : PetscCall(MatDenseGetArrayRead(H,&pH));
209 190 : for (i=0;i<npoints;i++) {
210 1976 : for (j=0;j<L;j++) {
211 22592 : for (k=0;k<L;k++) {
212 20800 : temp[k+j*L+i*L*L] = pH[k+j*L+i*L*L_max];
213 : }
214 : }
215 : }
216 6 : PetscCall(MatDenseRestoreArrayRead(H,&pH));
217 190 : for (i=0;i<npoints;i++) ppk[i] = 1;
218 94 : for (k=0;k<2*M;k++) {
219 1008 : for (j=0;j<L;j++) {
220 28312 : for (i=0;i<npoints;i++) {
221 27392 : alp = ppk[i]*w[p_id(i)];
222 353792 : for (s=0;s<L;s++) {
223 326400 : if (!useconj) temp2[s+(j+k*L)*L] += alp*temp[s+(j+i*L)*L];
224 0 : else temp2[s+(j+k*L)*L] += 2.0*PetscRealPart(alp*temp[s+(j+i*L)*L]);
225 : }
226 : }
227 : }
228 2776 : for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
229 : }
230 11230 : for (i=0;i<2*M*L*L;i++) temp2[i] /= sub_size;
231 6 : PetscCall(PetscMPIIntCast(2*M*L*L,&count));
232 6 : PetscCall(PetscSubcommGetParent(subcomm,&parent));
233 18 : PetscCallMPI(MPIU_Allreduce(temp2,Mu,count,MPIU_SCALAR,MPIU_SUM,parent));
234 6 : PetscCall(PetscFree3(temp,temp2,ppk));
235 6 : PetscCall(MatDestroy(&H));
236 6 : PetscFunctionReturn(PETSC_SUCCESS);
237 : }
238 :
239 : /*@
240 : BVTraceQuadrature - Computes an estimate of the number of eigenvalues
241 : inside a region via quantities computed in the quadrature rule of
242 : contour integral methods.
243 :
244 : Collective
245 :
246 : Input Parameters:
247 : + Y - first basis vectors
248 : . V - second basis vectors
249 : . L - block size
250 : . L_max - maximum block size
251 : . w - quadrature weights
252 : . scat - (optional) VecScatter object to communicate between subcommunicators
253 : . subcomm - subcommunicator layout
254 : . npoints - number of points to process by the subcommunicator
255 : - useconj - whether conjugate points can be used or not
256 :
257 : Output Parameter:
258 : . est_eig - estimated eigenvalue count
259 :
260 : Notes:
261 : This function returns an estimation of the number of eigenvalues in the
262 : region, computed as trace(V'*S_0), where S_0 is the first panel of S
263 : computed by BVSumQuadrature().
264 :
265 : When using subcommunicators, Y is stored in the subcommunicators for a subset
266 : of integration points. In that case, the computation is done in the subcomm
267 : and then scattered to the whole communicator in S using the VecScatter scat.
268 : The value npoints is the number of points to be processed in this subcomm
269 : and the flag useconj indicates whether symmetric points can be reused.
270 :
271 : Level: developer
272 :
273 : .seealso: BVScatter(), BVDotQuadrature(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
274 : @*/
275 0 : PetscErrorCode BVTraceQuadrature(BV Y,BV V,PetscInt L,PetscInt L_max,PetscScalar *w,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj,PetscReal *est_eig)
276 : {
277 0 : PetscInt i,j;
278 0 : Vec y,yall,vj;
279 0 : PetscScalar dot,sum=0.0,one=1.0;
280 :
281 0 : PetscFunctionBegin;
282 0 : PetscValidHeaderSpecific(Y,BV_CLASSID,1);
283 0 : PetscValidHeaderSpecific(V,BV_CLASSID,2);
284 0 : if (scat) PetscValidHeaderSpecific(scat,PETSCSF_CLASSID,6);
285 :
286 0 : PetscCall(BVCreateVec(Y,&y));
287 0 : PetscCall(BVCreateVec(V,&yall));
288 0 : for (j=0;j<L;j++) {
289 0 : PetscCall(VecSet(y,0.0));
290 0 : for (i=0;i<npoints;i++) {
291 0 : PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
292 0 : PetscCall(BVMultVec(Y,w[p_id(i)],1.0,y,&one));
293 : }
294 0 : PetscCall(BVGetColumn(V,j,&vj));
295 0 : if (scat) {
296 0 : PetscCall(VecScatterBegin(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
297 0 : PetscCall(VecScatterEnd(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
298 0 : PetscCall(VecDot(vj,yall,&dot));
299 0 : } else PetscCall(VecDot(vj,y,&dot));
300 0 : PetscCall(BVRestoreColumn(V,j,&vj));
301 0 : if (useconj) sum += 2.0*PetscRealPart(dot);
302 0 : else sum += dot;
303 : }
304 0 : *est_eig = PetscAbsScalar(sum)/(PetscReal)L;
305 0 : PetscCall(VecDestroy(&y));
306 0 : PetscCall(VecDestroy(&yall));
307 0 : PetscFunctionReturn(PETSC_SUCCESS);
308 : }
309 :
310 21 : static PetscErrorCode BVSVDAndRank_Refine(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
311 : {
312 21 : PetscInt i,j,k,ml=S->k;
313 21 : PetscMPIInt len;
314 21 : PetscScalar *work,*B,*tempB,*sarray,*Q1,*Q2,*temp2,alpha=1.0,beta=0.0;
315 21 : PetscBLASInt l,m,n,lda,ldu,ldvt,lwork,info,ldb,ldc,lds;
316 : #if defined(PETSC_USE_COMPLEX)
317 : PetscReal *rwork;
318 : #endif
319 :
320 21 : PetscFunctionBegin;
321 21 : PetscCall(PetscBLASIntCast(S->ld,&lds));
322 21 : PetscCall(BVGetArray(S,&sarray));
323 21 : PetscCall(PetscMalloc6(ml*ml,&temp2,S->n*ml,&Q1,S->n*ml,&Q2,ml*ml,&B,ml*ml,&tempB,5*ml,&work));
324 : #if defined(PETSC_USE_COMPLEX)
325 : PetscCall(PetscMalloc1(5*ml,&rwork));
326 : #endif
327 21 : PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
328 :
329 21 : PetscCall(PetscArrayzero(B,ml*ml));
330 2505 : for (i=0;i<ml;i++) B[i*ml+i]=1;
331 :
332 63 : for (k=0;k<2;k++) {
333 42 : PetscCall(PetscBLASIntCast(S->n,&m));
334 42 : PetscCall(PetscBLASIntCast(ml,&l));
335 42 : n = l; lda = m; ldb = m; ldc = l;
336 42 : if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,sarray,&lds,sarray,&lds,&beta,pA,&ldc));
337 21 : else PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,Q1,&lda,Q1,&ldb,&beta,pA,&ldc));
338 42 : PetscCall(PetscArrayzero(temp2,ml*ml));
339 42 : PetscCall(PetscMPIIntCast(ml*ml,&len));
340 126 : PetscCallMPI(MPIU_Allreduce(pA,temp2,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)S)));
341 :
342 42 : PetscCall(PetscBLASIntCast(ml,&m));
343 42 : n = m; lda = m; lwork = 5*m, ldu = 1; ldvt = 1;
344 : #if defined(PETSC_USE_COMPLEX)
345 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
346 : #else
347 42 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
348 : #endif
349 42 : SlepcCheckLapackInfo("gesvd",info);
350 :
351 42 : PetscCall(PetscBLASIntCast(S->n,&l));
352 42 : PetscCall(PetscBLASIntCast(ml,&n));
353 42 : m = n; lda = l; ldb = m; ldc = l;
354 42 : if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,sarray,&lds,temp2,&ldb,&beta,Q1,&ldc));
355 21 : else PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,Q1,&lda,temp2,&ldb,&beta,Q2,&ldc));
356 :
357 42 : PetscCall(PetscBLASIntCast(ml,&l));
358 42 : m = l; n = l; lda = l; ldb = m; ldc = l;
359 42 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,B,&lda,temp2,&ldb,&beta,tempB,&ldc));
360 5010 : for (i=0;i<ml;i++) {
361 4968 : sigma[i] = PetscSqrtReal(sigma[i]);
362 1660712 : for (j=0;j<S->n;j++) {
363 1655744 : if (k%2) Q2[j+i*S->n] /= sigma[i];
364 827872 : else Q1[j+i*S->n] /= sigma[i];
365 : }
366 616584 : for (j=0;j<ml;j++) B[j+i*ml] = tempB[j+i*ml]*sigma[i];
367 : }
368 : }
369 :
370 21 : PetscCall(PetscBLASIntCast(ml,&m));
371 21 : n = m; lda = m; ldu=1; ldvt=1;
372 : #if defined(PETSC_USE_COMPLEX)
373 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
374 : #else
375 21 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
376 : #endif
377 21 : SlepcCheckLapackInfo("gesvd",info);
378 :
379 21 : PetscCall(PetscBLASIntCast(S->n,&l));
380 21 : PetscCall(PetscBLASIntCast(ml,&n));
381 21 : m = n; lda = l; ldb = m;
382 21 : if (k%2) PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q1,&lda,B,&ldb,&beta,sarray,&lds));
383 21 : else PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q2,&lda,B,&ldb,&beta,sarray,&lds));
384 :
385 21 : PetscCall(PetscFPTrapPop());
386 21 : PetscCall(BVRestoreArray(S,&sarray));
387 :
388 21 : if (rank) {
389 21 : (*rank) = 0;
390 2505 : for (i=0;i<ml;i++) {
391 4840 : if (sigma[i]/PetscMax(sigma[0],1.0)>delta) (*rank)++;
392 : }
393 : }
394 21 : PetscCall(PetscFree6(temp2,Q1,Q2,B,tempB,work));
395 : #if defined(PETSC_USE_COMPLEX)
396 : PetscCall(PetscFree(rwork));
397 : #endif
398 21 : PetscFunctionReturn(PETSC_SUCCESS);
399 : }
400 :
401 0 : static PetscErrorCode BVSVDAndRank_QR(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
402 : {
403 0 : PetscInt i,n,ml=S->k;
404 0 : PetscBLASInt m,lda,lwork,info;
405 0 : PetscScalar *work;
406 0 : PetscReal *rwork;
407 0 : Mat A;
408 0 : Vec v;
409 :
410 0 : PetscFunctionBegin;
411 : /* Compute QR factorizaton of S */
412 0 : PetscCall(BVGetSizes(S,NULL,&n,NULL));
413 0 : n = PetscMin(n,ml);
414 0 : PetscCall(BVSetActiveColumns(S,0,n));
415 0 : PetscCall(PetscArrayzero(pA,ml*n));
416 0 : PetscCall(MatCreateDense(PETSC_COMM_SELF,n,n,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
417 0 : PetscCall(BVOrthogonalize(S,A));
418 0 : if (n<ml) {
419 : /* the rest of the factorization */
420 0 : for (i=n;i<ml;i++) {
421 0 : PetscCall(BVGetColumn(S,i,&v));
422 0 : PetscCall(BVOrthogonalizeVec(S,v,pA+i*n,NULL,NULL));
423 0 : PetscCall(BVRestoreColumn(S,i,&v));
424 : }
425 : }
426 0 : PetscCall(PetscBLASIntCast(n,&lda));
427 0 : PetscCall(PetscBLASIntCast(ml,&m));
428 0 : PetscCall(PetscMalloc2(5*ml,&work,5*ml,&rwork));
429 0 : lwork = 5*m;
430 0 : PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
431 : #if !defined (PETSC_USE_COMPLEX)
432 0 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,&info));
433 : #else
434 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,rwork,&info));
435 : #endif
436 0 : SlepcCheckLapackInfo("gesvd",info);
437 0 : PetscCall(PetscFPTrapPop());
438 0 : *rank = 0;
439 0 : for (i=0;i<n;i++) {
440 0 : if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
441 : }
442 : /* n first columns of A have the left singular vectors */
443 0 : PetscCall(BVMultInPlace(S,A,0,*rank));
444 0 : PetscCall(PetscFree2(work,rwork));
445 0 : PetscCall(MatDestroy(&A));
446 0 : PetscFunctionReturn(PETSC_SUCCESS);
447 : }
448 :
449 0 : static PetscErrorCode BVSVDAndRank_QR_CAA(BV S,PetscInt M,PetscInt L,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
450 : {
451 0 : PetscInt i,j,n,ml=S->k;
452 0 : PetscBLASInt m,k_,lda,lwork,info;
453 0 : PetscScalar *work,*T,*U,*R,sone=1.0,zero=0.0;
454 0 : PetscReal *rwork;
455 0 : Mat A;
456 :
457 0 : PetscFunctionBegin;
458 : /* Compute QR factorizaton of S */
459 0 : PetscCall(BVGetSizes(S,NULL,&n,NULL));
460 0 : PetscCheck(n>=ml,PetscObjectComm((PetscObject)S),PETSC_ERR_SUP,"The QR_CAA method does not support problem size n < m*L");
461 0 : PetscCall(BVSetActiveColumns(S,0,ml));
462 0 : PetscCall(PetscArrayzero(pA,ml*ml));
463 0 : PetscCall(MatCreateDense(PETSC_COMM_SELF,ml,ml,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
464 0 : PetscCall(BVOrthogonalize(S,A));
465 0 : PetscCall(MatDestroy(&A));
466 :
467 : /* SVD of first (M-1)*L diagonal block */
468 0 : PetscCall(PetscBLASIntCast((M-1)*L,&m));
469 0 : PetscCall(PetscMalloc5(m*m,&T,m*m,&R,m*m,&U,5*ml,&work,5*ml,&rwork));
470 0 : for (j=0;j<m;j++) PetscCall(PetscArraycpy(R+j*m,pA+j*ml,m));
471 0 : lwork = 5*m;
472 0 : PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
473 : #if !defined (PETSC_USE_COMPLEX)
474 0 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,&info));
475 : #else
476 : PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,rwork,&info));
477 : #endif
478 0 : SlepcCheckLapackInfo("gesvd",info);
479 0 : PetscCall(PetscFPTrapPop());
480 0 : *rank = 0;
481 0 : for (i=0;i<m;i++) {
482 0 : if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
483 : }
484 0 : PetscCall(MatCreateDense(PETSC_COMM_SELF,m,m,PETSC_DECIDE,PETSC_DECIDE,U,&A));
485 0 : PetscCall(BVSetActiveColumns(S,0,m));
486 0 : PetscCall(BVMultInPlace(S,A,0,*rank));
487 0 : PetscCall(MatDestroy(&A));
488 : /* Projected linear system */
489 : /* m first columns of A have the right singular vectors */
490 0 : PetscCall(PetscBLASIntCast(*rank,&k_));
491 0 : PetscCall(PetscBLASIntCast(ml,&lda));
492 0 : PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&m,&k_,&m,&sone,pA+L*lda,&lda,R,&m,&zero,T,&m));
493 0 : PetscCall(PetscArrayzero(pA,ml*ml));
494 0 : PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&m,&sone,U,&m,T,&m,&zero,pA,&k_));
495 0 : for (j=0;j<k_;j++) for (i=0;i<k_;i++) pA[j*k_+i] /= sigma[j];
496 0 : PetscCall(PetscFree5(T,R,U,work,rwork));
497 0 : PetscFunctionReturn(PETSC_SUCCESS);
498 : }
499 :
500 : /*@
501 : BVSVDAndRank - Compute the SVD (left singular vectors only, and singular
502 : values) and determine the numerical rank according to a tolerance.
503 :
504 : Collective
505 :
506 : Input Parameters:
507 : + S - the basis vectors
508 : . m - the moment degree
509 : . l - the block size
510 : . delta - the tolerance used to determine the rank
511 : - meth - the method to be used
512 :
513 : Output Parameters:
514 : + A - workspace, on output contains relevant values in the CAA method
515 : . sigma - computed singular values
516 : - rank - estimated rank (optional)
517 :
518 : Notes:
519 : This function computes [U,Sigma,V] = svd(S) and replaces S with U.
520 : The current implementation computes this via S'*S, and it may include
521 : some kind of iterative refinement to improve accuracy in some cases.
522 :
523 : The parameters m and l refer to the moment and block size of contour
524 : integral methods. All columns up to m*l are modified, and the active
525 : columns are set to 0..m*l.
526 :
527 : The method is one of BV_SVD_METHOD_REFINE, BV_SVD_METHOD_QR, BV_SVD_METHOD_QR_CAA.
528 :
529 : The A workspace should be m*l*m*l in size.
530 :
531 : Once the decomposition is computed, the numerical rank is estimated
532 : by counting the number of singular values that are larger than the
533 : tolerance delta, relative to the first singular value.
534 :
535 : Level: developer
536 :
537 : .seealso: BVSetActiveColumns()
538 : @*/
539 21 : PetscErrorCode BVSVDAndRank(BV S,PetscInt m,PetscInt l,PetscReal delta,BVSVDMethod meth,PetscScalar *A,PetscReal *sigma,PetscInt *rank)
540 : {
541 21 : PetscFunctionBegin;
542 21 : PetscValidHeaderSpecific(S,BV_CLASSID,1);
543 63 : PetscValidLogicalCollectiveInt(S,m,2);
544 63 : PetscValidLogicalCollectiveInt(S,l,3);
545 63 : PetscValidLogicalCollectiveReal(S,delta,4);
546 63 : PetscValidLogicalCollectiveEnum(S,meth,5);
547 21 : PetscAssertPointer(A,6);
548 21 : PetscAssertPointer(sigma,7);
549 21 : PetscAssertPointer(rank,8);
550 :
551 21 : PetscCall(PetscLogEventBegin(BV_SVDAndRank,S,0,0,0));
552 21 : PetscCall(BVSetActiveColumns(S,0,m*l));
553 21 : switch (meth) {
554 21 : case BV_SVD_METHOD_REFINE:
555 21 : PetscCall(BVSVDAndRank_Refine(S,delta,A,sigma,rank));
556 : break;
557 0 : case BV_SVD_METHOD_QR:
558 0 : PetscCall(BVSVDAndRank_QR(S,delta,A,sigma,rank));
559 : break;
560 0 : case BV_SVD_METHOD_QR_CAA:
561 0 : PetscCall(BVSVDAndRank_QR_CAA(S,m,l,delta,A,sigma,rank));
562 : break;
563 : }
564 21 : PetscCall(PetscLogEventEnd(BV_SVDAndRank,S,0,0,0));
565 21 : PetscFunctionReturn(PETSC_SUCCESS);
566 : }
567 :
568 : /*@
569 : BVCISSResizeBases - Resize the bases involved in CISS solvers when the L grows.
570 :
571 : Logically Collective
572 :
573 : Input Parameters:
574 : + S - basis of L*M columns
575 : . V - basis of L columns (may be associated to subcommunicators)
576 : . Y - basis of npoints*L columns
577 : . Lold - old value of L
578 : . Lnew - new value of L
579 : . M - the moment size
580 : - npoints - number of integration points
581 :
582 : Level: developer
583 :
584 : .seealso: BVResize()
585 : @*/
586 0 : PetscErrorCode BVCISSResizeBases(BV S,BV V,BV Y,PetscInt Lold,PetscInt Lnew,PetscInt M,PetscInt npoints)
587 : {
588 0 : PetscInt i,j;
589 :
590 0 : PetscFunctionBegin;
591 0 : PetscValidHeaderSpecific(S,BV_CLASSID,1);
592 0 : PetscValidHeaderSpecific(V,BV_CLASSID,2);
593 0 : PetscValidHeaderSpecific(Y,BV_CLASSID,3);
594 0 : PetscValidLogicalCollectiveInt(S,Lold,4);
595 0 : PetscValidLogicalCollectiveInt(S,Lnew,5);
596 0 : PetscValidLogicalCollectiveInt(S,M,6);
597 0 : PetscValidLogicalCollectiveInt(S,npoints,7);
598 :
599 0 : PetscCall(BVResize(S,Lnew*M,PETSC_FALSE));
600 0 : PetscCall(BVResize(V,Lnew,PETSC_TRUE));
601 0 : PetscCall(BVResize(Y,Lnew*npoints,PETSC_TRUE));
602 : /* columns of Y are interleaved */
603 0 : for (i=npoints-1;i>=0;i--) {
604 0 : for (j=Lold-1;j>=0;j--) PetscCall(BVCopyColumn(Y,i*Lold+j,i*Lnew+j));
605 : }
606 0 : PetscFunctionReturn(PETSC_SUCCESS);
607 : }
|