Actual source code: bvcontour.c
slepc-main 2024-11-22
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: BV developer functions needed in contour integral methods
12: */
14: #include <slepc/private/bvimpl.h>
15: #include <slepcblaslapack.h>
17: #define p_id(i) (i*subcomm->n + subcomm->color)
19: /*@
20: BVScatter - Scatters the columns of a BV to another BV created in a
21: subcommunicator.
23: Collective
25: Input Parameters:
26: + Vin - input basis vectors (defined on the whole communicator)
27: . scat - VecScatter object that contains the info for the communication
28: - xdup - an auxiliary vector
30: Output Parameter:
31: . Vout - output basis vectors (defined on the subcommunicator)
33: Notes:
34: Currently implemented as a loop for each the active column, where each
35: column is scattered independently. The vector xdup is defined on the
36: contiguous parent communicator and have enough space to store one
37: duplicate of the original vector per each subcommunicator.
39: Level: developer
41: .seealso: BVGetColumn()
42: @*/
43: PetscErrorCode BVScatter(BV Vin,BV Vout,VecScatter scat,Vec xdup)
44: {
45: PetscInt i;
46: Vec v,z;
47: const PetscScalar *array;
49: PetscFunctionBegin;
54: PetscCall(BVCreateVec(Vout,&z));
55: for (i=Vin->l;i<Vin->k;i++) {
56: PetscCall(BVGetColumn(Vin,i,&v));
57: PetscCall(VecScatterBegin(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
58: PetscCall(VecScatterEnd(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
59: PetscCall(BVRestoreColumn(Vin,i,&v));
60: PetscCall(VecGetArrayRead(xdup,&array));
61: PetscCall(VecPlaceArray(z,array));
62: PetscCall(BVInsertVec(Vout,i,z));
63: PetscCall(VecResetArray(z));
64: PetscCall(VecRestoreArrayRead(xdup,&array));
65: }
66: PetscCall(VecDestroy(&z));
67: PetscFunctionReturn(PETSC_SUCCESS);
68: }
70: /*@
71: BVSumQuadrature - Computes the sum of terms required in the quadrature
72: rule to approximate the contour integral.
74: Collective
76: Input Parameters:
77: + Y - input basis vectors
78: . M - number of moments
79: . L - block size
80: . L_max - maximum block size
81: . w - quadrature weights
82: . zn - normalized quadrature points
83: . scat - (optional) VecScatter object to communicate between subcommunicators
84: . subcomm - subcommunicator layout
85: . npoints - number of points to process by the subcommunicator
86: - useconj - whether conjugate points can be used or not
88: Output Parameter:
89: . S - output basis vectors
91: Notes:
92: This is a generalization of BVMult(). The resulting matrix S consists of M
93: panels of L columns, and the following formula is computed for each panel
94: S_k = sum_j w_j*zn_j^k*Y_j, where Y_j is the j-th panel of Y containing
95: the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
96: the width of the panels in Y.
98: When using subcommunicators, Y is stored in the subcommunicators for a subset
99: of integration points. In that case, the computation is done in the subcomm
100: and then scattered to the whole communicator in S using the VecScatter scat.
101: The value npoints is the number of points to be processed in this subcomm
102: and the flag useconj indicates whether symmetric points can be reused.
104: Level: developer
106: .seealso: BVMult(), BVScatter(), BVDotQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
107: @*/
108: PetscErrorCode BVSumQuadrature(BV S,BV Y,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
109: {
110: PetscInt i,j,k,nloc;
111: Vec v,sj;
112: PetscScalar *ppk,*pv,one=1.0;
114: PetscFunctionBegin;
119: PetscCall(BVGetSizes(Y,&nloc,NULL,NULL));
120: PetscCall(PetscMalloc1(npoints,&ppk));
121: for (i=0;i<npoints;i++) ppk[i] = 1.0;
122: PetscCall(BVCreateVec(Y,&v));
123: for (k=0;k<M;k++) {
124: for (j=0;j<L;j++) {
125: PetscCall(VecSet(v,0.0));
126: for (i=0;i<npoints;i++) {
127: PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
128: PetscCall(BVMultVec(Y,ppk[i]*w[p_id(i)],1.0,v,&one));
129: }
130: if (PetscUnlikely(useconj)) {
131: PetscCall(VecGetArray(v,&pv));
132: for (i=0;i<nloc;i++) pv[i] = 2.0*PetscRealPart(pv[i]);
133: PetscCall(VecRestoreArray(v,&pv));
134: }
135: PetscCall(BVGetColumn(S,k*L+j,&sj));
136: if (PetscUnlikely(scat)) {
137: PetscCall(VecScatterBegin(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
138: PetscCall(VecScatterEnd(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
139: } else PetscCall(VecCopy(v,sj));
140: PetscCall(BVRestoreColumn(S,k*L+j,&sj));
141: }
142: for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
143: }
144: PetscCall(PetscFree(ppk));
145: PetscCall(VecDestroy(&v));
146: PetscFunctionReturn(PETSC_SUCCESS);
147: }
149: /*@
150: BVDotQuadrature - Computes the projection terms required in the quadrature
151: rule to approximate the contour integral.
153: Collective
155: Input Parameters:
156: + Y - first basis vectors
157: . V - second basis vectors
158: . M - number of moments
159: . L - block size
160: . L_max - maximum block size
161: . w - quadrature weights
162: . zn - normalized quadrature points
163: . subcomm - subcommunicator layout
164: . npoints - number of points to process by the subcommunicator
165: - useconj - whether conjugate points can be used or not
167: Output Parameter:
168: . Mu - computed result
170: Notes:
171: This is a generalization of BVDot(). The resulting matrix Mu consists of M
172: blocks of size LxL (placed horizontally), each of them computed as
173: Mu_k = sum_j w_j*zn_j^k*V'*Y_j, where Y_j is the j-th panel of Y containing
174: the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
175: the width of the panels in Y.
177: When using subcommunicators, Y is stored in the subcommunicators for a subset
178: of integration points. In that case, the computation is done in the subcomm
179: and then the final result is combined via reduction.
180: The value npoints is the number of points to be processed in this subcomm
181: and the flag useconj indicates whether symmetric points can be reused.
183: Level: developer
185: .seealso: BVDot(), BVScatter(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
186: @*/
187: PetscErrorCode BVDotQuadrature(BV Y,BV V,PetscScalar *Mu,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
188: {
189: PetscMPIInt sub_size,count;
190: PetscInt i,j,k,s;
191: PetscScalar *temp,*temp2,*ppk,alp;
192: Mat H;
193: const PetscScalar *pH;
194: MPI_Comm child,parent;
196: PetscFunctionBegin;
200: PetscCall(PetscSubcommGetChild(subcomm,&child));
201: PetscCallMPI(MPI_Comm_size(child,&sub_size));
202: PetscCall(PetscMalloc3(npoints*L*(L+1),&temp,2*M*L*L,&temp2,npoints,&ppk));
203: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,L,L_max*npoints,NULL,&H));
204: PetscCall(PetscArrayzero(temp2,2*M*L*L));
205: PetscCall(BVSetActiveColumns(Y,0,L_max*npoints));
206: PetscCall(BVSetActiveColumns(V,0,L));
207: PetscCall(BVDot(Y,V,H));
208: PetscCall(MatDenseGetArrayRead(H,&pH));
209: for (i=0;i<npoints;i++) {
210: for (j=0;j<L;j++) {
211: for (k=0;k<L;k++) {
212: temp[k+j*L+i*L*L] = pH[k+j*L+i*L*L_max];
213: }
214: }
215: }
216: PetscCall(MatDenseRestoreArrayRead(H,&pH));
217: for (i=0;i<npoints;i++) ppk[i] = 1;
218: for (k=0;k<2*M;k++) {
219: for (j=0;j<L;j++) {
220: for (i=0;i<npoints;i++) {
221: alp = ppk[i]*w[p_id(i)];
222: for (s=0;s<L;s++) {
223: if (!useconj) temp2[s+(j+k*L)*L] += alp*temp[s+(j+i*L)*L];
224: else temp2[s+(j+k*L)*L] += 2.0*PetscRealPart(alp*temp[s+(j+i*L)*L]);
225: }
226: }
227: }
228: for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
229: }
230: for (i=0;i<2*M*L*L;i++) temp2[i] /= sub_size;
231: PetscCall(PetscMPIIntCast(2*M*L*L,&count));
232: PetscCall(PetscSubcommGetParent(subcomm,&parent));
233: PetscCallMPI(MPIU_Allreduce(temp2,Mu,count,MPIU_SCALAR,MPIU_SUM,parent));
234: PetscCall(PetscFree3(temp,temp2,ppk));
235: PetscCall(MatDestroy(&H));
236: PetscFunctionReturn(PETSC_SUCCESS);
237: }
239: /*@
240: BVTraceQuadrature - Computes an estimate of the number of eigenvalues
241: inside a region via quantities computed in the quadrature rule of
242: contour integral methods.
244: Collective
246: Input Parameters:
247: + Y - first basis vectors
248: . V - second basis vectors
249: . L - block size
250: . L_max - maximum block size
251: . w - quadrature weights
252: . scat - (optional) VecScatter object to communicate between subcommunicators
253: . subcomm - subcommunicator layout
254: . npoints - number of points to process by the subcommunicator
255: - useconj - whether conjugate points can be used or not
257: Output Parameter:
258: . est_eig - estimated eigenvalue count
260: Notes:
261: This function returns an estimation of the number of eigenvalues in the
262: region, computed as trace(V'*S_0), where S_0 is the first panel of S
263: computed by BVSumQuadrature().
265: When using subcommunicators, Y is stored in the subcommunicators for a subset
266: of integration points. In that case, the computation is done in the subcomm
267: and then scattered to the whole communicator in S using the VecScatter scat.
268: The value npoints is the number of points to be processed in this subcomm
269: and the flag useconj indicates whether symmetric points can be reused.
271: Level: developer
273: .seealso: BVScatter(), BVDotQuadrature(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
274: @*/
275: PetscErrorCode BVTraceQuadrature(BV Y,BV V,PetscInt L,PetscInt L_max,PetscScalar *w,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj,PetscReal *est_eig)
276: {
277: PetscInt i,j;
278: Vec y,yall,vj;
279: PetscScalar dot,sum=0.0,one=1.0;
281: PetscFunctionBegin;
286: PetscCall(BVCreateVec(Y,&y));
287: PetscCall(BVCreateVec(V,&yall));
288: for (j=0;j<L;j++) {
289: PetscCall(VecSet(y,0.0));
290: for (i=0;i<npoints;i++) {
291: PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
292: PetscCall(BVMultVec(Y,w[p_id(i)],1.0,y,&one));
293: }
294: PetscCall(BVGetColumn(V,j,&vj));
295: if (scat) {
296: PetscCall(VecScatterBegin(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
297: PetscCall(VecScatterEnd(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
298: PetscCall(VecDot(vj,yall,&dot));
299: } else PetscCall(VecDot(vj,y,&dot));
300: PetscCall(BVRestoreColumn(V,j,&vj));
301: if (useconj) sum += 2.0*PetscRealPart(dot);
302: else sum += dot;
303: }
304: *est_eig = PetscAbsScalar(sum)/(PetscReal)L;
305: PetscCall(VecDestroy(&y));
306: PetscCall(VecDestroy(&yall));
307: PetscFunctionReturn(PETSC_SUCCESS);
308: }
310: static PetscErrorCode BVSVDAndRank_Refine(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
311: {
312: PetscInt i,j,k,ml=S->k;
313: PetscMPIInt len;
314: PetscScalar *work,*B,*tempB,*sarray,*Q1,*Q2,*temp2,alpha=1.0,beta=0.0;
315: PetscBLASInt l,m,n,lda,ldu,ldvt,lwork,info,ldb,ldc,lds;
316: #if defined(PETSC_USE_COMPLEX)
317: PetscReal *rwork;
318: #endif
320: PetscFunctionBegin;
321: PetscCall(PetscBLASIntCast(S->ld,&lds));
322: PetscCall(BVGetArray(S,&sarray));
323: PetscCall(PetscMalloc6(ml*ml,&temp2,S->n*ml,&Q1,S->n*ml,&Q2,ml*ml,&B,ml*ml,&tempB,5*ml,&work));
324: #if defined(PETSC_USE_COMPLEX)
325: PetscCall(PetscMalloc1(5*ml,&rwork));
326: #endif
327: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
329: PetscCall(PetscArrayzero(B,ml*ml));
330: for (i=0;i<ml;i++) B[i*ml+i]=1;
332: for (k=0;k<2;k++) {
333: PetscCall(PetscBLASIntCast(S->n,&m));
334: PetscCall(PetscBLASIntCast(ml,&l));
335: n = l; lda = m; ldb = m; ldc = l;
336: if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,sarray,&lds,sarray,&lds,&beta,pA,&ldc));
337: else PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,Q1,&lda,Q1,&ldb,&beta,pA,&ldc));
338: PetscCall(PetscArrayzero(temp2,ml*ml));
339: PetscCall(PetscMPIIntCast(ml*ml,&len));
340: PetscCallMPI(MPIU_Allreduce(pA,temp2,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)S)));
342: PetscCall(PetscBLASIntCast(ml,&m));
343: n = m; lda = m; lwork = 5*m, ldu = 1; ldvt = 1;
344: #if defined(PETSC_USE_COMPLEX)
345: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
346: #else
347: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
348: #endif
349: SlepcCheckLapackInfo("gesvd",info);
351: PetscCall(PetscBLASIntCast(S->n,&l));
352: PetscCall(PetscBLASIntCast(ml,&n));
353: m = n; lda = l; ldb = m; ldc = l;
354: if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,sarray,&lds,temp2,&ldb,&beta,Q1,&ldc));
355: else PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,Q1,&lda,temp2,&ldb,&beta,Q2,&ldc));
357: PetscCall(PetscBLASIntCast(ml,&l));
358: m = l; n = l; lda = l; ldb = m; ldc = l;
359: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,B,&lda,temp2,&ldb,&beta,tempB,&ldc));
360: for (i=0;i<ml;i++) {
361: sigma[i] = PetscSqrtReal(sigma[i]);
362: for (j=0;j<S->n;j++) {
363: if (k%2) Q2[j+i*S->n] /= sigma[i];
364: else Q1[j+i*S->n] /= sigma[i];
365: }
366: for (j=0;j<ml;j++) B[j+i*ml] = tempB[j+i*ml]*sigma[i];
367: }
368: }
370: PetscCall(PetscBLASIntCast(ml,&m));
371: n = m; lda = m; ldu=1; ldvt=1;
372: #if defined(PETSC_USE_COMPLEX)
373: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
374: #else
375: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
376: #endif
377: SlepcCheckLapackInfo("gesvd",info);
379: PetscCall(PetscBLASIntCast(S->n,&l));
380: PetscCall(PetscBLASIntCast(ml,&n));
381: m = n; lda = l; ldb = m;
382: if (k%2) PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q1,&lda,B,&ldb,&beta,sarray,&lds));
383: else PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q2,&lda,B,&ldb,&beta,sarray,&lds));
385: PetscCall(PetscFPTrapPop());
386: PetscCall(BVRestoreArray(S,&sarray));
388: if (rank) {
389: (*rank) = 0;
390: for (i=0;i<ml;i++) {
391: if (sigma[i]/PetscMax(sigma[0],1.0)>delta) (*rank)++;
392: }
393: }
394: PetscCall(PetscFree6(temp2,Q1,Q2,B,tempB,work));
395: #if defined(PETSC_USE_COMPLEX)
396: PetscCall(PetscFree(rwork));
397: #endif
398: PetscFunctionReturn(PETSC_SUCCESS);
399: }
401: static PetscErrorCode BVSVDAndRank_QR(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
402: {
403: PetscInt i,n,ml=S->k;
404: PetscBLASInt m,lda,lwork,info;
405: PetscScalar *work;
406: PetscReal *rwork;
407: Mat A;
408: Vec v;
410: PetscFunctionBegin;
411: /* Compute QR factorizaton of S */
412: PetscCall(BVGetSizes(S,NULL,&n,NULL));
413: n = PetscMin(n,ml);
414: PetscCall(BVSetActiveColumns(S,0,n));
415: PetscCall(PetscArrayzero(pA,ml*n));
416: PetscCall(MatCreateDense(PETSC_COMM_SELF,n,n,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
417: PetscCall(BVOrthogonalize(S,A));
418: if (n<ml) {
419: /* the rest of the factorization */
420: for (i=n;i<ml;i++) {
421: PetscCall(BVGetColumn(S,i,&v));
422: PetscCall(BVOrthogonalizeVec(S,v,pA+i*n,NULL,NULL));
423: PetscCall(BVRestoreColumn(S,i,&v));
424: }
425: }
426: PetscCall(PetscBLASIntCast(n,&lda));
427: PetscCall(PetscBLASIntCast(ml,&m));
428: PetscCall(PetscMalloc2(5*ml,&work,5*ml,&rwork));
429: lwork = 5*m;
430: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
431: #if !defined (PETSC_USE_COMPLEX)
432: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,&info));
433: #else
434: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,rwork,&info));
435: #endif
436: SlepcCheckLapackInfo("gesvd",info);
437: PetscCall(PetscFPTrapPop());
438: *rank = 0;
439: for (i=0;i<n;i++) {
440: if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
441: }
442: /* n first columns of A have the left singular vectors */
443: PetscCall(BVMultInPlace(S,A,0,*rank));
444: PetscCall(PetscFree2(work,rwork));
445: PetscCall(MatDestroy(&A));
446: PetscFunctionReturn(PETSC_SUCCESS);
447: }
449: static PetscErrorCode BVSVDAndRank_QR_CAA(BV S,PetscInt M,PetscInt L,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
450: {
451: PetscInt i,j,n,ml=S->k;
452: PetscBLASInt m,k_,lda,lwork,info;
453: PetscScalar *work,*T,*U,*R,sone=1.0,zero=0.0;
454: PetscReal *rwork;
455: Mat A;
457: PetscFunctionBegin;
458: /* Compute QR factorizaton of S */
459: PetscCall(BVGetSizes(S,NULL,&n,NULL));
460: PetscCheck(n>=ml,PetscObjectComm((PetscObject)S),PETSC_ERR_SUP,"The QR_CAA method does not support problem size n < m*L");
461: PetscCall(BVSetActiveColumns(S,0,ml));
462: PetscCall(PetscArrayzero(pA,ml*ml));
463: PetscCall(MatCreateDense(PETSC_COMM_SELF,ml,ml,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
464: PetscCall(BVOrthogonalize(S,A));
465: PetscCall(MatDestroy(&A));
467: /* SVD of first (M-1)*L diagonal block */
468: PetscCall(PetscBLASIntCast((M-1)*L,&m));
469: PetscCall(PetscMalloc5(m*m,&T,m*m,&R,m*m,&U,5*ml,&work,5*ml,&rwork));
470: for (j=0;j<m;j++) PetscCall(PetscArraycpy(R+j*m,pA+j*ml,m));
471: lwork = 5*m;
472: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
473: #if !defined (PETSC_USE_COMPLEX)
474: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,&info));
475: #else
476: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,rwork,&info));
477: #endif
478: SlepcCheckLapackInfo("gesvd",info);
479: PetscCall(PetscFPTrapPop());
480: *rank = 0;
481: for (i=0;i<m;i++) {
482: if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
483: }
484: PetscCall(MatCreateDense(PETSC_COMM_SELF,m,m,PETSC_DECIDE,PETSC_DECIDE,U,&A));
485: PetscCall(BVSetActiveColumns(S,0,m));
486: PetscCall(BVMultInPlace(S,A,0,*rank));
487: PetscCall(MatDestroy(&A));
488: /* Projected linear system */
489: /* m first columns of A have the right singular vectors */
490: PetscCall(PetscBLASIntCast(*rank,&k_));
491: PetscCall(PetscBLASIntCast(ml,&lda));
492: PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&m,&k_,&m,&sone,pA+L*lda,&lda,R,&m,&zero,T,&m));
493: PetscCall(PetscArrayzero(pA,ml*ml));
494: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&m,&sone,U,&m,T,&m,&zero,pA,&k_));
495: for (j=0;j<k_;j++) for (i=0;i<k_;i++) pA[j*k_+i] /= sigma[j];
496: PetscCall(PetscFree5(T,R,U,work,rwork));
497: PetscFunctionReturn(PETSC_SUCCESS);
498: }
500: /*@
501: BVSVDAndRank - Compute the SVD (left singular vectors only, and singular
502: values) and determine the numerical rank according to a tolerance.
504: Collective
506: Input Parameters:
507: + S - the basis vectors
508: . m - the moment degree
509: . l - the block size
510: . delta - the tolerance used to determine the rank
511: - meth - the method to be used
513: Output Parameters:
514: + A - workspace, on output contains relevant values in the CAA method
515: . sigma - computed singular values
516: - rank - estimated rank (optional)
518: Notes:
519: This function computes [U,Sigma,V] = svd(S) and replaces S with U.
520: The current implementation computes this via S'*S, and it may include
521: some kind of iterative refinement to improve accuracy in some cases.
523: The parameters m and l refer to the moment and block size of contour
524: integral methods. All columns up to m*l are modified, and the active
525: columns are set to 0..m*l.
527: The method is one of BV_SVD_METHOD_REFINE, BV_SVD_METHOD_QR, BV_SVD_METHOD_QR_CAA.
529: The A workspace should be m*l*m*l in size.
531: Once the decomposition is computed, the numerical rank is estimated
532: by counting the number of singular values that are larger than the
533: tolerance delta, relative to the first singular value.
535: Level: developer
537: .seealso: BVSetActiveColumns()
538: @*/
539: PetscErrorCode BVSVDAndRank(BV S,PetscInt m,PetscInt l,PetscReal delta,BVSVDMethod meth,PetscScalar *A,PetscReal *sigma,PetscInt *rank)
540: {
541: PetscFunctionBegin;
547: PetscAssertPointer(A,6);
548: PetscAssertPointer(sigma,7);
549: PetscAssertPointer(rank,8);
551: PetscCall(PetscLogEventBegin(BV_SVDAndRank,S,0,0,0));
552: PetscCall(BVSetActiveColumns(S,0,m*l));
553: switch (meth) {
554: case BV_SVD_METHOD_REFINE:
555: PetscCall(BVSVDAndRank_Refine(S,delta,A,sigma,rank));
556: break;
557: case BV_SVD_METHOD_QR:
558: PetscCall(BVSVDAndRank_QR(S,delta,A,sigma,rank));
559: break;
560: case BV_SVD_METHOD_QR_CAA:
561: PetscCall(BVSVDAndRank_QR_CAA(S,m,l,delta,A,sigma,rank));
562: break;
563: }
564: PetscCall(PetscLogEventEnd(BV_SVDAndRank,S,0,0,0));
565: PetscFunctionReturn(PETSC_SUCCESS);
566: }
568: /*@
569: BVCISSResizeBases - Resize the bases involved in CISS solvers when the L grows.
571: Logically Collective
573: Input Parameters:
574: + S - basis of L*M columns
575: . V - basis of L columns (may be associated to subcommunicators)
576: . Y - basis of npoints*L columns
577: . Lold - old value of L
578: . Lnew - new value of L
579: . M - the moment size
580: - npoints - number of integration points
582: Level: developer
584: .seealso: BVResize()
585: @*/
586: PetscErrorCode BVCISSResizeBases(BV S,BV V,BV Y,PetscInt Lold,PetscInt Lnew,PetscInt M,PetscInt npoints)
587: {
588: PetscInt i,j;
590: PetscFunctionBegin;
599: PetscCall(BVResize(S,Lnew*M,PETSC_FALSE));
600: PetscCall(BVResize(V,Lnew,PETSC_TRUE));
601: PetscCall(BVResize(Y,Lnew*npoints,PETSC_TRUE));
602: /* columns of Y are interleaved */
603: for (i=npoints-1;i>=0;i--) {
604: for (j=Lold-1;j>=0;j--) PetscCall(BVCopyColumn(Y,i*Lold+j,i*Lnew+j));
605: }
606: PetscFunctionReturn(PETSC_SUCCESS);
607: }