Actual source code: bvcontour.c
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: BV developer functions needed in contour integral methods
12: */
14: #include <slepc/private/bvimpl.h>
15: #include <slepcblaslapack.h>
17: #define p_id(i) (i*subcomm->n + subcomm->color)
19: /*@
20: BVScatter - Scatters the columns of a `BV` to another `BV` created in a
21: subcommunicator.
23: Collective
25: Input Parameters:
26: + Vin - input basis vectors (defined on the whole communicator)
27: . scat - `VecScatter` object that contains the info for the communication
28: - xdup - an auxiliary vector
30: Output Parameter:
31: . Vout - output basis vectors (defined on the subcommunicator)
33: Notes:
34: Currently implemented as a loop for each active column, where each
35: column is scattered independently. The vector `xdup` is defined on the
36: contiguous parent communicator and should have enough space to store one
37: duplicate of the original vector per each subcommunicator.
39: Level: developer
41: .seealso: [](sec:bv), `BVGetColumn()`
42: @*/
43: PetscErrorCode BVScatter(BV Vin,BV Vout,VecScatter scat,Vec xdup)
44: {
45: PetscInt i;
46: Vec v,z;
47: const PetscScalar *array;
49: PetscFunctionBegin;
54: PetscCall(BVCreateVec(Vout,&z));
55: for (i=Vin->l;i<Vin->k;i++) {
56: PetscCall(BVGetColumn(Vin,i,&v));
57: PetscCall(VecScatterBegin(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
58: PetscCall(VecScatterEnd(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
59: PetscCall(BVRestoreColumn(Vin,i,&v));
60: PetscCall(VecGetArrayRead(xdup,&array));
61: PetscCall(VecPlaceArray(z,array));
62: PetscCall(BVInsertVec(Vout,i,z));
63: PetscCall(VecResetArray(z));
64: PetscCall(VecRestoreArrayRead(xdup,&array));
65: }
66: PetscCall(VecDestroy(&z));
67: PetscFunctionReturn(PETSC_SUCCESS);
68: }
70: /*@
71: BVSumQuadrature - Computes the sum of terms required in the quadrature
72: rule to approximate the contour integral.
74: Collective
76: Input Parameters:
77: + Y - input basis vectors
78: . M - number of moments
79: . L - block size
80: . L_max - maximum block size
81: . w - quadrature weights
82: . zn - normalized quadrature points
83: . scat - (optional) `VecScatter` object to communicate between subcommunicators
84: . subcomm - subcommunicator layout
85: . npoints - number of points to process by the subcommunicator
86: - useconj - whether conjugate points can be used or not
88: Output Parameter:
89: . S - output basis vectors
91: Notes:
92: This is a generalization of `BVMult()`. The resulting matrix `S` consists of `M`
93: panels of `L` columns, and the following formula is computed for each panel
94: $S_k = \sum_j w_j\zeta_j^kY_j$, where $Y_j$ is the $j$-th panel of $Y$ containing
95: the result of solving $T(z_j)^{-1}X$ for each integration point $j$. `L_max` is
96: the width of the panels in $Y$.
98: When using subcommunicators, `Y` is stored in the subcommunicators for a subset
99: of integration points. In that case, the computation is done in the `subcomm`
100: and then scattered to the whole communicator in `S` using the `VecScatter` `scat`.
101: The value `npoints` is the number of points to be processed in this `subcomm`
102: and the flag `useconj` indicates whether symmetric points can be reused.
104: Level: developer
106: .seealso: [](sec:bv), `BVMult()`, `BVScatter()`, `BVDotQuadrature()`, `RGComputeQuadrature()`, `RGCanUseConjugates()`
107: @*/
108: PetscErrorCode BVSumQuadrature(BV S,BV Y,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar w[],PetscScalar zn[],VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
109: {
110: PetscInt i,j,k,nloc;
111: Vec v,sj;
112: PetscScalar *ppk,*pv,one=1.0;
114: PetscFunctionBegin;
119: PetscCall(BVGetSizes(Y,&nloc,NULL,NULL));
120: PetscCall(PetscMalloc1(npoints,&ppk));
121: for (i=0;i<npoints;i++) ppk[i] = 1.0;
122: PetscCall(BVCreateVec(Y,&v));
123: for (k=0;k<M;k++) {
124: for (j=0;j<L;j++) {
125: PetscCall(VecSet(v,0.0));
126: for (i=0;i<npoints;i++) {
127: PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
128: PetscCall(BVMultVec(Y,ppk[i]*w[p_id(i)],1.0,v,&one));
129: }
130: if (PetscUnlikely(useconj)) {
131: PetscCall(VecGetArray(v,&pv));
132: for (i=0;i<nloc;i++) pv[i] = 2.0*PetscRealPart(pv[i]);
133: PetscCall(VecRestoreArray(v,&pv));
134: }
135: PetscCall(BVGetColumn(S,k*L+j,&sj));
136: if (PetscUnlikely(scat)) {
137: PetscCall(VecScatterBegin(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
138: PetscCall(VecScatterEnd(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
139: } else PetscCall(VecCopy(v,sj));
140: PetscCall(BVRestoreColumn(S,k*L+j,&sj));
141: }
142: for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
143: }
144: PetscCall(PetscFree(ppk));
145: PetscCall(VecDestroy(&v));
146: PetscFunctionReturn(PETSC_SUCCESS);
147: }
149: /*@
150: BVDotQuadrature - Computes the projection terms required in the quadrature
151: rule to approximate the contour integral.
153: Collective
155: Input Parameters:
156: + Y - first basis vectors
157: . V - second basis vectors
158: . M - number of moments
159: . L - block size
160: . L_max - maximum block size
161: . w - quadrature weights
162: . zn - normalized quadrature points
163: . subcomm - subcommunicator layout
164: . npoints - number of points to process by the subcommunicator
165: - useconj - whether conjugate points can be used or not
167: Output Parameter:
168: . Mu - computed result
170: Notes:
171: This is a generalization of `BVDot()`. The resulting matrix `Mu` consists of `M`
172: blocks of size `L*L` (placed horizontally), each of them computed as
173: $Mu_k = \sum_j w_j\zeta_j^k V^*Y_j$, where $Y_j$ is the $j$-th panel of $Y$ containing
174: the result of solving $T(z_j)^{-1}X$ for each integration point $j$. `L_max` is
175: the width of the panels in $Y$.
177: When using subcommunicators, $Y$ is stored in the subcommunicators for a subset
178: of integration points. In that case, the computation is done in the `subcomm`
179: and then the final result is combined via reduction.
180: The value `npoints` is the number of points to be processed in this `subcomm`
181: and the flag `useconj` indicates whether symmetric points can be reused.
183: Level: developer
185: .seealso: [](sec:bv), `BVDot()`, `BVScatter()`, `BVSumQuadrature()`, `RGComputeQuadrature()`, `RGCanUseConjugates()`
186: @*/
187: PetscErrorCode BVDotQuadrature(BV Y,BV V,PetscScalar Mu[],PetscInt M,PetscInt L,PetscInt L_max,PetscScalar w[],PetscScalar zn[],PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
188: {
189: PetscMPIInt sub_size,count;
190: PetscInt i,j,k,s;
191: PetscScalar *temp,*temp2,*ppk,alp;
192: Mat H;
193: const PetscScalar *pH;
194: MPI_Comm child,parent;
196: PetscFunctionBegin;
200: PetscCall(PetscSubcommGetChild(subcomm,&child));
201: PetscCallMPI(MPI_Comm_size(child,&sub_size));
202: PetscCall(PetscMalloc3(npoints*L*(L+1),&temp,2*M*L*L,&temp2,npoints,&ppk));
203: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,L,L_max*npoints,NULL,&H));
204: PetscCall(PetscArrayzero(temp2,2*M*L*L));
205: PetscCall(BVSetActiveColumns(Y,0,L_max*npoints));
206: PetscCall(BVSetActiveColumns(V,0,L));
207: PetscCall(BVDot(Y,V,H));
208: PetscCall(MatDenseGetArrayRead(H,&pH));
209: for (i=0;i<npoints;i++) {
210: for (j=0;j<L;j++) {
211: for (k=0;k<L;k++) {
212: temp[k+j*L+i*L*L] = pH[k+j*L+i*L*L_max];
213: }
214: }
215: }
216: PetscCall(MatDenseRestoreArrayRead(H,&pH));
217: for (i=0;i<npoints;i++) ppk[i] = 1;
218: for (k=0;k<2*M;k++) {
219: for (j=0;j<L;j++) {
220: for (i=0;i<npoints;i++) {
221: alp = ppk[i]*w[p_id(i)];
222: for (s=0;s<L;s++) {
223: if (!useconj) temp2[s+(j+k*L)*L] += alp*temp[s+(j+i*L)*L];
224: else temp2[s+(j+k*L)*L] += 2.0*PetscRealPart(alp*temp[s+(j+i*L)*L]);
225: }
226: }
227: }
228: for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
229: }
230: for (i=0;i<2*M*L*L;i++) temp2[i] /= sub_size;
231: PetscCall(PetscMPIIntCast(2*M*L*L,&count));
232: PetscCall(PetscSubcommGetParent(subcomm,&parent));
233: PetscCallMPI(MPIU_Allreduce(temp2,Mu,count,MPIU_SCALAR,MPIU_SUM,parent));
234: PetscCall(PetscFree3(temp,temp2,ppk));
235: PetscCall(MatDestroy(&H));
236: PetscFunctionReturn(PETSC_SUCCESS);
237: }
239: /*@
240: BVTraceQuadrature - Computes an estimate of the number of eigenvalues
241: inside a region via quantities computed in the quadrature rule of
242: contour integral methods.
244: Collective
246: Input Parameters:
247: + Y - first basis vectors
248: . V - second basis vectors
249: . L - block size
250: . L_max - maximum block size
251: . w - quadrature weights
252: . scat - (optional) `VecScatter` object to communicate between subcommunicators
253: . subcomm - subcommunicator layout
254: . npoints - number of points to process by the subcommunicator
255: - useconj - whether conjugate points can be used or not
257: Output Parameter:
258: . est_eig - estimated eigenvalue count
260: Notes:
261: This function returns an estimation of the number of eigenvalues in the
262: region, computed as $\operatorname{tr}(V^*S_0)$, where $S_0$ is the first panel of $S$
263: computed by `BVSumQuadrature()`.
265: When using subcommunicators, `Y` is stored in the subcommunicators for a subset
266: of integration points. In that case, the computation is done in the `subcomm`
267: and then scattered to the whole communicator in `V` using the `VecScatter` `scat`.
268: The value `npoints` is the number of points to be processed in this `subcomm`
269: and the flag `useconj` indicates whether symmetric points can be reused.
271: Level: developer
273: .seealso: [](sec:bv), `BVScatter()`, `BVDotQuadrature()`, `BVSumQuadrature()`, `RGComputeQuadrature()`, `RGCanUseConjugates()`
274: @*/
275: PetscErrorCode BVTraceQuadrature(BV Y,BV V,PetscInt L,PetscInt L_max,PetscScalar w[],VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj,PetscReal *est_eig)
276: {
277: PetscInt i,j;
278: Vec y,yall,vj;
279: PetscScalar dot,sum=0.0,one=1.0;
281: PetscFunctionBegin;
286: PetscCall(BVCreateVec(Y,&y));
287: PetscCall(BVCreateVec(V,&yall));
288: for (j=0;j<L;j++) {
289: PetscCall(VecSet(y,0.0));
290: for (i=0;i<npoints;i++) {
291: PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
292: PetscCall(BVMultVec(Y,w[p_id(i)],1.0,y,&one));
293: }
294: PetscCall(BVGetColumn(V,j,&vj));
295: if (scat) {
296: PetscCall(VecScatterBegin(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
297: PetscCall(VecScatterEnd(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
298: PetscCall(VecDot(vj,yall,&dot));
299: } else PetscCall(VecDot(vj,y,&dot));
300: PetscCall(BVRestoreColumn(V,j,&vj));
301: if (useconj) sum += 2.0*PetscRealPart(dot);
302: else sum += dot;
303: }
304: *est_eig = PetscAbsScalar(sum)/(PetscReal)L;
305: PetscCall(VecDestroy(&y));
306: PetscCall(VecDestroy(&yall));
307: PetscFunctionReturn(PETSC_SUCCESS);
308: }
310: static PetscErrorCode BVSVDAndRank_Refine(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
311: {
312: PetscInt i,j,k,ml=S->k;
313: PetscMPIInt len;
314: PetscScalar *work,*B,*tempB,*sarray,*Q1,*Q2,*temp2,alpha=1.0,beta=0.0;
315: PetscBLASInt l,m,n,lda,ldu,ldvt,lwork,info,ldb,ldc,lds;
316: #if defined(PETSC_USE_COMPLEX)
317: PetscReal *rwork;
318: #endif
320: PetscFunctionBegin;
321: PetscCall(PetscBLASIntCast(S->ld,&lds));
322: PetscCall(BVGetArray(S,&sarray));
323: PetscCall(PetscMalloc6(ml*ml,&temp2,S->n*ml,&Q1,S->n*ml,&Q2,ml*ml,&B,ml*ml,&tempB,5*ml,&work));
324: #if defined(PETSC_USE_COMPLEX)
325: PetscCall(PetscMalloc1(5*ml,&rwork));
326: #endif
327: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
329: PetscCall(PetscArrayzero(B,ml*ml));
330: for (i=0;i<ml;i++) B[i*ml+i]=1;
332: for (k=0;k<2;k++) {
333: PetscCall(PetscBLASIntCast(S->n,&m));
334: PetscCall(PetscBLASIntCast(ml,&l));
335: n = l; lda = m; ldb = m; ldc = l;
336: if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,sarray,&lds,sarray,&lds,&beta,pA,&ldc));
337: else PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,Q1,&lda,Q1,&ldb,&beta,pA,&ldc));
338: PetscCall(PetscArrayzero(temp2,ml*ml));
339: PetscCall(PetscMPIIntCast(ml*ml,&len));
340: PetscCallMPI(MPIU_Allreduce(pA,temp2,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)S)));
342: PetscCall(PetscBLASIntCast(ml,&m));
343: n = m; lda = m; lwork = 5*m, ldu = 1; ldvt = 1;
344: #if defined(PETSC_USE_COMPLEX)
345: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
346: #else
347: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
348: #endif
349: SlepcCheckLapackInfo("gesvd",info);
351: PetscCall(PetscBLASIntCast(S->n,&l));
352: PetscCall(PetscBLASIntCast(ml,&n));
353: m = n; lda = l; ldb = m; ldc = l;
354: if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,sarray,&lds,temp2,&ldb,&beta,Q1,&ldc));
355: else PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,Q1,&lda,temp2,&ldb,&beta,Q2,&ldc));
357: PetscCall(PetscBLASIntCast(ml,&l));
358: m = l; n = l; lda = l; ldb = m; ldc = l;
359: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,B,&lda,temp2,&ldb,&beta,tempB,&ldc));
360: for (i=0;i<ml;i++) {
361: sigma[i] = PetscSqrtReal(sigma[i]);
362: for (j=0;j<S->n;j++) {
363: if (k%2) Q2[j+i*S->n] /= sigma[i];
364: else Q1[j+i*S->n] /= sigma[i];
365: }
366: for (j=0;j<ml;j++) B[j+i*ml] = tempB[j+i*ml]*sigma[i];
367: }
368: }
370: PetscCall(PetscBLASIntCast(ml,&m));
371: n = m; lda = m; ldu=1; ldvt=1;
372: #if defined(PETSC_USE_COMPLEX)
373: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
374: #else
375: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
376: #endif
377: SlepcCheckLapackInfo("gesvd",info);
379: PetscCall(PetscBLASIntCast(S->n,&l));
380: PetscCall(PetscBLASIntCast(ml,&n));
381: m = n; lda = l; ldb = m;
382: if (k%2) PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q1,&lda,B,&ldb,&beta,sarray,&lds));
383: else PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q2,&lda,B,&ldb,&beta,sarray,&lds));
385: PetscCall(PetscFPTrapPop());
386: PetscCall(BVRestoreArray(S,&sarray));
388: if (rank) {
389: (*rank) = 0;
390: for (i=0;i<ml;i++) {
391: if (sigma[i]/PetscMax(sigma[0],1.0)>delta) (*rank)++;
392: }
393: }
394: PetscCall(PetscFree6(temp2,Q1,Q2,B,tempB,work));
395: #if defined(PETSC_USE_COMPLEX)
396: PetscCall(PetscFree(rwork));
397: #endif
398: PetscFunctionReturn(PETSC_SUCCESS);
399: }
401: static PetscErrorCode BVSVDAndRank_QR(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rankout)
402: {
403: PetscInt i,n,ml=S->k,rank;
404: PetscBLASInt m,lda,lwork,info;
405: PetscScalar *work;
406: PetscReal *rwork;
407: Mat A;
408: Vec v;
410: PetscFunctionBegin;
411: /* Compute QR factorizaton of S */
412: PetscCall(BVGetSizes(S,NULL,&n,NULL));
413: n = PetscMin(n,ml);
414: PetscCall(BVSetActiveColumns(S,0,n));
415: PetscCall(PetscArrayzero(pA,ml*n));
416: PetscCall(MatCreateDense(PETSC_COMM_SELF,n,n,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
417: PetscCall(BVOrthogonalize(S,A));
418: if (n<ml) {
419: /* the rest of the factorization */
420: for (i=n;i<ml;i++) {
421: PetscCall(BVGetColumn(S,i,&v));
422: PetscCall(BVOrthogonalizeVec(S,v,pA+i*n,NULL,NULL));
423: PetscCall(BVRestoreColumn(S,i,&v));
424: }
425: }
426: PetscCall(PetscBLASIntCast(n,&lda));
427: PetscCall(PetscBLASIntCast(ml,&m));
428: PetscCall(PetscMalloc2(5*ml,&work,5*ml,&rwork));
429: lwork = 5*m;
430: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
431: #if !defined (PETSC_USE_COMPLEX)
432: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,&info));
433: #else
434: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,rwork,&info));
435: #endif
436: SlepcCheckLapackInfo("gesvd",info);
437: PetscCall(PetscFPTrapPop());
438: rank = 0;
439: for (i=0;i<n;i++) {
440: if (sigma[i]/PetscMax(sigma[0],1)>delta) rank++;
441: }
442: if (rankout) *rankout = rank;
443: /* n first columns of A have the left singular vectors */
444: PetscCall(BVMultInPlace(S,A,0,rank));
445: PetscCall(PetscFree2(work,rwork));
446: PetscCall(MatDestroy(&A));
447: PetscFunctionReturn(PETSC_SUCCESS);
448: }
450: static PetscErrorCode BVSVDAndRank_QR_CAA(BV S,PetscInt M,PetscInt L,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rankout)
451: {
452: PetscInt i,j,n,ml=S->k,rank;
453: PetscBLASInt m,k_,lda,lwork,info;
454: PetscScalar *work,*T,*U,*R,sone=1.0,zero=0.0;
455: PetscReal *rwork;
456: Mat A;
458: PetscFunctionBegin;
459: /* Compute QR factorizaton of S */
460: PetscCall(BVGetSizes(S,NULL,&n,NULL));
461: PetscCheck(n>=ml,PetscObjectComm((PetscObject)S),PETSC_ERR_SUP,"The QR_CAA method does not support problem size n < m*L");
462: PetscCall(BVSetActiveColumns(S,0,ml));
463: PetscCall(PetscArrayzero(pA,ml*ml));
464: PetscCall(MatCreateDense(PETSC_COMM_SELF,ml,ml,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
465: PetscCall(BVOrthogonalize(S,A));
466: PetscCall(MatDestroy(&A));
468: /* SVD of first (M-1)*L diagonal block */
469: PetscCall(PetscBLASIntCast((M-1)*L,&m));
470: PetscCall(PetscMalloc5(m*m,&T,m*m,&R,m*m,&U,5*ml,&work,5*ml,&rwork));
471: for (j=0;j<m;j++) PetscCall(PetscArraycpy(R+j*m,pA+j*ml,m));
472: lwork = 5*m;
473: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
474: #if !defined (PETSC_USE_COMPLEX)
475: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,&info));
476: #else
477: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,rwork,&info));
478: #endif
479: SlepcCheckLapackInfo("gesvd",info);
480: PetscCall(PetscFPTrapPop());
481: rank = 0;
482: for (i=0;i<m;i++) {
483: if (sigma[i]/PetscMax(sigma[0],1)>delta) rank++;
484: }
485: if (rankout) *rankout = rank;
486: PetscCall(MatCreateDense(PETSC_COMM_SELF,m,m,PETSC_DECIDE,PETSC_DECIDE,U,&A));
487: PetscCall(BVSetActiveColumns(S,0,m));
488: PetscCall(BVMultInPlace(S,A,0,rank));
489: PetscCall(MatDestroy(&A));
490: /* Projected linear system */
491: /* m first columns of A have the right singular vectors */
492: PetscCall(PetscBLASIntCast(rank,&k_));
493: PetscCall(PetscBLASIntCast(ml,&lda));
494: PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&m,&k_,&m,&sone,pA+L*lda,&lda,R,&m,&zero,T,&m));
495: PetscCall(PetscArrayzero(pA,ml*ml));
496: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&m,&sone,U,&m,T,&m,&zero,pA,&k_));
497: for (j=0;j<k_;j++) for (i=0;i<k_;i++) pA[j*k_+i] /= sigma[j];
498: PetscCall(PetscFree5(T,R,U,work,rwork));
499: PetscFunctionReturn(PETSC_SUCCESS);
500: }
502: /*@
503: BVSVDAndRank - Compute the SVD (left singular vectors only, and singular
504: values) and determine the numerical rank according to a tolerance.
506: Collective
508: Input Parameters:
509: + S - the basis vectors object
510: . m - the moment degree
511: . l - the block size
512: . delta - the tolerance used to determine the rank
513: - meth - the method to be used
515: Output Parameters:
516: + A - workspace, on output contains relevant values in the CAA method
517: . sigma - computed singular values
518: - rank - estimated rank (optional, pass `NULL` if not needed)
520: Notes:
521: This function computes the SVD $S=U\Sigma V^*$ and replaces $S$ with $U$.
522: The current implementation computes this via $S^*S$, and it may include
523: some kind of iterative refinement to improve accuracy in some cases.
525: The parameters `m` and `l` refer to the moment and block size of contour
526: integral methods. All columns up to `m*l` are modified, and the active
527: columns are set to `0..m*l`.
529: See `BVSVDMethod` for available methods.
531: The `A` workspace should be `m*l*m*l` in size.
533: Once the decomposition is computed, the numerical rank is estimated
534: by counting the number of singular values that are larger than the
535: tolerance `delta`, relative to the first singular value.
537: Level: developer
539: .seealso: [](sec:bv), `BVSetActiveColumns()`
540: @*/
541: PetscErrorCode BVSVDAndRank(BV S,PetscInt m,PetscInt l,PetscReal delta,BVSVDMethod meth,PetscScalar A[],PetscReal sigma[],PetscInt *rank)
542: {
543: PetscFunctionBegin;
549: PetscAssertPointer(A,6);
550: PetscAssertPointer(sigma,7);
552: PetscCall(PetscLogEventBegin(BV_SVDAndRank,S,0,0,0));
553: PetscCall(BVSetActiveColumns(S,0,m*l));
554: switch (meth) {
555: case BV_SVD_METHOD_REFINE:
556: PetscCall(BVSVDAndRank_Refine(S,delta,A,sigma,rank));
557: break;
558: case BV_SVD_METHOD_QR:
559: PetscCall(BVSVDAndRank_QR(S,delta,A,sigma,rank));
560: break;
561: case BV_SVD_METHOD_QR_CAA:
562: PetscCall(BVSVDAndRank_QR_CAA(S,m,l,delta,A,sigma,rank));
563: break;
564: }
565: PetscCall(PetscLogEventEnd(BV_SVDAndRank,S,0,0,0));
566: PetscFunctionReturn(PETSC_SUCCESS);
567: }
569: /*@
570: BVCISSResizeBases - Resize the bases involved in CISS solvers when the $L$ parameter grows.
572: Logically Collective
574: Input Parameters:
575: + S - basis of $L\cdot M$ columns
576: . V - basis of $L$ columns (may be associated to subcommunicators)
577: . Y - basis of `npoints`$\cdot L$ columns
578: . Lold - old value of $L$
579: . Lnew - new value of $L$
580: . M - the moment size
581: - npoints - number of integration points
583: Level: developer
585: .seealso: [](sec:bv), `BVResize()`
586: @*/
587: PetscErrorCode BVCISSResizeBases(BV S,BV V,BV Y,PetscInt Lold,PetscInt Lnew,PetscInt M,PetscInt npoints)
588: {
589: PetscInt i,j;
591: PetscFunctionBegin;
600: PetscCall(BVResize(S,Lnew*M,PETSC_FALSE));
601: PetscCall(BVResize(V,Lnew,PETSC_TRUE));
602: PetscCall(BVResize(Y,Lnew*npoints,PETSC_TRUE));
603: /* columns of Y are interleaved */
604: for (i=npoints-1;i>=0;i--) {
605: for (j=Lold-1;j>=0;j--) PetscCall(BVCopyColumn(Y,i*Lold+j,i*Lnew+j));
606: }
607: PetscFunctionReturn(PETSC_SUCCESS);
608: }