Actual source code: bvtensor.c
slepc-3.22.2 2024-12-02
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Tensor BV that is represented in compact form as V = (I otimes U) S
12: */
14: #include <slepc/private/bvimpl.h>
15: #include <slepcblaslapack.h>
17: typedef struct {
18: BV U; /* first factor */
19: Mat S; /* second factor */
20: PetscScalar *qB; /* auxiliary matrix used in non-standard inner products */
21: PetscScalar *sw; /* work space */
22: PetscInt d; /* degree of the tensor BV */
23: PetscInt ld; /* leading dimension of a single block in S */
24: PetscInt puk; /* copy of the k value */
25: Vec u; /* auxiliary work vector */
26: } BV_TENSOR;
28: static PetscErrorCode BVMultInPlace_Tensor(BV V,Mat Q,PetscInt s,PetscInt e)
29: {
30: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
31: PetscScalar *pS;
32: const PetscScalar *q;
33: PetscInt ldq,lds = ctx->ld*ctx->d;
35: PetscFunctionBegin;
36: PetscCall(MatDenseGetLDA(Q,&ldq));
37: PetscCall(MatDenseGetArray(ctx->S,&pS));
38: PetscCall(MatDenseGetArrayRead(Q,&q));
39: PetscCall(BVMultInPlace_BLAS_Private(V,lds,V->k-V->l,s-V->l,e-V->l,pS+(V->nc+V->l)*lds,lds,q+V->l*ldq+V->l,ldq,PETSC_FALSE));
40: PetscCall(MatDenseRestoreArrayRead(Q,&q));
41: PetscCall(MatDenseRestoreArray(ctx->S,&pS));
42: PetscFunctionReturn(PETSC_SUCCESS);
43: }
45: static PetscErrorCode BVMultInPlaceHermitianTranspose_Tensor(BV V,Mat Q,PetscInt s,PetscInt e)
46: {
47: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
48: PetscScalar *pS;
49: const PetscScalar *q;
50: PetscInt ldq,lds = ctx->ld*ctx->d;
52: PetscFunctionBegin;
53: PetscCall(MatDenseGetLDA(Q,&ldq));
54: PetscCall(MatDenseGetArray(ctx->S,&pS));
55: PetscCall(MatDenseGetArrayRead(Q,&q));
56: PetscCall(BVMultInPlace_BLAS_Private(V,lds,V->k-V->l,s-V->l,e-V->l,pS+(V->nc+V->l)*lds,lds,q+V->l*ldq+V->l,ldq,PETSC_TRUE));
57: PetscCall(MatDenseRestoreArrayRead(Q,&q));
58: PetscCall(MatDenseRestoreArray(ctx->S,&pS));
59: PetscFunctionReturn(PETSC_SUCCESS);
60: }
62: static PetscErrorCode BVDot_Tensor(BV X,BV Y,Mat M)
63: {
64: BV_TENSOR *x = (BV_TENSOR*)X->data,*y = (BV_TENSOR*)Y->data;
65: PetscScalar *m;
66: const PetscScalar *px,*py;
67: PetscInt ldm,lds = x->ld*x->d;
69: PetscFunctionBegin;
70: PetscCheck(x->U==y->U,PetscObjectComm((PetscObject)X),PETSC_ERR_SUP,"BVDot() in BVTENSOR requires that both operands have the same U factor");
71: PetscCheck(lds==y->ld*y->d,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_SIZ,"Mismatching dimensions ld*d %" PetscInt_FMT " %" PetscInt_FMT,lds,y->ld*y->d);
72: PetscCall(MatDenseGetLDA(M,&ldm));
73: PetscCall(MatDenseGetArrayRead(x->S,&px));
74: PetscCall(MatDenseGetArrayRead(y->S,&py));
75: PetscCall(MatDenseGetArray(M,&m));
76: PetscCall(BVDot_BLAS_Private(X,Y->k-Y->l,X->k-X->l,lds,py+(Y->nc+Y->l)*lds,lds,px+(X->nc+X->l)*lds,lds,m+X->l*ldm+Y->l,ldm,PETSC_FALSE));
77: PetscCall(MatDenseRestoreArray(M,&m));
78: PetscCall(MatDenseRestoreArrayRead(x->S,&px));
79: PetscCall(MatDenseRestoreArrayRead(y->S,&py));
80: PetscFunctionReturn(PETSC_SUCCESS);
81: }
83: static PetscErrorCode BVScale_Tensor(BV bv,PetscInt j,PetscScalar alpha)
84: {
85: BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
86: PetscScalar *pS;
87: PetscInt lds = ctx->ld*ctx->d;
89: PetscFunctionBegin;
90: PetscCall(MatDenseGetArray(ctx->S,&pS));
91: if (PetscUnlikely(j<0)) PetscCall(BVScale_BLAS_Private(bv,(bv->k-bv->l)*lds,pS+(bv->nc+bv->l)*lds,alpha));
92: else PetscCall(BVScale_BLAS_Private(bv,lds,pS+(bv->nc+j)*lds,alpha));
93: PetscCall(MatDenseRestoreArray(ctx->S,&pS));
94: PetscFunctionReturn(PETSC_SUCCESS);
95: }
97: static PetscErrorCode BVNorm_Tensor(BV bv,PetscInt j,NormType type,PetscReal *val)
98: {
99: BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
100: const PetscScalar *pS;
101: PetscInt lds = ctx->ld*ctx->d;
103: PetscFunctionBegin;
104: PetscCall(MatDenseGetArrayRead(ctx->S,&pS));
105: if (j<0) PetscCall(BVNorm_LAPACK_Private(bv,lds,bv->k-bv->l,pS+(bv->nc+bv->l)*lds,lds,type,val,PETSC_FALSE));
106: else PetscCall(BVNorm_LAPACK_Private(bv,lds,1,pS+(bv->nc+j)*lds,lds,type,val,PETSC_FALSE));
107: PetscCall(MatDenseRestoreArrayRead(ctx->S,&pS));
108: PetscFunctionReturn(PETSC_SUCCESS);
109: }
111: static PetscErrorCode BVCopyColumn_Tensor(BV V,PetscInt j,PetscInt i)
112: {
113: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
114: PetscScalar *pS;
115: PetscInt lds = ctx->ld*ctx->d;
117: PetscFunctionBegin;
118: PetscCall(MatDenseGetArray(ctx->S,&pS));
119: PetscCall(PetscArraycpy(pS+(V->nc+i)*lds,pS+(V->nc+j)*lds,lds));
120: PetscCall(MatDenseRestoreArray(ctx->S,&pS));
121: PetscFunctionReturn(PETSC_SUCCESS);
122: }
124: static PetscErrorCode BVTensorNormColumn(BV bv,PetscInt j,PetscReal *norm)
125: {
126: BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
127: PetscBLASInt one=1,lds_;
128: PetscScalar sone=1.0,szero=0.0,*x,dot;
129: const PetscScalar *S;
130: PetscReal alpha=1.0,scale=0.0,aval;
131: PetscInt i,lds,ld=ctx->ld;
133: PetscFunctionBegin;
134: lds = ld*ctx->d;
135: PetscCall(MatDenseGetArrayRead(ctx->S,&S));
136: PetscCall(PetscBLASIntCast(lds,&lds_));
137: if (PetscUnlikely(ctx->qB)) {
138: x = ctx->sw;
139: PetscCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,S+j*lds,&one,&szero,x,&one));
140: dot = PetscRealPart(BLASdot_(&lds_,S+j*lds,&one,x,&one));
141: PetscCall(BV_SafeSqrt(bv,dot,norm));
142: } else {
143: /* Compute *norm = BLASnrm2_(&lds_,S+j*lds,&one); */
144: if (lds==1) *norm = PetscAbsScalar(S[j*lds]);
145: else {
146: for (i=0;i<lds;i++) {
147: aval = PetscAbsScalar(S[i+j*lds]);
148: if (aval!=0.0) {
149: if (PetscUnlikely(scale<aval)) {
150: alpha = 1.0 + alpha*PetscSqr(scale/aval);
151: scale = aval;
152: } else alpha += PetscSqr(aval/scale);
153: }
154: }
155: *norm = scale*PetscSqrtReal(alpha);
156: }
157: }
158: PetscCall(MatDenseRestoreArrayRead(ctx->S,&S));
159: PetscFunctionReturn(PETSC_SUCCESS);
160: }
162: static PetscErrorCode BVOrthogonalizeGS1_Tensor(BV bv,PetscInt k,Vec v,PetscBool *which,PetscScalar *h,PetscScalar *c,PetscReal *onorm,PetscReal *norm)
163: {
164: BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
165: PetscScalar *pS,*cc,*x,dot,sonem=-1.0,sone=1.0,szero=0.0;
166: PetscInt i,lds = ctx->ld*ctx->d;
167: PetscBLASInt lds_,k_,one=1;
168: const PetscScalar *omega;
170: PetscFunctionBegin;
171: PetscCheck(!v,PetscObjectComm((PetscObject)bv),PETSC_ERR_SUP,"Orthogonalization against an external vector is not allowed in BVTENSOR");
172: PetscCall(MatDenseGetArray(ctx->S,&pS));
173: if (!c) PetscCall(VecGetArray(bv->buffer,&cc));
174: else cc = c;
175: PetscCall(PetscBLASIntCast(lds,&lds_));
176: PetscCall(PetscBLASIntCast(k,&k_));
178: if (onorm) PetscCall(BVTensorNormColumn(bv,k,onorm));
180: if (ctx->qB) x = ctx->sw;
181: else x = pS+k*lds;
183: if (PetscUnlikely(bv->orthog_type==BV_ORTHOG_MGS)) { /* modified Gram-Schmidt */
185: if (PetscUnlikely(bv->indef)) { /* signature */
186: PetscCall(VecGetArrayRead(bv->omega,&omega));
187: }
188: for (i=-bv->nc;i<k;i++) {
189: if (which && i>=0 && !which[i]) continue;
190: if (ctx->qB) PetscCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,pS+k*lds,&one,&szero,x,&one));
191: /* c_i = (s_k, s_i) */
192: dot = PetscRealPart(BLASdot_(&lds_,pS+i*lds,&one,x,&one));
193: if (bv->indef) dot /= PetscRealPart(omega[i]);
194: PetscCall(BV_SetValue(bv,i,0,cc,dot));
195: /* s_k = s_k - c_i s_i */
196: dot = -dot;
197: PetscCallBLAS("BLASaxpy",BLASaxpy_(&lds_,&dot,pS+i*lds,&one,pS+k*lds,&one));
198: }
199: if (PetscUnlikely(bv->indef)) PetscCall(VecRestoreArrayRead(bv->omega,&omega));
201: } else { /* classical Gram-Schmidt */
202: if (ctx->qB) PetscCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,pS+k*lds,&one,&szero,x,&one));
204: /* cc = S_{0:k-1}^* s_k */
205: PetscCallBLAS("BLASgemv",BLASgemv_("C",&lds_,&k_,&sone,pS,&lds_,x,&one,&szero,cc,&one));
207: /* s_k = s_k - S_{0:k-1} cc */
208: if (PetscUnlikely(bv->indef)) PetscCall(BV_ApplySignature(bv,k,cc,PETSC_TRUE));
209: PetscCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&k_,&sonem,pS,&lds_,cc,&one,&sone,pS+k*lds,&one));
210: if (PetscUnlikely(bv->indef)) PetscCall(BV_ApplySignature(bv,k,cc,PETSC_FALSE));
211: }
213: if (norm) PetscCall(BVTensorNormColumn(bv,k,norm));
214: PetscCall(BV_AddCoefficients(bv,k,h,cc));
215: PetscCall(MatDenseRestoreArray(ctx->S,&pS));
216: PetscCall(VecRestoreArray(bv->buffer,&cc));
217: PetscFunctionReturn(PETSC_SUCCESS);
218: }
220: static PetscErrorCode BVView_Tensor(BV bv,PetscViewer viewer)
221: {
222: BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
223: PetscViewerFormat format;
224: PetscBool isascii;
225: const char *bvname,*uname,*sname;
227: PetscFunctionBegin;
228: PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
229: if (isascii) {
230: PetscCall(PetscViewerGetFormat(viewer,&format));
231: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
232: PetscCall(PetscViewerASCIIPrintf(viewer,"number of tensor blocks (degree): %" PetscInt_FMT "\n",ctx->d));
233: PetscCall(PetscViewerASCIIPrintf(viewer,"number of columns of U factor: %" PetscInt_FMT "\n",ctx->ld));
234: PetscFunctionReturn(PETSC_SUCCESS);
235: }
236: PetscCall(BVView(ctx->U,viewer));
237: PetscCall(MatView(ctx->S,viewer));
238: if (format == PETSC_VIEWER_ASCII_MATLAB) {
239: PetscCall(PetscObjectGetName((PetscObject)bv,&bvname));
240: PetscCall(PetscObjectGetName((PetscObject)ctx->U,&uname));
241: PetscCall(PetscObjectGetName((PetscObject)ctx->S,&sname));
242: PetscCall(PetscViewerASCIIPrintf(viewer,"%s=kron(eye(%" PetscInt_FMT "),%s)*%s(:,1:%" PetscInt_FMT ");\n",bvname,ctx->d,uname,sname,bv->k));
243: }
244: } else {
245: PetscCall(BVView(ctx->U,viewer));
246: PetscCall(MatView(ctx->S,viewer));
247: }
248: PetscFunctionReturn(PETSC_SUCCESS);
249: }
251: static PetscErrorCode BVTensorUpdateMatrix(BV V,PetscInt ini,PetscInt end)
252: {
253: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
254: PetscInt i,j,r,c,l,k,ld=ctx->ld,lds=ctx->d*ctx->ld;
255: PetscScalar *qB,*sqB;
256: Vec u;
257: Mat A;
259: PetscFunctionBegin;
260: if (!V->matrix) PetscFunctionReturn(PETSC_SUCCESS);
261: l = ctx->U->l; k = ctx->U->k;
262: /* update inner product matrix */
263: if (!ctx->qB) {
264: PetscCall(PetscCalloc2(lds*lds,&ctx->qB,lds,&ctx->sw));
265: PetscCall(BVCreateVec(ctx->U,&ctx->u));
266: }
267: ctx->U->l = 0;
268: for (r=0;r<ctx->d;r++) {
269: for (c=0;c<=r;c++) {
270: PetscCall(MatNestGetSubMat(V->matrix,r,c,&A));
271: if (A) {
272: qB = ctx->qB+c*ld*lds+r*ld;
273: for (i=ini;i<end;i++) {
274: PetscCall(BVGetColumn(ctx->U,i,&u));
275: PetscCall(MatMult(A,u,ctx->u));
276: ctx->U->k = i+1;
277: PetscCall(BVDotVec(ctx->U,ctx->u,qB+i*lds));
278: PetscCall(BVRestoreColumn(ctx->U,i,&u));
279: for (j=0;j<i;j++) qB[i+j*lds] = PetscConj(qB[j+i*lds]);
280: qB[i*lds+i] = PetscRealPart(qB[i+i*lds]);
281: }
282: if (PetscUnlikely(c!=r)) {
283: sqB = ctx->qB+r*ld*lds+c*ld;
284: for (i=ini;i<end;i++) for (j=0;j<=i;j++) {
285: sqB[i+j*lds] = PetscConj(qB[j+i*lds]);
286: sqB[j+i*lds] = qB[j+i*lds];
287: }
288: }
289: }
290: }
291: }
292: ctx->U->l = l; ctx->U->k = k;
293: PetscFunctionReturn(PETSC_SUCCESS);
294: }
296: static PetscErrorCode BVTensorBuildFirstColumn_Tensor(BV V,PetscInt k)
297: {
298: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
299: PetscInt i,nq=0;
300: PetscScalar *pS,*omega;
301: PetscReal norm;
302: PetscBool breakdown=PETSC_FALSE;
304: PetscFunctionBegin;
305: PetscCall(MatDenseGetArray(ctx->S,&pS));
306: for (i=0;i<ctx->d;i++) {
307: if (i>=k) PetscCall(BVSetRandomColumn(ctx->U,nq));
308: else PetscCall(BVCopyColumn(ctx->U,i,nq));
309: PetscCall(BVOrthogonalizeColumn(ctx->U,nq,pS+i*ctx->ld,&norm,&breakdown));
310: if (!breakdown) {
311: PetscCall(BVScaleColumn(ctx->U,nq,1.0/norm));
312: pS[nq+i*ctx->ld] = norm;
313: nq++;
314: }
315: }
316: PetscCall(MatDenseRestoreArray(ctx->S,&pS));
317: PetscCheck(nq,PetscObjectComm((PetscObject)V),PETSC_ERR_ARG_SIZ,"Cannot build first column of tensor BV; U should contain k=%" PetscInt_FMT " nonzero columns",k);
318: PetscCall(BVTensorUpdateMatrix(V,0,nq));
319: PetscCall(BVTensorNormColumn(V,0,&norm));
320: PetscCall(BVScale_Tensor(V,0,1.0/norm));
321: if (V->indef) {
322: PetscCall(BV_AllocateSignature(V));
323: PetscCall(VecGetArray(V->omega,&omega));
324: omega[0] = (norm<0.0)? -1.0: 1.0;
325: PetscCall(VecRestoreArray(V->omega,&omega));
326: }
327: /* set active columns */
328: ctx->U->l = 0;
329: ctx->U->k = nq;
330: PetscFunctionReturn(PETSC_SUCCESS);
331: }
333: /*@
334: BVTensorBuildFirstColumn - Builds the first column of the tensor basis vectors
335: V from the data contained in the first k columns of U.
337: Collective
339: Input Parameters:
340: + V - the basis vectors context
341: - k - the number of columns of U with relevant information
343: Notes:
344: At most d columns are considered, where d is the degree of the tensor BV.
345: Given V = (I otimes U) S, this function computes the first column of V, that
346: is, it computes the coefficients of the first column of S by orthogonalizing
347: the first d columns of U. If k is less than d (or linearly dependent columns
348: are found) then additional random columns are used.
350: The computed column has unit norm.
352: Level: advanced
354: .seealso: BVCreateTensor()
355: @*/
356: PetscErrorCode BVTensorBuildFirstColumn(BV V,PetscInt k)
357: {
358: PetscFunctionBegin;
361: PetscUseMethod(V,"BVTensorBuildFirstColumn_C",(BV,PetscInt),(V,k));
362: PetscFunctionReturn(PETSC_SUCCESS);
363: }
365: static PetscErrorCode BVTensorCompress_Tensor(BV V,PetscInt newc)
366: {
367: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
368: PetscInt nwu=0,nnc,nrow,lwa,r,c;
369: PetscInt i,j,k,n,lds=ctx->ld*ctx->d,deg=ctx->d,lock,cs1=V->k,rs1=ctx->U->k,rk=0,offu;
370: PetscScalar *S,*M,*Z,*pQ,*SS,*SS2,t,sone=1.0,zero=0.0,mone=-1.0,*p,*tau,*work,*qB,*sqB;
371: PetscReal *sg,tol,*rwork;
372: PetscBLASInt ld_,cs1_,rs1_,cs1tdeg,n_,info,lw_,newc_,newctdeg,nnc_,nrow_,nnctdeg,lds_,rk_;
373: Mat Q,A;
375: PetscFunctionBegin;
376: if (!cs1) PetscFunctionReturn(PETSC_SUCCESS);
377: lwa = 6*ctx->ld*lds+2*cs1;
378: n = PetscMin(rs1,deg*cs1);
379: lock = ctx->U->l;
380: nnc = cs1-lock-newc;
381: nrow = rs1-lock;
382: PetscCall(PetscCalloc6(deg*newc*nnc,&SS,newc*nnc,&SS2,(rs1+lock+newc)*n,&pQ,deg*rs1,&tau,lwa,&work,6*n,&rwork));
383: offu = lock*(rs1+1);
384: M = work+nwu;
385: nwu += rs1*cs1*deg;
386: sg = rwork;
387: Z = work+nwu;
388: nwu += deg*cs1*n;
389: PetscCall(PetscBLASIntCast(n,&n_));
390: PetscCall(PetscBLASIntCast(nnc,&nnc_));
391: PetscCall(PetscBLASIntCast(cs1,&cs1_));
392: PetscCall(PetscBLASIntCast(rs1,&rs1_));
393: PetscCall(PetscBLASIntCast(newc,&newc_));
394: PetscCall(PetscBLASIntCast(newc*deg,&newctdeg));
395: PetscCall(PetscBLASIntCast(nnc*deg,&nnctdeg));
396: PetscCall(PetscBLASIntCast(cs1*deg,&cs1tdeg));
397: PetscCall(PetscBLASIntCast(lwa-nwu,&lw_));
398: PetscCall(PetscBLASIntCast(nrow,&nrow_));
399: PetscCall(PetscBLASIntCast(lds,&lds_));
400: PetscCall(MatDenseGetArray(ctx->S,&S));
402: if (newc>0) {
403: /* truncate columns associated with new converged eigenpairs */
404: for (j=0;j<deg;j++) {
405: for (i=lock;i<lock+newc;i++) PetscCall(PetscArraycpy(M+(i-lock+j*newc)*nrow,S+i*lds+j*ctx->ld+lock,nrow));
406: }
407: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
408: #if !defined (PETSC_USE_COMPLEX)
409: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pQ+offu,&rs1_,Z,&n_,work+nwu,&lw_,&info));
410: #else
411: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pQ+offu,&rs1_,Z,&n_,work+nwu,&lw_,rwork+n,&info));
412: #endif
413: SlepcCheckLapackInfo("gesvd",info);
414: PetscCall(PetscFPTrapPop());
415: /* SVD has rank min(newc,nrow) */
416: rk = PetscMin(newc,nrow);
417: for (i=0;i<rk;i++) {
418: t = sg[i];
419: PetscCallBLAS("BLASscal",BLASscal_(&newctdeg,&t,Z+i,&n_));
420: }
421: for (i=0;i<deg;i++) {
422: for (j=lock;j<lock+newc;j++) {
423: PetscCall(PetscArraycpy(S+j*lds+i*ctx->ld+lock,Z+(newc*i+j-lock)*n,rk));
424: PetscCall(PetscArrayzero(S+j*lds+i*ctx->ld+lock+rk,(ctx->ld-lock-rk)));
425: }
426: }
427: /*
428: update columns associated with non-converged vectors, orthogonalize
429: against pQ so that next M has rank nnc+d-1 instead of nrow+d-1
430: */
431: for (i=0;i<deg;i++) {
432: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pQ+offu,&rs1_,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_,&zero,PetscSafePointerPlusOffset(SS,i*nnc*newc),&newc_));
433: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pQ+offu,&rs1_,PetscSafePointerPlusOffset(SS,i*nnc*newc),&newc_,&sone,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_));
434: /* repeat orthogonalization step */
435: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pQ+offu,&rs1_,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_,&zero,SS2,&newc_));
436: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pQ+offu,&rs1_,SS2,&newc_,&sone,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_));
437: for (j=0;j<newc*nnc;j++) *(SS+i*newc*nnc+j) += SS2[j];
438: }
439: }
441: /* truncate columns associated with non-converged eigenpairs */
442: for (j=0;j<deg;j++) {
443: for (i=lock+newc;i<cs1;i++) PetscCall(PetscArraycpy(M+(i-lock-newc+j*nnc)*nrow,S+i*lds+j*ctx->ld+lock,nrow));
444: }
445: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
446: #if !defined (PETSC_USE_COMPLEX)
447: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pQ+offu+newc*rs1,&rs1_,Z,&n_,work+nwu,&lw_,&info));
448: #else
449: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pQ+offu+newc*rs1,&rs1_,Z,&n_,work+nwu,&lw_,rwork+n,&info));
450: #endif
451: SlepcCheckLapackInfo("gesvd",info);
452: PetscCall(PetscFPTrapPop());
453: tol = PetscMax(rs1,deg*cs1)*PETSC_MACHINE_EPSILON*sg[0];
454: rk = 0;
455: for (i=0;i<PetscMin(nrow,nnctdeg);i++) if (sg[i]>tol) rk++;
456: rk = PetscMin(nnc+deg-1,rk);
457: /* the SVD has rank (at most) nnc+deg-1 */
458: for (i=0;i<rk;i++) {
459: t = sg[i];
460: PetscCallBLAS("BLASscal",BLASscal_(&nnctdeg,&t,Z+i,&n_));
461: }
462: /* update S */
463: PetscCall(PetscArrayzero(S+cs1*lds,(V->m-cs1)*lds));
464: k = ctx->ld-lock-newc-rk;
465: for (i=0;i<deg;i++) {
466: for (j=lock+newc;j<cs1;j++) {
467: PetscCall(PetscArraycpy(S+j*lds+i*ctx->ld+lock+newc,Z+(nnc*i+j-lock-newc)*n,rk));
468: PetscCall(PetscArrayzero(S+j*lds+i*ctx->ld+lock+newc+rk,k));
469: }
470: }
471: if (newc>0) {
472: for (i=0;i<deg;i++) {
473: p = PetscSafePointerPlusOffset(SS,i*nnc*newc);
474: for (j=lock+newc;j<cs1;j++) {
475: for (k=0;k<newc;k++) S[j*lds+i*ctx->ld+lock+k] = *(p++);
476: }
477: }
478: }
480: /* orthogonalize pQ */
481: rk = rk+newc;
482: PetscCall(PetscBLASIntCast(rk,&rk_));
483: PetscCall(PetscBLASIntCast(cs1-lock,&nnc_));
484: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
485: PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&nrow_,&rk_,pQ+offu,&rs1_,tau,work+nwu,&lw_,&info));
486: SlepcCheckLapackInfo("geqrf",info);
487: for (i=0;i<deg;i++) {
488: PetscCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&rk_,&nnc_,&sone,pQ+offu,&rs1_,S+lock*lds+lock+i*ctx->ld,&lds_));
489: }
490: PetscCallBLAS("LAPACKorgqr",LAPACKorgqr_(&nrow_,&rk_,&rk_,pQ+offu,&rs1_,tau,work+nwu,&lw_,&info));
491: SlepcCheckLapackInfo("orgqr",info);
492: PetscCall(PetscFPTrapPop());
494: /* update vectors U(:,idx) = U*Q(:,idx) */
495: rk = rk+lock;
496: for (i=0;i<lock;i++) pQ[i*(1+rs1)] = 1.0;
497: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,rs1,rk,pQ,&Q));
498: ctx->U->k = rs1;
499: PetscCall(BVMultInPlace(ctx->U,Q,lock,rk));
500: PetscCall(MatDestroy(&Q));
502: if (ctx->qB) {
503: /* update matrix qB */
504: PetscCall(PetscBLASIntCast(ctx->ld,&ld_));
505: PetscCall(PetscBLASIntCast(rk,&rk_));
506: for (r=0;r<ctx->d;r++) {
507: for (c=0;c<=r;c++) {
508: PetscCall(MatNestGetSubMat(V->matrix,r,c,&A));
509: if (A) {
510: qB = ctx->qB+r*ctx->ld+c*ctx->ld*lds;
511: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&rs1_,&rk_,&rs1_,&sone,qB,&lds_,pQ,&rs1_,&zero,work+nwu,&rs1_));
512: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&rk_,&rk_,&rs1_,&sone,pQ,&rs1_,work+nwu,&rs1_,&zero,qB,&lds_));
513: for (i=0;i<rk;i++) {
514: for (j=0;j<i;j++) qB[i+j*lds] = PetscConj(qB[j+i*lds]);
515: qB[i+i*lds] = PetscRealPart(qB[i+i*lds]);
516: }
517: for (i=rk;i<ctx->ld;i++) PetscCall(PetscArrayzero(qB+i*lds,ctx->ld));
518: for (i=0;i<rk;i++) PetscCall(PetscArrayzero(qB+i*lds+rk,(ctx->ld-rk)));
519: if (c!=r) {
520: sqB = ctx->qB+r*ctx->ld*lds+c*ctx->ld;
521: for (i=0;i<ctx->ld;i++) for (j=0;j<ctx->ld;j++) sqB[i+j*lds] = PetscConj(qB[j+i*lds]);
522: }
523: }
524: }
525: }
526: }
528: /* free work space */
529: PetscCall(PetscFree6(SS,SS2,pQ,tau,work,rwork));
530: PetscCall(MatDenseRestoreArray(ctx->S,&S));
532: /* set active columns */
533: if (newc) ctx->U->l += newc;
534: ctx->U->k = rk;
535: PetscFunctionReturn(PETSC_SUCCESS);
536: }
538: /*@
539: BVTensorCompress - Updates the U and S factors of the tensor basis vectors
540: object V by means of an SVD, removing redundant information.
542: Collective
544: Input Parameters:
545: + V - the tensor basis vectors context
546: - newc - additional columns to be locked
548: Notes:
549: This function is typically used when restarting Krylov solvers. Truncating a
550: tensor BV V = (I otimes U) S to its leading columns amounts to keeping the
551: leading columns of S. However, to effectively reduce the size of the
552: decomposition, it is necessary to compress it in a way that fewer columns of
553: U are employed. This can be achieved by means of an update that involves the
554: SVD of the low-rank matrix [S_0 S_1 ... S_{d-1}], where S_i are the pieces of S.
556: If newc is nonzero, then newc columns are added to the leading columns of V.
557: This means that the corresponding columns of the U and S factors will remain
558: invariant in subsequent operations.
560: Level: advanced
562: .seealso: BVCreateTensor(), BVSetActiveColumns()
563: @*/
564: PetscErrorCode BVTensorCompress(BV V,PetscInt newc)
565: {
566: PetscFunctionBegin;
569: PetscUseMethod(V,"BVTensorCompress_C",(BV,PetscInt),(V,newc));
570: PetscFunctionReturn(PETSC_SUCCESS);
571: }
573: static PetscErrorCode BVTensorGetDegree_Tensor(BV bv,PetscInt *d)
574: {
575: BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
577: PetscFunctionBegin;
578: *d = ctx->d;
579: PetscFunctionReturn(PETSC_SUCCESS);
580: }
582: /*@
583: BVTensorGetDegree - Returns the number of blocks (degree) of the tensor BV.
585: Not Collective
587: Input Parameter:
588: . bv - the basis vectors context
590: Output Parameter:
591: . d - the degree
593: Level: advanced
595: .seealso: BVCreateTensor()
596: @*/
597: PetscErrorCode BVTensorGetDegree(BV bv,PetscInt *d)
598: {
599: PetscFunctionBegin;
601: PetscAssertPointer(d,2);
602: PetscUseMethod(bv,"BVTensorGetDegree_C",(BV,PetscInt*),(bv,d));
603: PetscFunctionReturn(PETSC_SUCCESS);
604: }
606: static PetscErrorCode BVTensorGetFactors_Tensor(BV V,BV *U,Mat *S)
607: {
608: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
610: PetscFunctionBegin;
611: PetscCheck(ctx->puk==-1,PetscObjectComm((PetscObject)V),PETSC_ERR_ORDER,"Previous call to BVTensonGetFactors without a BVTensorRestoreFactors call");
612: ctx->puk = ctx->U->k;
613: if (U) *U = ctx->U;
614: if (S) *S = ctx->S;
615: PetscFunctionReturn(PETSC_SUCCESS);
616: }
618: /*@
619: BVTensorGetFactors - Returns the two factors involved in the definition of the
620: tensor basis vectors object, V = (I otimes U) S.
622: Logically Collective
624: Input Parameter:
625: . V - the basis vectors context
627: Output Parameters:
628: + U - the BV factor
629: - S - the Mat factor
631: Notes:
632: The returned factors are references (not copies) of the internal factors,
633: so modifying them will change the tensor BV as well. Some operations of the
634: tensor BV assume that U has orthonormal columns, so if the user modifies U
635: this restriction must be taken into account.
637: The returned factors must not be destroyed. BVTensorRestoreFactors() must
638: be called when they are no longer needed.
640: Pass a NULL vector for any of the arguments that is not needed.
642: Level: advanced
644: .seealso: BVTensorRestoreFactors()
645: @*/
646: PetscErrorCode BVTensorGetFactors(BV V,BV *U,Mat *S)
647: {
648: PetscFunctionBegin;
650: PetscUseMethod(V,"BVTensorGetFactors_C",(BV,BV*,Mat*),(V,U,S));
651: PetscFunctionReturn(PETSC_SUCCESS);
652: }
654: static PetscErrorCode BVTensorRestoreFactors_Tensor(BV V,BV *U,Mat *S)
655: {
656: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
658: PetscFunctionBegin;
659: PetscCall(PetscObjectStateIncrease((PetscObject)V));
660: if (U) *U = NULL;
661: if (S) *S = NULL;
662: PetscCall(BVTensorUpdateMatrix(V,ctx->puk,ctx->U->k));
663: ctx->puk = -1;
664: PetscFunctionReturn(PETSC_SUCCESS);
665: }
667: /*@
668: BVTensorRestoreFactors - Restore the two factors that were obtained with
669: BVTensorGetFactors().
671: Logically Collective
673: Input Parameters:
674: + V - the basis vectors context
675: . U - the BV factor (or NULL)
676: - S - the Mat factor (or NULL)
678: Notes:
679: The arguments must match the corresponding call to BVTensorGetFactors().
681: Level: advanced
683: .seealso: BVTensorGetFactors()
684: @*/
685: PetscErrorCode BVTensorRestoreFactors(BV V,BV *U,Mat *S)
686: {
687: PetscFunctionBegin;
691: PetscUseMethod(V,"BVTensorRestoreFactors_C",(BV,BV*,Mat*),(V,U,S));
692: PetscFunctionReturn(PETSC_SUCCESS);
693: }
695: static PetscErrorCode BVDestroy_Tensor(BV bv)
696: {
697: BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
699: PetscFunctionBegin;
700: PetscCall(BVDestroy(&ctx->U));
701: PetscCall(MatDestroy(&ctx->S));
702: if (ctx->u) {
703: PetscCall(PetscFree2(ctx->qB,ctx->sw));
704: PetscCall(VecDestroy(&ctx->u));
705: }
706: PetscCall(PetscFree(bv->data));
707: PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorBuildFirstColumn_C",NULL));
708: PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorCompress_C",NULL));
709: PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetDegree_C",NULL));
710: PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetFactors_C",NULL));
711: PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorRestoreFactors_C",NULL));
712: PetscFunctionReturn(PETSC_SUCCESS);
713: }
715: SLEPC_EXTERN PetscErrorCode BVCreate_Tensor(BV bv)
716: {
717: BV_TENSOR *ctx;
719: PetscFunctionBegin;
720: PetscCall(PetscNew(&ctx));
721: bv->data = (void*)ctx;
722: ctx->puk = -1;
724: bv->ops->multinplace = BVMultInPlace_Tensor;
725: bv->ops->multinplacetrans = BVMultInPlaceHermitianTranspose_Tensor;
726: bv->ops->dot = BVDot_Tensor;
727: bv->ops->scale = BVScale_Tensor;
728: bv->ops->norm = BVNorm_Tensor;
729: bv->ops->copycolumn = BVCopyColumn_Tensor;
730: bv->ops->gramschmidt = BVOrthogonalizeGS1_Tensor;
731: bv->ops->destroy = BVDestroy_Tensor;
732: bv->ops->view = BVView_Tensor;
734: PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorBuildFirstColumn_C",BVTensorBuildFirstColumn_Tensor));
735: PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorCompress_C",BVTensorCompress_Tensor));
736: PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetDegree_C",BVTensorGetDegree_Tensor));
737: PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetFactors_C",BVTensorGetFactors_Tensor));
738: PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorRestoreFactors_C",BVTensorRestoreFactors_Tensor));
739: PetscFunctionReturn(PETSC_SUCCESS);
740: }
742: /*@
743: BVCreateTensor - Creates a tensor BV that is represented in compact form
744: as V = (I otimes U) S, where U has orthonormal columns.
746: Collective
748: Input Parameters:
749: + U - a basis vectors object
750: - d - the number of blocks (degree) of the tensor BV
752: Output Parameter:
753: . V - the new basis vectors context
755: Notes:
756: The new basis vectors object is V = (I otimes U) S, where otimes denotes
757: the Kronecker product, I is the identity matrix of order d, and S is a
758: sequential matrix allocated internally. This compact representation is
759: used e.g. to represent the Krylov basis generated with the linearization
760: of a matrix polynomial of degree d.
762: The size of V (number of rows) is equal to d times n, where n is the size
763: of U. The dimensions of S are d times m rows and m-d+1 columns, where m is
764: the number of columns of U, so m should be at least d.
766: The communicator of V will be the same as U.
768: On input, the content of U is irrelevant. Alternatively, it may contain
769: some nonzero columns that will be used by BVTensorBuildFirstColumn().
771: Level: advanced
773: .seealso: BVTensorGetDegree(), BVTensorGetFactors(), BVTensorBuildFirstColumn()
774: @*/
775: PetscErrorCode BVCreateTensor(BV U,PetscInt d,BV *V)
776: {
777: PetscBool match;
778: PetscInt n,N,m;
779: VecType vtype;
780: BV_TENSOR *ctx;
782: PetscFunctionBegin;
785: PetscCall(PetscObjectTypeCompare((PetscObject)U,BVTENSOR,&match));
786: PetscCheck(!match,PetscObjectComm((PetscObject)U),PETSC_ERR_SUP,"U cannot be of type tensor");
788: PetscCall(BVCreate(PetscObjectComm((PetscObject)U),V));
789: PetscCall(BVGetSizes(U,&n,&N,&m));
790: PetscCheck(m>=d,PetscObjectComm((PetscObject)U),PETSC_ERR_ARG_SIZ,"U has %" PetscInt_FMT " columns, it should have at least d=%" PetscInt_FMT,m,d);
791: PetscCall(BVSetSizes(*V,d*n,d*N,m-d+1));
792: PetscCall(BVGetVecType(U,&vtype));
793: PetscCall(BVSetVecType(*V,vtype));
794: PetscCall(PetscObjectChangeTypeName((PetscObject)*V,BVTENSOR));
795: PetscCall(PetscLogEventBegin(BV_Create,*V,0,0,0));
796: PetscCall(BVCreate_Tensor(*V));
797: PetscCall(PetscLogEventEnd(BV_Create,*V,0,0,0));
799: ctx = (BV_TENSOR*)(*V)->data;
800: ctx->U = U;
801: ctx->d = d;
802: ctx->ld = m;
803: PetscCall(PetscObjectReference((PetscObject)U));
804: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,d*m,m-d+1,NULL,&ctx->S));
805: PetscCall(PetscObjectSetName((PetscObject)ctx->S,"S"));
807: /* Copy user-provided attributes of U */
808: (*V)->orthog_type = U->orthog_type;
809: (*V)->orthog_ref = U->orthog_ref;
810: (*V)->orthog_eta = U->orthog_eta;
811: (*V)->orthog_block = U->orthog_block;
812: (*V)->vmm = U->vmm;
813: (*V)->rrandom = U->rrandom;
814: PetscFunctionReturn(PETSC_SUCCESS);
815: }