Actual source code: bvtensor.c

slepc-main 2024-11-15
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Tensor BV that is represented in compact form as V = (I otimes U) S
 12: */

 14: #include <slepc/private/bvimpl.h>
 15: #include <slepcblaslapack.h>

 17: typedef struct {
 18:   BV          U;        /* first factor */
 19:   Mat         S;        /* second factor */
 20:   PetscScalar *qB;      /* auxiliary matrix used in non-standard inner products */
 21:   PetscScalar *sw;      /* work space */
 22:   PetscInt    d;        /* degree of the tensor BV */
 23:   PetscInt    ld;       /* leading dimension of a single block in S */
 24:   PetscInt    puk;      /* copy of the k value */
 25:   Vec         u;        /* auxiliary work vector */
 26: } BV_TENSOR;

 28: static PetscErrorCode BVMultInPlace_Tensor(BV V,Mat Q,PetscInt s,PetscInt e)
 29: {
 30:   BV_TENSOR         *ctx = (BV_TENSOR*)V->data;
 31:   PetscScalar       *pS;
 32:   const PetscScalar *q;
 33:   PetscInt          ldq,lds = ctx->ld*ctx->d;

 35:   PetscFunctionBegin;
 36:   PetscCall(MatDenseGetLDA(Q,&ldq));
 37:   PetscCall(MatDenseGetArray(ctx->S,&pS));
 38:   PetscCall(MatDenseGetArrayRead(Q,&q));
 39:   PetscCall(BVMultInPlace_BLAS_Private(V,lds,V->k-V->l,s-V->l,e-V->l,pS+(V->nc+V->l)*lds,lds,q+V->l*ldq+V->l,ldq,PETSC_FALSE));
 40:   PetscCall(MatDenseRestoreArrayRead(Q,&q));
 41:   PetscCall(MatDenseRestoreArray(ctx->S,&pS));
 42:   PetscFunctionReturn(PETSC_SUCCESS);
 43: }

 45: static PetscErrorCode BVMultInPlaceHermitianTranspose_Tensor(BV V,Mat Q,PetscInt s,PetscInt e)
 46: {
 47:   BV_TENSOR         *ctx = (BV_TENSOR*)V->data;
 48:   PetscScalar       *pS;
 49:   const PetscScalar *q;
 50:   PetscInt          ldq,lds = ctx->ld*ctx->d;

 52:   PetscFunctionBegin;
 53:   PetscCall(MatDenseGetLDA(Q,&ldq));
 54:   PetscCall(MatDenseGetArray(ctx->S,&pS));
 55:   PetscCall(MatDenseGetArrayRead(Q,&q));
 56:   PetscCall(BVMultInPlace_BLAS_Private(V,lds,V->k-V->l,s-V->l,e-V->l,pS+(V->nc+V->l)*lds,lds,q+V->l*ldq+V->l,ldq,PETSC_TRUE));
 57:   PetscCall(MatDenseRestoreArrayRead(Q,&q));
 58:   PetscCall(MatDenseRestoreArray(ctx->S,&pS));
 59:   PetscFunctionReturn(PETSC_SUCCESS);
 60: }

 62: static PetscErrorCode BVDot_Tensor(BV X,BV Y,Mat M)
 63: {
 64:   BV_TENSOR         *x = (BV_TENSOR*)X->data,*y = (BV_TENSOR*)Y->data;
 65:   PetscScalar       *m;
 66:   const PetscScalar *px,*py;
 67:   PetscInt          ldm,lds = x->ld*x->d;

 69:   PetscFunctionBegin;
 70:   PetscCheck(x->U==y->U,PetscObjectComm((PetscObject)X),PETSC_ERR_SUP,"BVDot() in BVTENSOR requires that both operands have the same U factor");
 71:   PetscCheck(lds==y->ld*y->d,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_SIZ,"Mismatching dimensions ld*d %" PetscInt_FMT " %" PetscInt_FMT,lds,y->ld*y->d);
 72:   PetscCall(MatDenseGetLDA(M,&ldm));
 73:   PetscCall(MatDenseGetArrayRead(x->S,&px));
 74:   PetscCall(MatDenseGetArrayRead(y->S,&py));
 75:   PetscCall(MatDenseGetArray(M,&m));
 76:   PetscCall(BVDot_BLAS_Private(X,Y->k-Y->l,X->k-X->l,lds,py+(Y->nc+Y->l)*lds,lds,px+(X->nc+X->l)*lds,lds,m+X->l*ldm+Y->l,ldm,PETSC_FALSE));
 77:   PetscCall(MatDenseRestoreArray(M,&m));
 78:   PetscCall(MatDenseRestoreArrayRead(x->S,&px));
 79:   PetscCall(MatDenseRestoreArrayRead(y->S,&py));
 80:   PetscFunctionReturn(PETSC_SUCCESS);
 81: }

 83: static PetscErrorCode BVScale_Tensor(BV bv,PetscInt j,PetscScalar alpha)
 84: {
 85:   BV_TENSOR      *ctx = (BV_TENSOR*)bv->data;
 86:   PetscScalar    *pS;
 87:   PetscInt       lds = ctx->ld*ctx->d;

 89:   PetscFunctionBegin;
 90:   PetscCall(MatDenseGetArray(ctx->S,&pS));
 91:   if (PetscUnlikely(j<0)) PetscCall(BVScale_BLAS_Private(bv,(bv->k-bv->l)*lds,pS+(bv->nc+bv->l)*lds,alpha));
 92:   else PetscCall(BVScale_BLAS_Private(bv,lds,pS+(bv->nc+j)*lds,alpha));
 93:   PetscCall(MatDenseRestoreArray(ctx->S,&pS));
 94:   PetscFunctionReturn(PETSC_SUCCESS);
 95: }

 97: static PetscErrorCode BVNorm_Tensor(BV bv,PetscInt j,NormType type,PetscReal *val)
 98: {
 99:   BV_TENSOR         *ctx = (BV_TENSOR*)bv->data;
100:   const PetscScalar *pS;
101:   PetscInt          lds = ctx->ld*ctx->d;

103:   PetscFunctionBegin;
104:   PetscCall(MatDenseGetArrayRead(ctx->S,&pS));
105:   if (j<0) PetscCall(BVNorm_LAPACK_Private(bv,lds,bv->k-bv->l,pS+(bv->nc+bv->l)*lds,lds,type,val,PETSC_FALSE));
106:   else PetscCall(BVNorm_LAPACK_Private(bv,lds,1,pS+(bv->nc+j)*lds,lds,type,val,PETSC_FALSE));
107:   PetscCall(MatDenseRestoreArrayRead(ctx->S,&pS));
108:   PetscFunctionReturn(PETSC_SUCCESS);
109: }

111: static PetscErrorCode BVCopyColumn_Tensor(BV V,PetscInt j,PetscInt i)
112: {
113:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;
114:   PetscScalar    *pS;
115:   PetscInt       lds = ctx->ld*ctx->d;

117:   PetscFunctionBegin;
118:   PetscCall(MatDenseGetArray(ctx->S,&pS));
119:   PetscCall(PetscArraycpy(pS+(V->nc+i)*lds,pS+(V->nc+j)*lds,lds));
120:   PetscCall(MatDenseRestoreArray(ctx->S,&pS));
121:   PetscFunctionReturn(PETSC_SUCCESS);
122: }

124: static PetscErrorCode BVTensorNormColumn(BV bv,PetscInt j,PetscReal *norm)
125: {
126:   BV_TENSOR         *ctx = (BV_TENSOR*)bv->data;
127:   PetscBLASInt      one=1,lds_;
128:   PetscScalar       sone=1.0,szero=0.0,*x,dot;
129:   const PetscScalar *S;
130:   PetscReal         alpha=1.0,scale=0.0,aval;
131:   PetscInt          i,lds,ld=ctx->ld;

133:   PetscFunctionBegin;
134:   lds = ld*ctx->d;
135:   PetscCall(MatDenseGetArrayRead(ctx->S,&S));
136:   PetscCall(PetscBLASIntCast(lds,&lds_));
137:   if (PetscUnlikely(ctx->qB)) {
138:     x = ctx->sw;
139:     PetscCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,S+j*lds,&one,&szero,x,&one));
140:     dot = PetscRealPart(BLASdot_(&lds_,S+j*lds,&one,x,&one));
141:     PetscCall(BV_SafeSqrt(bv,dot,norm));
142:   } else {
143:     /* Compute *norm = BLASnrm2_(&lds_,S+j*lds,&one); */
144:     if (lds==1) *norm = PetscAbsScalar(S[j*lds]);
145:     else {
146:       for (i=0;i<lds;i++) {
147:         aval = PetscAbsScalar(S[i+j*lds]);
148:         if (aval!=0.0) {
149:           if (PetscUnlikely(scale<aval)) {
150:             alpha = 1.0 + alpha*PetscSqr(scale/aval);
151:             scale = aval;
152:           } else alpha += PetscSqr(aval/scale);
153:         }
154:       }
155:       *norm = scale*PetscSqrtReal(alpha);
156:     }
157:   }
158:   PetscCall(MatDenseRestoreArrayRead(ctx->S,&S));
159:   PetscFunctionReturn(PETSC_SUCCESS);
160: }

162: static PetscErrorCode BVOrthogonalizeGS1_Tensor(BV bv,PetscInt k,Vec v,PetscBool *which,PetscScalar *h,PetscScalar *c,PetscReal *onorm,PetscReal *norm)
163: {
164:   BV_TENSOR         *ctx = (BV_TENSOR*)bv->data;
165:   PetscScalar       *pS,*cc,*x,dot,sonem=-1.0,sone=1.0,szero=0.0;
166:   PetscInt          i,lds = ctx->ld*ctx->d;
167:   PetscBLASInt      lds_,k_,one=1;
168:   const PetscScalar *omega;

170:   PetscFunctionBegin;
171:   PetscCheck(!v,PetscObjectComm((PetscObject)bv),PETSC_ERR_SUP,"Orthogonalization against an external vector is not allowed in BVTENSOR");
172:   PetscCall(MatDenseGetArray(ctx->S,&pS));
173:   if (!c) PetscCall(VecGetArray(bv->buffer,&cc));
174:   else cc = c;
175:   PetscCall(PetscBLASIntCast(lds,&lds_));
176:   PetscCall(PetscBLASIntCast(k,&k_));

178:   if (onorm) PetscCall(BVTensorNormColumn(bv,k,onorm));

180:   if (ctx->qB) x = ctx->sw;
181:   else x = pS+k*lds;

183:   if (PetscUnlikely(bv->orthog_type==BV_ORTHOG_MGS)) {  /* modified Gram-Schmidt */

185:     if (PetscUnlikely(bv->indef)) { /* signature */
186:       PetscCall(VecGetArrayRead(bv->omega,&omega));
187:     }
188:     for (i=-bv->nc;i<k;i++) {
189:       if (which && i>=0 && !which[i]) continue;
190:       if (ctx->qB) PetscCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,pS+k*lds,&one,&szero,x,&one));
191:       /* c_i = (s_k, s_i) */
192:       dot = PetscRealPart(BLASdot_(&lds_,pS+i*lds,&one,x,&one));
193:       if (bv->indef) dot /= PetscRealPart(omega[i]);
194:       PetscCall(BV_SetValue(bv,i,0,cc,dot));
195:       /* s_k = s_k - c_i s_i */
196:       dot = -dot;
197:       PetscCallBLAS("BLASaxpy",BLASaxpy_(&lds_,&dot,pS+i*lds,&one,pS+k*lds,&one));
198:     }
199:     if (PetscUnlikely(bv->indef)) PetscCall(VecRestoreArrayRead(bv->omega,&omega));

201:   } else {  /* classical Gram-Schmidt */
202:     if (ctx->qB) PetscCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,pS+k*lds,&one,&szero,x,&one));

204:     /* cc = S_{0:k-1}^* s_k */
205:     PetscCallBLAS("BLASgemv",BLASgemv_("C",&lds_,&k_,&sone,pS,&lds_,x,&one,&szero,cc,&one));

207:     /* s_k = s_k - S_{0:k-1} cc */
208:     if (PetscUnlikely(bv->indef)) PetscCall(BV_ApplySignature(bv,k,cc,PETSC_TRUE));
209:     PetscCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&k_,&sonem,pS,&lds_,cc,&one,&sone,pS+k*lds,&one));
210:     if (PetscUnlikely(bv->indef)) PetscCall(BV_ApplySignature(bv,k,cc,PETSC_FALSE));
211:   }

213:   if (norm) PetscCall(BVTensorNormColumn(bv,k,norm));
214:   PetscCall(BV_AddCoefficients(bv,k,h,cc));
215:   PetscCall(MatDenseRestoreArray(ctx->S,&pS));
216:   PetscCall(VecRestoreArray(bv->buffer,&cc));
217:   PetscFunctionReturn(PETSC_SUCCESS);
218: }

220: static PetscErrorCode BVView_Tensor(BV bv,PetscViewer viewer)
221: {
222:   BV_TENSOR         *ctx = (BV_TENSOR*)bv->data;
223:   PetscViewerFormat format;
224:   PetscBool         isascii;
225:   const char        *bvname,*uname,*sname;

227:   PetscFunctionBegin;
228:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
229:   if (isascii) {
230:     PetscCall(PetscViewerGetFormat(viewer,&format));
231:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
232:       PetscCall(PetscViewerASCIIPrintf(viewer,"number of tensor blocks (degree): %" PetscInt_FMT "\n",ctx->d));
233:       PetscCall(PetscViewerASCIIPrintf(viewer,"number of columns of U factor: %" PetscInt_FMT "\n",ctx->ld));
234:       PetscFunctionReturn(PETSC_SUCCESS);
235:     }
236:     PetscCall(BVView(ctx->U,viewer));
237:     PetscCall(MatView(ctx->S,viewer));
238:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
239:       PetscCall(PetscObjectGetName((PetscObject)bv,&bvname));
240:       PetscCall(PetscObjectGetName((PetscObject)ctx->U,&uname));
241:       PetscCall(PetscObjectGetName((PetscObject)ctx->S,&sname));
242:       PetscCall(PetscViewerASCIIPrintf(viewer,"%s=kron(eye(%" PetscInt_FMT "),%s)*%s(:,1:%" PetscInt_FMT ");\n",bvname,ctx->d,uname,sname,bv->k));
243:     }
244:   } else {
245:     PetscCall(BVView(ctx->U,viewer));
246:     PetscCall(MatView(ctx->S,viewer));
247:   }
248:   PetscFunctionReturn(PETSC_SUCCESS);
249: }

251: static PetscErrorCode BVTensorUpdateMatrix(BV V,PetscInt ini,PetscInt end)
252: {
253:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;
254:   PetscInt       i,j,r,c,l,k,ld=ctx->ld,lds=ctx->d*ctx->ld;
255:   PetscScalar    *qB,*sqB;
256:   Vec            u;
257:   Mat            A;

259:   PetscFunctionBegin;
260:   if (!V->matrix) PetscFunctionReturn(PETSC_SUCCESS);
261:   l = ctx->U->l; k = ctx->U->k;
262:   /* update inner product matrix */
263:   if (!ctx->qB) {
264:     PetscCall(PetscCalloc2(lds*lds,&ctx->qB,lds,&ctx->sw));
265:     PetscCall(BVCreateVec(ctx->U,&ctx->u));
266:   }
267:   ctx->U->l = 0;
268:   for (r=0;r<ctx->d;r++) {
269:     for (c=0;c<=r;c++) {
270:       PetscCall(MatNestGetSubMat(V->matrix,r,c,&A));
271:       if (A) {
272:         qB = ctx->qB+c*ld*lds+r*ld;
273:         for (i=ini;i<end;i++) {
274:           PetscCall(BVGetColumn(ctx->U,i,&u));
275:           PetscCall(MatMult(A,u,ctx->u));
276:           ctx->U->k = i+1;
277:           PetscCall(BVDotVec(ctx->U,ctx->u,qB+i*lds));
278:           PetscCall(BVRestoreColumn(ctx->U,i,&u));
279:           for (j=0;j<i;j++) qB[i+j*lds] = PetscConj(qB[j+i*lds]);
280:           qB[i*lds+i] = PetscRealPart(qB[i+i*lds]);
281:         }
282:         if (PetscUnlikely(c!=r)) {
283:           sqB = ctx->qB+r*ld*lds+c*ld;
284:           for (i=ini;i<end;i++) for (j=0;j<=i;j++) {
285:             sqB[i+j*lds] = PetscConj(qB[j+i*lds]);
286:             sqB[j+i*lds] = qB[j+i*lds];
287:           }
288:         }
289:       }
290:     }
291:   }
292:   ctx->U->l = l; ctx->U->k = k;
293:   PetscFunctionReturn(PETSC_SUCCESS);
294: }

296: static PetscErrorCode BVTensorBuildFirstColumn_Tensor(BV V,PetscInt k)
297: {
298:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;
299:   PetscInt       i,nq=0;
300:   PetscScalar    *pS,*omega;
301:   PetscReal      norm;
302:   PetscBool      breakdown=PETSC_FALSE;

304:   PetscFunctionBegin;
305:   PetscCall(MatDenseGetArray(ctx->S,&pS));
306:   for (i=0;i<ctx->d;i++) {
307:     if (i>=k) PetscCall(BVSetRandomColumn(ctx->U,nq));
308:     else PetscCall(BVCopyColumn(ctx->U,i,nq));
309:     PetscCall(BVOrthogonalizeColumn(ctx->U,nq,pS+i*ctx->ld,&norm,&breakdown));
310:     if (!breakdown) {
311:       PetscCall(BVScaleColumn(ctx->U,nq,1.0/norm));
312:       pS[nq+i*ctx->ld] = norm;
313:       nq++;
314:     }
315:   }
316:   PetscCall(MatDenseRestoreArray(ctx->S,&pS));
317:   PetscCheck(nq,PetscObjectComm((PetscObject)V),PETSC_ERR_ARG_SIZ,"Cannot build first column of tensor BV; U should contain k=%" PetscInt_FMT " nonzero columns",k);
318:   PetscCall(BVTensorUpdateMatrix(V,0,nq));
319:   PetscCall(BVTensorNormColumn(V,0,&norm));
320:   PetscCall(BVScale_Tensor(V,0,1.0/norm));
321:   if (V->indef) {
322:     PetscCall(BV_AllocateSignature(V));
323:     PetscCall(VecGetArray(V->omega,&omega));
324:     omega[0] = (norm<0.0)? -1.0: 1.0;
325:     PetscCall(VecRestoreArray(V->omega,&omega));
326:   }
327:   /* set active columns */
328:   ctx->U->l = 0;
329:   ctx->U->k = nq;
330:   PetscFunctionReturn(PETSC_SUCCESS);
331: }

333: /*@
334:    BVTensorBuildFirstColumn - Builds the first column of the tensor basis vectors
335:    V from the data contained in the first k columns of U.

337:    Collective

339:    Input Parameters:
340: +  V - the basis vectors context
341: -  k - the number of columns of U with relevant information

343:    Notes:
344:    At most d columns are considered, where d is the degree of the tensor BV.
345:    Given V = (I otimes U) S, this function computes the first column of V, that
346:    is, it computes the coefficients of the first column of S by orthogonalizing
347:    the first d columns of U. If k is less than d (or linearly dependent columns
348:    are found) then additional random columns are used.

350:    The computed column has unit norm.

352:    Level: advanced

354: .seealso: BVCreateTensor()
355: @*/
356: PetscErrorCode BVTensorBuildFirstColumn(BV V,PetscInt k)
357: {
358:   PetscFunctionBegin;
361:   PetscUseMethod(V,"BVTensorBuildFirstColumn_C",(BV,PetscInt),(V,k));
362:   PetscFunctionReturn(PETSC_SUCCESS);
363: }

365: static PetscErrorCode BVTensorCompress_Tensor(BV V,PetscInt newc)
366: {
367:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;
368:   PetscInt       nwu=0,nnc,nrow,lwa,r,c;
369:   PetscInt       i,j,k,n,lds=ctx->ld*ctx->d,deg=ctx->d,lock,cs1=V->k,rs1=ctx->U->k,rk=0,offu;
370:   PetscScalar    *S,*M,*Z,*pQ,*SS,*SS2,t,sone=1.0,zero=0.0,mone=-1.0,*p,*tau,*work,*qB,*sqB;
371:   PetscReal      *sg,tol,*rwork;
372:   PetscBLASInt   ld_,cs1_,rs1_,cs1tdeg,n_,info,lw_,newc_,newctdeg,nnc_,nrow_,nnctdeg,lds_,rk_;
373:   Mat            Q,A;

375:   PetscFunctionBegin;
376:   if (!cs1) PetscFunctionReturn(PETSC_SUCCESS);
377:   lwa = 6*ctx->ld*lds+2*cs1;
378:   n = PetscMin(rs1,deg*cs1);
379:   lock = ctx->U->l;
380:   nnc = cs1-lock-newc;
381:   nrow = rs1-lock;
382:   PetscCall(PetscCalloc6(deg*newc*nnc,&SS,newc*nnc,&SS2,(rs1+lock+newc)*n,&pQ,deg*rs1,&tau,lwa,&work,6*n,&rwork));
383:   offu = lock*(rs1+1);
384:   M = work+nwu;
385:   nwu += rs1*cs1*deg;
386:   sg = rwork;
387:   Z = work+nwu;
388:   nwu += deg*cs1*n;
389:   PetscCall(PetscBLASIntCast(n,&n_));
390:   PetscCall(PetscBLASIntCast(nnc,&nnc_));
391:   PetscCall(PetscBLASIntCast(cs1,&cs1_));
392:   PetscCall(PetscBLASIntCast(rs1,&rs1_));
393:   PetscCall(PetscBLASIntCast(newc,&newc_));
394:   PetscCall(PetscBLASIntCast(newc*deg,&newctdeg));
395:   PetscCall(PetscBLASIntCast(nnc*deg,&nnctdeg));
396:   PetscCall(PetscBLASIntCast(cs1*deg,&cs1tdeg));
397:   PetscCall(PetscBLASIntCast(lwa-nwu,&lw_));
398:   PetscCall(PetscBLASIntCast(nrow,&nrow_));
399:   PetscCall(PetscBLASIntCast(lds,&lds_));
400:   PetscCall(MatDenseGetArray(ctx->S,&S));

402:   if (newc>0) {
403:     /* truncate columns associated with new converged eigenpairs */
404:     for (j=0;j<deg;j++) {
405:       for (i=lock;i<lock+newc;i++) PetscCall(PetscArraycpy(M+(i-lock+j*newc)*nrow,S+i*lds+j*ctx->ld+lock,nrow));
406:     }
407:     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
408: #if !defined (PETSC_USE_COMPLEX)
409:     PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pQ+offu,&rs1_,Z,&n_,work+nwu,&lw_,&info));
410: #else
411:     PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pQ+offu,&rs1_,Z,&n_,work+nwu,&lw_,rwork+n,&info));
412: #endif
413:     SlepcCheckLapackInfo("gesvd",info);
414:     PetscCall(PetscFPTrapPop());
415:     /* SVD has rank min(newc,nrow) */
416:     rk = PetscMin(newc,nrow);
417:     for (i=0;i<rk;i++) {
418:       t = sg[i];
419:       PetscCallBLAS("BLASscal",BLASscal_(&newctdeg,&t,Z+i,&n_));
420:     }
421:     for (i=0;i<deg;i++) {
422:       for (j=lock;j<lock+newc;j++) {
423:         PetscCall(PetscArraycpy(S+j*lds+i*ctx->ld+lock,Z+(newc*i+j-lock)*n,rk));
424:         PetscCall(PetscArrayzero(S+j*lds+i*ctx->ld+lock+rk,(ctx->ld-lock-rk)));
425:       }
426:     }
427:     /*
428:       update columns associated with non-converged vectors, orthogonalize
429:       against pQ so that next M has rank nnc+d-1 instead of nrow+d-1
430:     */
431:     for (i=0;i<deg;i++) {
432:       PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pQ+offu,&rs1_,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_,&zero,PetscSafePointerPlusOffset(SS,i*nnc*newc),&newc_));
433:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pQ+offu,&rs1_,PetscSafePointerPlusOffset(SS,i*nnc*newc),&newc_,&sone,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_));
434:       /* repeat orthogonalization step */
435:       PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pQ+offu,&rs1_,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_,&zero,SS2,&newc_));
436:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pQ+offu,&rs1_,SS2,&newc_,&sone,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_));
437:       for (j=0;j<newc*nnc;j++) *(SS+i*newc*nnc+j) += SS2[j];
438:     }
439:   }

441:   /* truncate columns associated with non-converged eigenpairs */
442:   for (j=0;j<deg;j++) {
443:     for (i=lock+newc;i<cs1;i++) PetscCall(PetscArraycpy(M+(i-lock-newc+j*nnc)*nrow,S+i*lds+j*ctx->ld+lock,nrow));
444:   }
445:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
446: #if !defined (PETSC_USE_COMPLEX)
447:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pQ+offu+newc*rs1,&rs1_,Z,&n_,work+nwu,&lw_,&info));
448: #else
449:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pQ+offu+newc*rs1,&rs1_,Z,&n_,work+nwu,&lw_,rwork+n,&info));
450: #endif
451:   SlepcCheckLapackInfo("gesvd",info);
452:   PetscCall(PetscFPTrapPop());
453:   tol = PetscMax(rs1,deg*cs1)*PETSC_MACHINE_EPSILON*sg[0];
454:   rk = 0;
455:   for (i=0;i<PetscMin(nrow,nnctdeg);i++) if (sg[i]>tol) rk++;
456:   rk = PetscMin(nnc+deg-1,rk);
457:   /* the SVD has rank (at most) nnc+deg-1 */
458:   for (i=0;i<rk;i++) {
459:     t = sg[i];
460:     PetscCallBLAS("BLASscal",BLASscal_(&nnctdeg,&t,Z+i,&n_));
461:   }
462:   /* update S */
463:   PetscCall(PetscArrayzero(S+cs1*lds,(V->m-cs1)*lds));
464:   k = ctx->ld-lock-newc-rk;
465:   for (i=0;i<deg;i++) {
466:     for (j=lock+newc;j<cs1;j++) {
467:       PetscCall(PetscArraycpy(S+j*lds+i*ctx->ld+lock+newc,Z+(nnc*i+j-lock-newc)*n,rk));
468:       PetscCall(PetscArrayzero(S+j*lds+i*ctx->ld+lock+newc+rk,k));
469:     }
470:   }
471:   if (newc>0) {
472:     for (i=0;i<deg;i++) {
473:       p = PetscSafePointerPlusOffset(SS,i*nnc*newc);
474:       for (j=lock+newc;j<cs1;j++) {
475:         for (k=0;k<newc;k++) S[j*lds+i*ctx->ld+lock+k] = *(p++);
476:       }
477:     }
478:   }

480:   /* orthogonalize pQ */
481:   rk = rk+newc;
482:   PetscCall(PetscBLASIntCast(rk,&rk_));
483:   PetscCall(PetscBLASIntCast(cs1-lock,&nnc_));
484:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
485:   PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&nrow_,&rk_,pQ+offu,&rs1_,tau,work+nwu,&lw_,&info));
486:   SlepcCheckLapackInfo("geqrf",info);
487:   for (i=0;i<deg;i++) {
488:     PetscCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&rk_,&nnc_,&sone,pQ+offu,&rs1_,S+lock*lds+lock+i*ctx->ld,&lds_));
489:   }
490:   PetscCallBLAS("LAPACKorgqr",LAPACKorgqr_(&nrow_,&rk_,&rk_,pQ+offu,&rs1_,tau,work+nwu,&lw_,&info));
491:   SlepcCheckLapackInfo("orgqr",info);
492:   PetscCall(PetscFPTrapPop());

494:   /* update vectors U(:,idx) = U*Q(:,idx) */
495:   rk = rk+lock;
496:   for (i=0;i<lock;i++) pQ[i*(1+rs1)] = 1.0;
497:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,rs1,rk,pQ,&Q));
498:   ctx->U->k = rs1;
499:   PetscCall(BVMultInPlace(ctx->U,Q,lock,rk));
500:   PetscCall(MatDestroy(&Q));

502:   if (ctx->qB) {
503:    /* update matrix qB */
504:     PetscCall(PetscBLASIntCast(ctx->ld,&ld_));
505:     PetscCall(PetscBLASIntCast(rk,&rk_));
506:     for (r=0;r<ctx->d;r++) {
507:       for (c=0;c<=r;c++) {
508:         PetscCall(MatNestGetSubMat(V->matrix,r,c,&A));
509:         if (A) {
510:           qB = ctx->qB+r*ctx->ld+c*ctx->ld*lds;
511:           PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&rs1_,&rk_,&rs1_,&sone,qB,&lds_,pQ,&rs1_,&zero,work+nwu,&rs1_));
512:           PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&rk_,&rk_,&rs1_,&sone,pQ,&rs1_,work+nwu,&rs1_,&zero,qB,&lds_));
513:           for (i=0;i<rk;i++) {
514:             for (j=0;j<i;j++) qB[i+j*lds] = PetscConj(qB[j+i*lds]);
515:             qB[i+i*lds] = PetscRealPart(qB[i+i*lds]);
516:           }
517:           for (i=rk;i<ctx->ld;i++) PetscCall(PetscArrayzero(qB+i*lds,ctx->ld));
518:           for (i=0;i<rk;i++) PetscCall(PetscArrayzero(qB+i*lds+rk,(ctx->ld-rk)));
519:           if (c!=r) {
520:             sqB = ctx->qB+r*ctx->ld*lds+c*ctx->ld;
521:             for (i=0;i<ctx->ld;i++) for (j=0;j<ctx->ld;j++) sqB[i+j*lds] = PetscConj(qB[j+i*lds]);
522:           }
523:         }
524:       }
525:     }
526:   }

528:   /* free work space */
529:   PetscCall(PetscFree6(SS,SS2,pQ,tau,work,rwork));
530:   PetscCall(MatDenseRestoreArray(ctx->S,&S));

532:   /* set active columns */
533:   if (newc) ctx->U->l += newc;
534:   ctx->U->k = rk;
535:   PetscFunctionReturn(PETSC_SUCCESS);
536: }

538: /*@
539:    BVTensorCompress - Updates the U and S factors of the tensor basis vectors
540:    object V by means of an SVD, removing redundant information.

542:    Collective

544:    Input Parameters:
545: +  V - the tensor basis vectors context
546: -  newc - additional columns to be locked

548:    Notes:
549:    This function is typically used when restarting Krylov solvers. Truncating a
550:    tensor BV V = (I otimes U) S to its leading columns amounts to keeping the
551:    leading columns of S. However, to effectively reduce the size of the
552:    decomposition, it is necessary to compress it in a way that fewer columns of
553:    U are employed. This can be achieved by means of an update that involves the
554:    SVD of the low-rank matrix [S_0 S_1 ... S_{d-1}], where S_i are the pieces of S.

556:    If newc is nonzero, then newc columns are added to the leading columns of V.
557:    This means that the corresponding columns of the U and S factors will remain
558:    invariant in subsequent operations.

560:    Level: advanced

562: .seealso: BVCreateTensor(), BVSetActiveColumns()
563: @*/
564: PetscErrorCode BVTensorCompress(BV V,PetscInt newc)
565: {
566:   PetscFunctionBegin;
569:   PetscUseMethod(V,"BVTensorCompress_C",(BV,PetscInt),(V,newc));
570:   PetscFunctionReturn(PETSC_SUCCESS);
571: }

573: static PetscErrorCode BVTensorGetDegree_Tensor(BV bv,PetscInt *d)
574: {
575:   BV_TENSOR *ctx = (BV_TENSOR*)bv->data;

577:   PetscFunctionBegin;
578:   *d = ctx->d;
579:   PetscFunctionReturn(PETSC_SUCCESS);
580: }

582: /*@
583:    BVTensorGetDegree - Returns the number of blocks (degree) of the tensor BV.

585:    Not Collective

587:    Input Parameter:
588: .  bv - the basis vectors context

590:    Output Parameter:
591: .  d - the degree

593:    Level: advanced

595: .seealso: BVCreateTensor()
596: @*/
597: PetscErrorCode BVTensorGetDegree(BV bv,PetscInt *d)
598: {
599:   PetscFunctionBegin;
601:   PetscAssertPointer(d,2);
602:   PetscUseMethod(bv,"BVTensorGetDegree_C",(BV,PetscInt*),(bv,d));
603:   PetscFunctionReturn(PETSC_SUCCESS);
604: }

606: static PetscErrorCode BVTensorGetFactors_Tensor(BV V,BV *U,Mat *S)
607: {
608:   BV_TENSOR *ctx = (BV_TENSOR*)V->data;

610:   PetscFunctionBegin;
611:   PetscCheck(ctx->puk==-1,PetscObjectComm((PetscObject)V),PETSC_ERR_ORDER,"Previous call to BVTensonGetFactors without a BVTensorRestoreFactors call");
612:   ctx->puk = ctx->U->k;
613:   if (U) *U = ctx->U;
614:   if (S) *S = ctx->S;
615:   PetscFunctionReturn(PETSC_SUCCESS);
616: }

618: /*@
619:    BVTensorGetFactors - Returns the two factors involved in the definition of the
620:    tensor basis vectors object, V = (I otimes U) S.

622:    Logically Collective

624:    Input Parameter:
625: .  V - the basis vectors context

627:    Output Parameters:
628: +  U - the BV factor
629: -  S - the Mat factor

631:    Notes:
632:    The returned factors are references (not copies) of the internal factors,
633:    so modifying them will change the tensor BV as well. Some operations of the
634:    tensor BV assume that U has orthonormal columns, so if the user modifies U
635:    this restriction must be taken into account.

637:    The returned factors must not be destroyed. BVTensorRestoreFactors() must
638:    be called when they are no longer needed.

640:    Pass a NULL vector for any of the arguments that is not needed.

642:    Level: advanced

644: .seealso: BVTensorRestoreFactors()
645: @*/
646: PetscErrorCode BVTensorGetFactors(BV V,BV *U,Mat *S)
647: {
648:   PetscFunctionBegin;
650:   PetscUseMethod(V,"BVTensorGetFactors_C",(BV,BV*,Mat*),(V,U,S));
651:   PetscFunctionReturn(PETSC_SUCCESS);
652: }

654: static PetscErrorCode BVTensorRestoreFactors_Tensor(BV V,BV *U,Mat *S)
655: {
656:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;

658:   PetscFunctionBegin;
659:   PetscCall(PetscObjectStateIncrease((PetscObject)V));
660:   if (U) *U = NULL;
661:   if (S) *S = NULL;
662:   PetscCall(BVTensorUpdateMatrix(V,ctx->puk,ctx->U->k));
663:   ctx->puk = -1;
664:   PetscFunctionReturn(PETSC_SUCCESS);
665: }

667: /*@
668:    BVTensorRestoreFactors - Restore the two factors that were obtained with
669:    BVTensorGetFactors().

671:    Logically Collective

673:    Input Parameters:
674: +  V - the basis vectors context
675: .  U - the BV factor (or NULL)
676: -  S - the Mat factor (or NULL)

678:    Notes:
679:    The arguments must match the corresponding call to BVTensorGetFactors().

681:    Level: advanced

683: .seealso: BVTensorGetFactors()
684: @*/
685: PetscErrorCode BVTensorRestoreFactors(BV V,BV *U,Mat *S)
686: {
687:   PetscFunctionBegin;
691:   PetscUseMethod(V,"BVTensorRestoreFactors_C",(BV,BV*,Mat*),(V,U,S));
692:   PetscFunctionReturn(PETSC_SUCCESS);
693: }

695: static PetscErrorCode BVDestroy_Tensor(BV bv)
696: {
697:   BV_TENSOR      *ctx = (BV_TENSOR*)bv->data;

699:   PetscFunctionBegin;
700:   PetscCall(BVDestroy(&ctx->U));
701:   PetscCall(MatDestroy(&ctx->S));
702:   if (ctx->u) {
703:     PetscCall(PetscFree2(ctx->qB,ctx->sw));
704:     PetscCall(VecDestroy(&ctx->u));
705:   }
706:   PetscCall(PetscFree(bv->data));
707:   PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorBuildFirstColumn_C",NULL));
708:   PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorCompress_C",NULL));
709:   PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetDegree_C",NULL));
710:   PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetFactors_C",NULL));
711:   PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorRestoreFactors_C",NULL));
712:   PetscFunctionReturn(PETSC_SUCCESS);
713: }

715: SLEPC_EXTERN PetscErrorCode BVCreate_Tensor(BV bv)
716: {
717:   BV_TENSOR      *ctx;

719:   PetscFunctionBegin;
720:   PetscCall(PetscNew(&ctx));
721:   bv->data = (void*)ctx;
722:   ctx->puk = -1;

724:   bv->ops->multinplace      = BVMultInPlace_Tensor;
725:   bv->ops->multinplacetrans = BVMultInPlaceHermitianTranspose_Tensor;
726:   bv->ops->dot              = BVDot_Tensor;
727:   bv->ops->scale            = BVScale_Tensor;
728:   bv->ops->norm             = BVNorm_Tensor;
729:   bv->ops->copycolumn       = BVCopyColumn_Tensor;
730:   bv->ops->gramschmidt      = BVOrthogonalizeGS1_Tensor;
731:   bv->ops->destroy          = BVDestroy_Tensor;
732:   bv->ops->view             = BVView_Tensor;

734:   PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorBuildFirstColumn_C",BVTensorBuildFirstColumn_Tensor));
735:   PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorCompress_C",BVTensorCompress_Tensor));
736:   PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetDegree_C",BVTensorGetDegree_Tensor));
737:   PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetFactors_C",BVTensorGetFactors_Tensor));
738:   PetscCall(PetscObjectComposeFunction((PetscObject)bv,"BVTensorRestoreFactors_C",BVTensorRestoreFactors_Tensor));
739:   PetscFunctionReturn(PETSC_SUCCESS);
740: }

742: /*@
743:    BVCreateTensor - Creates a tensor BV that is represented in compact form
744:    as V = (I otimes U) S, where U has orthonormal columns.

746:    Collective

748:    Input Parameters:
749: +  U - a basis vectors object
750: -  d - the number of blocks (degree) of the tensor BV

752:    Output Parameter:
753: .  V - the new basis vectors context

755:    Notes:
756:    The new basis vectors object is V = (I otimes U) S, where otimes denotes
757:    the Kronecker product, I is the identity matrix of order d, and S is a
758:    sequential matrix allocated internally. This compact representation is
759:    used e.g. to represent the Krylov basis generated with the linearization
760:    of a matrix polynomial of degree d.

762:    The size of V (number of rows) is equal to d times n, where n is the size
763:    of U. The dimensions of S are d times m rows and m-d+1 columns, where m is
764:    the number of columns of U, so m should be at least d.

766:    The communicator of V will be the same as U.

768:    On input, the content of U is irrelevant. Alternatively, it may contain
769:    some nonzero columns that will be used by BVTensorBuildFirstColumn().

771:    Level: advanced

773: .seealso: BVTensorGetDegree(), BVTensorGetFactors(), BVTensorBuildFirstColumn()
774: @*/
775: PetscErrorCode BVCreateTensor(BV U,PetscInt d,BV *V)
776: {
777:   PetscBool      match;
778:   PetscInt       n,N,m;
779:   VecType        vtype;
780:   BV_TENSOR      *ctx;

782:   PetscFunctionBegin;
785:   PetscCall(PetscObjectTypeCompare((PetscObject)U,BVTENSOR,&match));
786:   PetscCheck(!match,PetscObjectComm((PetscObject)U),PETSC_ERR_SUP,"U cannot be of type tensor");

788:   PetscCall(BVCreate(PetscObjectComm((PetscObject)U),V));
789:   PetscCall(BVGetSizes(U,&n,&N,&m));
790:   PetscCheck(m>=d,PetscObjectComm((PetscObject)U),PETSC_ERR_ARG_SIZ,"U has %" PetscInt_FMT " columns, it should have at least d=%" PetscInt_FMT,m,d);
791:   PetscCall(BVSetSizes(*V,d*n,d*N,m-d+1));
792:   PetscCall(BVGetVecType(U,&vtype));
793:   PetscCall(BVSetVecType(*V,vtype));
794:   PetscCall(PetscObjectChangeTypeName((PetscObject)*V,BVTENSOR));
795:   PetscCall(PetscLogEventBegin(BV_Create,*V,0,0,0));
796:   PetscCall(BVCreate_Tensor(*V));
797:   PetscCall(PetscLogEventEnd(BV_Create,*V,0,0,0));

799:   ctx = (BV_TENSOR*)(*V)->data;
800:   ctx->U  = U;
801:   ctx->d  = d;
802:   ctx->ld = m;
803:   PetscCall(PetscObjectReference((PetscObject)U));
804:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,d*m,m-d+1,NULL,&ctx->S));
805:   PetscCall(PetscObjectSetName((PetscObject)ctx->S,"S"));

807:   /* Copy user-provided attributes of U */
808:   (*V)->orthog_type  = U->orthog_type;
809:   (*V)->orthog_ref   = U->orthog_ref;
810:   (*V)->orthog_eta   = U->orthog_eta;
811:   (*V)->orthog_block = U->orthog_block;
812:   (*V)->vmm          = U->vmm;
813:   (*V)->rrandom      = U->rrandom;
814:   PetscFunctionReturn(PETSC_SUCCESS);
815: }