Actual source code: sveccuda.cu

slepc-3.21.1 2024-04-26
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    BV implemented as a single Vec (CUDA version)
 12: */

 14: #include <slepc/private/bvimpl.h>
 15: #include <slepccublas.h>
 16: #include "../src/sys/classes/bv/impls/svec/svec.h"

 18: PetscErrorCode BVMult_Svec_CUDA(BV Y,PetscScalar alpha,PetscScalar beta,BV X,Mat Q)
 19: {
 20:   BV_SVEC           *y = (BV_SVEC*)Y->data,*x = (BV_SVEC*)X->data;
 21:   const PetscScalar *d_px,*d_A,*d_B,*d_q;
 22:   PetscScalar       *d_py,*d_C;
 23:   PetscInt          ldq;

 25:   PetscFunctionBegin;
 26:   if (!Y->n) PetscFunctionReturn(PETSC_SUCCESS);
 27:   PetscCall(VecCUDAGetArrayRead(x->v,&d_px));
 28:   if (beta==(PetscScalar)0.0) PetscCall(VecCUDAGetArrayWrite(y->v,&d_py));
 29:   else PetscCall(VecCUDAGetArray(y->v,&d_py));
 30:   d_A = d_px+(X->nc+X->l)*X->ld;
 31:   d_C = d_py+(Y->nc+Y->l)*Y->ld;
 32:   if (Q) {
 33:     PetscCall(MatDenseGetLDA(Q,&ldq));
 34:     PetscCall(BV_MatDenseCUDAGetArrayRead(Y,Q,&d_q));
 35:     d_B = d_q+Y->l*ldq+X->l;
 36:     PetscCall(BVMult_BLAS_CUDA(Y,Y->n,Y->k-Y->l,X->k-X->l,alpha,d_A,X->ld,d_B,ldq,beta,d_C,Y->ld));
 37:     PetscCall(BV_MatDenseCUDARestoreArrayRead(Y,Q,&d_q));
 38:   } else PetscCall(BVAXPY_BLAS_CUDA(Y,Y->n,Y->k-Y->l,alpha,d_A,X->ld,beta,d_C,Y->ld));
 39:   PetscCall(VecCUDARestoreArrayRead(x->v,&d_px));
 40:   if (beta==(PetscScalar)0.0) PetscCall(VecCUDARestoreArrayWrite(y->v,&d_py));
 41:   else PetscCall(VecCUDARestoreArray(y->v,&d_py));
 42:   PetscFunctionReturn(PETSC_SUCCESS);
 43: }

 45: PetscErrorCode BVMultVec_Svec_CUDA(BV X,PetscScalar alpha,PetscScalar beta,Vec y,PetscScalar *q)
 46: {
 47:   BV_SVEC           *x = (BV_SVEC*)X->data;
 48:   PetscScalar       *d_py,*d_q;
 49:   const PetscScalar *d_px;

 51:   PetscFunctionBegin;
 52:   PetscCall(VecCUDAGetArrayRead(x->v,&d_px));
 53:   if (beta==(PetscScalar)0.0) PetscCall(VecCUDAGetArrayWrite(y,&d_py));
 54:   else PetscCall(VecCUDAGetArray(y,&d_py));
 55:   if (!q) PetscCall(VecCUDAGetArray(X->buffer,&d_q));
 56:   else {
 57:     PetscInt k=X->k-X->l;
 58:     PetscCallCUDA(cudaMalloc((void**)&d_q,k*sizeof(PetscScalar)));
 59:     PetscCallCUDA(cudaMemcpy(d_q,q,k*sizeof(PetscScalar),cudaMemcpyHostToDevice));
 60:     PetscCall(PetscLogCpuToGpu(k*sizeof(PetscScalar)));
 61:   }
 62:   PetscCall(BVMultVec_BLAS_CUDA(X,X->n,X->k-X->l,alpha,d_px+(X->nc+X->l)*X->ld,X->ld,d_q,beta,d_py));
 63:   PetscCall(VecCUDARestoreArrayRead(x->v,&d_px));
 64:   if (beta==(PetscScalar)0.0) PetscCall(VecCUDARestoreArrayWrite(y,&d_py));
 65:   else PetscCall(VecCUDARestoreArray(y,&d_py));
 66:   if (!q) PetscCall(VecCUDARestoreArray(X->buffer,&d_q));
 67:   else PetscCallCUDA(cudaFree(d_q));
 68:   PetscFunctionReturn(PETSC_SUCCESS);
 69: }

 71: PetscErrorCode BVMultInPlace_Svec_CUDA(BV V,Mat Q,PetscInt s,PetscInt e)
 72: {
 73:   BV_SVEC           *ctx = (BV_SVEC*)V->data;
 74:   PetscScalar       *d_pv;
 75:   const PetscScalar *d_q;
 76:   PetscInt          ldq;

 78:   PetscFunctionBegin;
 79:   if (s>=e || !V->n) PetscFunctionReturn(PETSC_SUCCESS);
 80:   PetscCall(MatDenseGetLDA(Q,&ldq));
 81:   PetscCall(VecCUDAGetArray(ctx->v,&d_pv));
 82:   PetscCall(BV_MatDenseCUDAGetArrayRead(V,Q,&d_q));
 83:   PetscCall(BVMultInPlace_BLAS_CUDA(V,V->n,V->k-V->l,s-V->l,e-V->l,d_pv+(V->nc+V->l)*V->ld,V->ld,d_q+V->l*ldq+V->l,ldq,PETSC_FALSE));
 84:   PetscCall(BV_MatDenseCUDARestoreArrayRead(V,Q,&d_q));
 85:   PetscCall(VecCUDARestoreArray(ctx->v,&d_pv));
 86:   PetscFunctionReturn(PETSC_SUCCESS);
 87: }

 89: PetscErrorCode BVMultInPlaceHermitianTranspose_Svec_CUDA(BV V,Mat Q,PetscInt s,PetscInt e)
 90: {
 91:   BV_SVEC           *ctx = (BV_SVEC*)V->data;
 92:   PetscScalar       *d_pv;
 93:   const PetscScalar *d_q;
 94:   PetscInt          ldq;

 96:   PetscFunctionBegin;
 97:   if (s>=e || !V->n) PetscFunctionReturn(PETSC_SUCCESS);
 98:   PetscCall(MatDenseGetLDA(Q,&ldq));
 99:   PetscCall(VecCUDAGetArray(ctx->v,&d_pv));
100:   PetscCall(BV_MatDenseCUDAGetArrayRead(V,Q,&d_q));
101:   PetscCall(BVMultInPlace_BLAS_CUDA(V,V->n,V->k-V->l,s-V->l,e-V->l,d_pv+(V->nc+V->l)*V->ld,V->ld,d_q+V->l*ldq+V->l,ldq,PETSC_TRUE));
102:   PetscCall(BV_MatDenseCUDARestoreArrayRead(V,Q,&d_q));
103:   PetscCall(VecCUDARestoreArray(ctx->v,&d_pv));
104:   PetscFunctionReturn(PETSC_SUCCESS);
105: }

107: PetscErrorCode BVDot_Svec_CUDA(BV X,BV Y,Mat M)
108: {
109:   BV_SVEC           *x = (BV_SVEC*)X->data,*y = (BV_SVEC*)Y->data;
110:   const PetscScalar *d_px,*d_py;
111:   PetscScalar       *pm;
112:   PetscInt          ldm;

114:   PetscFunctionBegin;
115:   PetscCall(MatDenseGetLDA(M,&ldm));
116:   PetscCall(VecCUDAGetArrayRead(x->v,&d_px));
117:   PetscCall(VecCUDAGetArrayRead(y->v,&d_py));
118:   PetscCall(MatDenseGetArrayWrite(M,&pm));
119:   PetscCall(BVDot_BLAS_CUDA(X,Y->k-Y->l,X->k-X->l,X->n,d_py+(Y->nc+Y->l)*Y->ld,Y->ld,d_px+(X->nc+X->l)*X->ld,X->ld,pm+X->l*ldm+Y->l,ldm,x->mpi));
120:   PetscCall(MatDenseRestoreArrayWrite(M,&pm));
121:   PetscCall(VecCUDARestoreArrayRead(x->v,&d_px));
122:   PetscCall(VecCUDARestoreArrayRead(y->v,&d_py));
123:   PetscFunctionReturn(PETSC_SUCCESS);
124: }

126: PetscErrorCode BVDotVec_Svec_CUDA(BV X,Vec y,PetscScalar *q)
127: {
128:   BV_SVEC           *x = (BV_SVEC*)X->data;
129:   const PetscScalar *d_px,*d_py;
130:   Vec               z = y;

132:   PetscFunctionBegin;
133:   if (PetscUnlikely(X->matrix)) {
134:     PetscCall(BV_IPMatMult(X,y));
135:     z = X->Bx;
136:   }
137:   PetscCall(VecCUDAGetArrayRead(x->v,&d_px));
138:   PetscCall(VecCUDAGetArrayRead(z,&d_py));
139:   PetscCall(BVDotVec_BLAS_CUDA(X,X->n,X->k-X->l,d_px+(X->nc+X->l)*X->ld,X->ld,d_py,q,x->mpi));
140:   PetscCall(VecCUDARestoreArrayRead(z,&d_py));
141:   PetscCall(VecCUDARestoreArrayRead(x->v,&d_px));
142:   PetscFunctionReturn(PETSC_SUCCESS);
143: }

145: PetscErrorCode BVDotVec_Local_Svec_CUDA(BV X,Vec y,PetscScalar *m)
146: {
147:   BV_SVEC           *x = (BV_SVEC*)X->data;
148:   const PetscScalar *d_px,*d_py;
149:   Vec               z = y;

151:   PetscFunctionBegin;
152:   if (PetscUnlikely(X->matrix)) {
153:     PetscCall(BV_IPMatMult(X,y));
154:     z = X->Bx;
155:   }
156:   PetscCall(VecCUDAGetArrayRead(x->v,&d_px));
157:   PetscCall(VecCUDAGetArrayRead(z,&d_py));
158:   PetscCall(BVDotVec_BLAS_CUDA(X,X->n,X->k-X->l,d_px+(X->nc+X->l)*X->ld,X->ld,d_py,m,PETSC_FALSE));
159:   PetscCall(VecCUDARestoreArrayRead(z,&d_py));
160:   PetscCall(VecCUDARestoreArrayRead(x->v,&d_px));
161:   PetscFunctionReturn(PETSC_SUCCESS);
162: }

164: PetscErrorCode BVScale_Svec_CUDA(BV bv,PetscInt j,PetscScalar alpha)
165: {
166:   BV_SVEC        *ctx = (BV_SVEC*)bv->data;
167:   PetscScalar    *d_array,*d_A;
168:   PetscInt       n=0;

170:   PetscFunctionBegin;
171:   if (!bv->n) PetscFunctionReturn(PETSC_SUCCESS);
172:   PetscCall(VecCUDAGetArray(ctx->v,&d_array));
173:   if (PetscUnlikely(j<0)) {
174:     d_A = d_array+(bv->nc+bv->l)*bv->ld;
175:     n = (bv->k-bv->l)*bv->ld;
176:   } else {
177:     d_A = d_array+(bv->nc+j)*bv->ld;
178:     n = bv->n;
179:   }
180:   PetscCall(BVScale_BLAS_CUDA(bv,n,d_A,alpha));
181:   PetscCall(VecCUDARestoreArray(ctx->v,&d_array));
182:   PetscFunctionReturn(PETSC_SUCCESS);
183: }

185: PetscErrorCode BVNorm_Svec_CUDA(BV bv,PetscInt j,NormType type,PetscReal *val)
186: {
187:   BV_SVEC           *ctx = (BV_SVEC*)bv->data;
188:   const PetscScalar *array,*d_array,*d_A;
189:   PetscInt          n=0;

191:   PetscFunctionBegin;
192:   if (!ctx->mpi && ((j<0 && type==NORM_FROBENIUS && bv->ld==bv->n) || (j>=0 && type==NORM_2))) {
193:     /* compute on GPU with cuBLAS - TODO: include the MPI case here */
194:     *val = 0.0;
195:     if (!bv->n) PetscFunctionReturn(PETSC_SUCCESS);
196:     PetscCall(VecCUDAGetArrayRead(ctx->v,&d_array));
197:     if (PetscUnlikely(j<0)) {
198:       d_A = d_array+(bv->nc+bv->l)*bv->ld;
199:       n = (bv->k-bv->l)*bv->ld;
200:     } else {
201:       d_A = d_array+(bv->nc+j)*bv->ld;
202:       n = bv->n;
203:     }
204:     PetscCall(BVNorm_BLAS_CUDA(bv,n,d_A,val));
205:     PetscCall(VecCUDARestoreArrayRead(ctx->v,&d_array));
206:   } else {
207:     /* compute on CPU */
208:     PetscCall(VecGetArrayRead(ctx->v,&array));
209:     if (PetscUnlikely(j<0)) PetscCall(BVNorm_LAPACK_Private(bv,bv->n,bv->k-bv->l,array+(bv->nc+bv->l)*bv->ld,bv->ld,type,val,ctx->mpi));
210:     else PetscCall(BVNorm_LAPACK_Private(bv,bv->n,1,array+(bv->nc+j)*bv->ld,bv->ld,type,val,ctx->mpi));
211:     PetscCall(VecRestoreArrayRead(ctx->v,&array));
212:   }
213:   PetscFunctionReturn(PETSC_SUCCESS);
214: }

216: PetscErrorCode BVNorm_Local_Svec_CUDA(BV bv,PetscInt j,NormType type,PetscReal *val)
217: {
218:   BV_SVEC           *ctx = (BV_SVEC*)bv->data;
219:   const PetscScalar *array,*d_array,*d_A;
220:   PetscInt          n=0;

222:   PetscFunctionBegin;
223:   if ((j<0 && type==NORM_FROBENIUS && bv->ld==bv->n) || (j>=0 && type==NORM_2)) {
224:     /* compute on GPU with cuBLAS */
225:     *val = 0.0;
226:     if (!bv->n) PetscFunctionReturn(PETSC_SUCCESS);
227:     PetscCall(VecCUDAGetArrayRead(ctx->v,&d_array));
228:     if (PetscUnlikely(j<0)) {
229:       d_A = d_array+(bv->nc+bv->l)*bv->ld;
230:       n = (bv->k-bv->l)*bv->ld;
231:     } else {
232:       d_A = d_array+(bv->nc+j)*bv->ld;
233:       n = bv->n;
234:     }
235:     PetscCall(BVNorm_BLAS_CUDA(bv,n,d_A,val));
236:     PetscCall(VecCUDARestoreArrayRead(ctx->v,&d_array));
237:   } else {
238:     /* compute on CPU */
239:     PetscCall(VecGetArrayRead(ctx->v,&array));
240:     if (PetscUnlikely(j<0)) PetscCall(BVNorm_LAPACK_Private(bv,bv->n,bv->k-bv->l,array+(bv->nc+bv->l)*bv->ld,bv->ld,type,val,PETSC_FALSE));
241:     else PetscCall(BVNorm_LAPACK_Private(bv,bv->n,1,array+(bv->nc+j)*bv->ld,bv->ld,type,val,PETSC_FALSE));
242:     PetscCall(VecRestoreArrayRead(ctx->v,&array));
243:   }
244:   PetscFunctionReturn(PETSC_SUCCESS);
245: }

247: PetscErrorCode BVNormalize_Svec_CUDA(BV bv,PetscScalar *eigi)
248: {
249:   BV_SVEC        *ctx = (BV_SVEC*)bv->data;
250:   PetscScalar    *array,*d_array,*wi=NULL;

252:   PetscFunctionBegin;
253:   if (eigi) wi = eigi+bv->l;
254:   if (!ctx->mpi) {
255:     /* compute on GPU with cuBLAS - TODO: include the MPI case here */
256:     if (!bv->n) PetscFunctionReturn(PETSC_SUCCESS);
257:     PetscCall(VecCUDAGetArray(ctx->v,&d_array));
258:     PetscCall(BVNormalize_BLAS_CUDA(bv,bv->n,bv->k-bv->l,d_array+(bv->nc+bv->l)*bv->ld,bv->ld,wi));
259:     PetscCall(VecCUDARestoreArray(ctx->v,&d_array));
260:   } else {
261:     /* compute on CPU */
262:     PetscCall(VecGetArray(ctx->v,&array));
263:     PetscCall(BVNormalize_LAPACK_Private(bv,bv->n,bv->k-bv->l,array+(bv->nc+bv->l)*bv->ld,bv->ld,wi,ctx->mpi));
264:     PetscCall(VecRestoreArray(ctx->v,&array));
265:   }
266:   PetscFunctionReturn(PETSC_SUCCESS);
267: }

269: PetscErrorCode BVMatMult_Svec_CUDA(BV V,Mat A,BV W)
270: {
271:   BV_SVEC           *v = (BV_SVEC*)V->data,*w = (BV_SVEC*)W->data;
272:   Mat               Vmat,Wmat;
273:   const PetscScalar *d_pv;
274:   PetscScalar       *d_pw;
275:   PetscInt          j;

277:   PetscFunctionBegin;
278:   if (V->vmm) {
279:     PetscCall(BVGetMat(V,&Vmat));
280:     PetscCall(BVGetMat(W,&Wmat));
281:     PetscCall(MatProductCreateWithMat(A,Vmat,NULL,Wmat));
282:     PetscCall(MatProductSetType(Wmat,MATPRODUCT_AB));
283:     PetscCall(MatProductSetFromOptions(Wmat));
284:     PetscCall(MatProductSymbolic(Wmat));
285:     PetscCall(MatProductNumeric(Wmat));
286:     PetscCall(MatProductClear(Wmat));
287:     PetscCall(BVRestoreMat(V,&Vmat));
288:     PetscCall(BVRestoreMat(W,&Wmat));
289:   } else {
290:     PetscCall(VecCUDAGetArrayRead(v->v,&d_pv));
291:     PetscCall(VecCUDAGetArrayWrite(w->v,&d_pw));
292:     for (j=0;j<V->k-V->l;j++) {
293:       PetscCall(VecCUDAPlaceArray(V->cv[1],(PetscScalar *)d_pv+(V->nc+V->l+j)*V->ld));
294:       PetscCall(VecCUDAPlaceArray(W->cv[1],d_pw+(W->nc+W->l+j)*W->ld));
295:       PetscCall(MatMult(A,V->cv[1],W->cv[1]));
296:       PetscCall(VecCUDAResetArray(V->cv[1]));
297:       PetscCall(VecCUDAResetArray(W->cv[1]));
298:     }
299:     PetscCall(VecCUDARestoreArrayRead(v->v,&d_pv));
300:     PetscCall(VecCUDARestoreArrayWrite(w->v,&d_pw));
301:   }
302:   PetscFunctionReturn(PETSC_SUCCESS);
303: }

305: PetscErrorCode BVCopy_Svec_CUDA(BV V,BV W)
306: {
307:   BV_SVEC           *v = (BV_SVEC*)V->data,*w = (BV_SVEC*)W->data;
308:   const PetscScalar *d_pv;
309:   PetscScalar       *d_pw;

311:   PetscFunctionBegin;
312:   PetscCall(VecCUDAGetArrayRead(v->v,&d_pv));
313:   PetscCall(VecCUDAGetArray(w->v,&d_pw));
314:   PetscCallCUDA(cudaMemcpy2D(d_pw+(W->nc+W->l)*W->ld,W->ld*sizeof(PetscScalar),d_pv+(V->nc+V->l)*V->ld,V->ld*sizeof(PetscScalar),V->n*sizeof(PetscScalar),V->k-V->l,cudaMemcpyDeviceToDevice));
315:   PetscCall(VecCUDARestoreArrayRead(v->v,&d_pv));
316:   PetscCall(VecCUDARestoreArray(w->v,&d_pw));
317:   PetscFunctionReturn(PETSC_SUCCESS);
318: }

320: PetscErrorCode BVCopyColumn_Svec_CUDA(BV V,PetscInt j,PetscInt i)
321: {
322:   BV_SVEC        *v = (BV_SVEC*)V->data;
323:   PetscScalar    *d_pv;

325:   PetscFunctionBegin;
326:   PetscCall(VecCUDAGetArray(v->v,&d_pv));
327:   PetscCallCUDA(cudaMemcpy(d_pv+(V->nc+i)*V->ld,d_pv+(V->nc+j)*V->ld,V->n*sizeof(PetscScalar),cudaMemcpyDeviceToDevice));
328:   PetscCall(VecCUDARestoreArray(v->v,&d_pv));
329:   PetscFunctionReturn(PETSC_SUCCESS);
330: }

332: PetscErrorCode BVResize_Svec_CUDA(BV bv,PetscInt m,PetscBool copy)
333: {
334:   BV_SVEC           *ctx = (BV_SVEC*)bv->data;
335:   const PetscScalar *d_pv;
336:   PetscScalar       *d_pnew;
337:   PetscInt          bs;
338:   Vec               vnew;
339:   char              str[50];

341:   PetscFunctionBegin;
342:   PetscCall(PetscLayoutGetBlockSize(bv->map,&bs));
343:   PetscCall(VecCreate(PetscObjectComm((PetscObject)bv),&vnew));
344:   PetscCall(VecSetType(vnew,bv->vtype));
345:   PetscCall(VecSetSizes(vnew,m*bv->ld,PETSC_DECIDE));
346:   PetscCall(VecSetBlockSize(vnew,bs));
347:   if (((PetscObject)bv)->name) {
348:     PetscCall(PetscSNPrintf(str,sizeof(str),"%s_0",((PetscObject)bv)->name));
349:     PetscCall(PetscObjectSetName((PetscObject)vnew,str));
350:   }
351:   if (copy) {
352:     PetscCall(VecCUDAGetArrayRead(ctx->v,&d_pv));
353:     PetscCall(VecCUDAGetArrayWrite(vnew,&d_pnew));
354:     PetscCallCUDA(cudaMemcpy(d_pnew,d_pv,PetscMin(m,bv->m)*bv->ld*sizeof(PetscScalar),cudaMemcpyDeviceToDevice));
355:     PetscCall(VecCUDARestoreArrayRead(ctx->v,&d_pv));
356:     PetscCall(VecCUDARestoreArrayWrite(vnew,&d_pnew));
357:   }
358:   PetscCall(VecDestroy(&ctx->v));
359:   ctx->v = vnew;
360:   PetscFunctionReturn(PETSC_SUCCESS);
361: }

363: PetscErrorCode BVGetColumn_Svec_CUDA(BV bv,PetscInt j,Vec*)
364: {
365:   BV_SVEC        *ctx = (BV_SVEC*)bv->data;
366:   PetscScalar    *d_pv;
367:   PetscInt       l;

369:   PetscFunctionBegin;
370:   l = BVAvailableVec;
371:   PetscCall(VecCUDAGetArray(ctx->v,&d_pv));
372:   PetscCall(VecCUDAPlaceArray(bv->cv[l],d_pv+(bv->nc+j)*bv->ld));
373:   PetscFunctionReturn(PETSC_SUCCESS);
374: }

376: PetscErrorCode BVRestoreColumn_Svec_CUDA(BV bv,PetscInt j,Vec*)
377: {
378:   BV_SVEC        *ctx = (BV_SVEC*)bv->data;
379:   PetscInt       l;

381:   PetscFunctionBegin;
382:   l = (j==bv->ci[0])? 0: 1;
383:   PetscCall(VecCUDAResetArray(bv->cv[l]));
384:   PetscCall(VecCUDARestoreArray(ctx->v,NULL));
385:   PetscFunctionReturn(PETSC_SUCCESS);
386: }

388: PetscErrorCode BVRestoreSplit_Svec_CUDA(BV bv,BV *L,BV *R)
389: {
390:   Vec               v;
391:   const PetscScalar *d_pv;
392:   PetscObjectState  lstate,rstate;
393:   PetscBool         change=PETSC_FALSE;

395:   PetscFunctionBegin;
396:   /* force sync flag to PETSC_CUDA_BOTH */
397:   if (L) {
398:     PetscCall(PetscObjectStateGet((PetscObject)*L,&lstate));
399:     if (lstate != bv->lstate) {
400:       v = ((BV_SVEC*)bv->L->data)->v;
401:       PetscCall(VecCUDAGetArrayRead(v,&d_pv));
402:       PetscCall(VecCUDARestoreArrayRead(v,&d_pv));
403:       change = PETSC_TRUE;
404:     }
405:   }
406:   if (R) {
407:     PetscCall(PetscObjectStateGet((PetscObject)*R,&rstate));
408:     if (rstate != bv->rstate) {
409:       v = ((BV_SVEC*)bv->R->data)->v;
410:       PetscCall(VecCUDAGetArrayRead(v,&d_pv));
411:       PetscCall(VecCUDARestoreArrayRead(v,&d_pv));
412:       change = PETSC_TRUE;
413:     }
414:   }
415:   if (change) {
416:     v = ((BV_SVEC*)bv->data)->v;
417:     PetscCall(VecCUDAGetArray(v,(PetscScalar **)&d_pv));
418:     PetscCall(VecCUDARestoreArray(v,(PetscScalar **)&d_pv));
419:   }
420:   PetscFunctionReturn(PETSC_SUCCESS);
421: }

423: PetscErrorCode BVGetMat_Svec_CUDA(BV bv,Mat *A)
424: {
425:   BV_SVEC        *ctx = (BV_SVEC*)bv->data;
426:   PetscScalar    *vv,*aa;
427:   PetscBool      create=PETSC_FALSE;
428:   PetscInt       m,cols;

430:   PetscFunctionBegin;
431:   m = bv->k-bv->l;
432:   if (!bv->Aget) create=PETSC_TRUE;
433:   else {
434:     PetscCall(MatDenseCUDAGetArray(bv->Aget,&aa));
435:     PetscCheck(!aa,PetscObjectComm((PetscObject)bv),PETSC_ERR_ARG_WRONGSTATE,"BVGetMat already called on this BV");
436:     PetscCall(MatGetSize(bv->Aget,NULL,&cols));
437:     if (cols!=m) {
438:       PetscCall(MatDestroy(&bv->Aget));
439:       create=PETSC_TRUE;
440:     }
441:   }
442:   PetscCall(VecCUDAGetArray(ctx->v,&vv));
443:   if (create) {
444:     PetscCall(MatCreateDenseFromVecType(PetscObjectComm((PetscObject)bv),bv->vtype,bv->n,PETSC_DECIDE,bv->N,m,bv->ld,vv,&bv->Aget)); /* pass a pointer to avoid allocation of storage */
445:     PetscCall(MatDenseCUDAReplaceArray(bv->Aget,NULL));  /* replace with a null pointer, the value after BVRestoreMat */
446:   }
447:   PetscCall(MatDenseCUDAPlaceArray(bv->Aget,vv+(bv->nc+bv->l)*bv->ld));  /* set the actual pointer */
448:   *A = bv->Aget;
449:   PetscFunctionReturn(PETSC_SUCCESS);
450: }

452: PetscErrorCode BVRestoreMat_Svec_CUDA(BV bv,Mat *A)
453: {
454:   BV_SVEC        *ctx = (BV_SVEC*)bv->data;
455:   PetscScalar    *vv,*aa;

457:   PetscFunctionBegin;
458:   PetscCall(MatDenseCUDAGetArray(bv->Aget,&aa));
459:   vv = aa-(bv->nc+bv->l)*bv->ld;
460:   PetscCall(MatDenseCUDAResetArray(bv->Aget));
461:   PetscCall(VecCUDARestoreArray(ctx->v,&vv));
462:   *A = NULL;
463:   PetscFunctionReturn(PETSC_SUCCESS);
464: }