Actual source code: bvimpl.h
slepc-main 2023-10-18
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #pragma once
13: #include <slepcbv.h>
14: #include <slepc/private/slepcimpl.h>
16: /* SUBMANSEC = BV */
18: SLEPC_EXTERN PetscBool BVRegisterAllCalled;
19: SLEPC_EXTERN PetscErrorCode BVRegisterAll(void);
21: SLEPC_EXTERN PetscLogEvent BV_Create,BV_Copy,BV_Mult,BV_MultVec,BV_MultInPlace,BV_Dot,BV_DotVec,BV_Orthogonalize,BV_OrthogonalizeVec,BV_Scale,BV_Norm,BV_NormVec,BV_Normalize,BV_SetRandom,BV_MatMult,BV_MatMultVec,BV_MatProject,BV_SVDAndRank;
23: typedef struct _BVOps *BVOps;
25: struct _BVOps {
26: PetscErrorCode (*mult)(BV,PetscScalar,PetscScalar,BV,Mat);
27: PetscErrorCode (*multvec)(BV,PetscScalar,PetscScalar,Vec,PetscScalar*);
28: PetscErrorCode (*multinplace)(BV,Mat,PetscInt,PetscInt);
29: PetscErrorCode (*multinplacetrans)(BV,Mat,PetscInt,PetscInt);
30: PetscErrorCode (*dot)(BV,BV,Mat);
31: PetscErrorCode (*dotvec)(BV,Vec,PetscScalar*);
32: PetscErrorCode (*dotvec_local)(BV,Vec,PetscScalar*);
33: PetscErrorCode (*dotvec_begin)(BV,Vec,PetscScalar*);
34: PetscErrorCode (*dotvec_end)(BV,Vec,PetscScalar*);
35: PetscErrorCode (*scale)(BV,PetscInt,PetscScalar);
36: PetscErrorCode (*norm)(BV,PetscInt,NormType,PetscReal*);
37: PetscErrorCode (*norm_local)(BV,PetscInt,NormType,PetscReal*);
38: PetscErrorCode (*norm_begin)(BV,PetscInt,NormType,PetscReal*);
39: PetscErrorCode (*norm_end)(BV,PetscInt,NormType,PetscReal*);
40: PetscErrorCode (*normalize)(BV,PetscScalar*);
41: PetscErrorCode (*matmult)(BV,Mat,BV);
42: PetscErrorCode (*copy)(BV,BV);
43: PetscErrorCode (*copycolumn)(BV,PetscInt,PetscInt);
44: PetscErrorCode (*resize)(BV,PetscInt,PetscBool);
45: PetscErrorCode (*getcolumn)(BV,PetscInt,Vec*);
46: PetscErrorCode (*restorecolumn)(BV,PetscInt,Vec*);
47: PetscErrorCode (*getarray)(BV,PetscScalar**);
48: PetscErrorCode (*restorearray)(BV,PetscScalar**);
49: PetscErrorCode (*getarrayread)(BV,const PetscScalar**);
50: PetscErrorCode (*restorearrayread)(BV,const PetscScalar**);
51: PetscErrorCode (*restoresplit)(BV,BV*,BV*);
52: PetscErrorCode (*gramschmidt)(BV,PetscInt,Vec,PetscBool*,PetscScalar*,PetscScalar*,PetscReal*,PetscReal*);
53: PetscErrorCode (*getmat)(BV,Mat*);
54: PetscErrorCode (*restoremat)(BV,Mat*);
55: PetscErrorCode (*duplicate)(BV,BV);
56: PetscErrorCode (*create)(BV);
57: PetscErrorCode (*setfromoptions)(BV,PetscOptionItems*);
58: PetscErrorCode (*view)(BV,PetscViewer);
59: PetscErrorCode (*destroy)(BV);
60: };
62: struct _p_BV {
63: PETSCHEADER(struct _BVOps);
64: /*------------------------- User parameters --------------------------*/
65: Vec t; /* template vector */
66: PetscInt n,N; /* dimensions of vectors (local, global) */
67: PetscInt m; /* number of vectors */
68: PetscInt l; /* number of leading columns */
69: PetscInt k; /* number of active columns */
70: PetscInt nc; /* number of constraints */
71: PetscInt ld; /* leading dimension */
72: BVOrthogType orthog_type; /* the method of vector orthogonalization */
73: BVOrthogRefineType orthog_ref; /* refinement method */
74: PetscReal orthog_eta; /* refinement threshold */
75: BVOrthogBlockType orthog_block; /* the method of block orthogonalization */
76: Mat matrix; /* inner product matrix */
77: PetscBool indef; /* matrix is indefinite */
78: BVMatMultType vmm; /* version of matmult operation */
79: PetscBool rrandom; /* reproducible random vectors */
80: PetscReal deftol; /* tolerance for BV_SafeSqrt */
82: /*---------------------- Cached data and workspace -------------------*/
83: Vec buffer; /* buffer vector used in orthogonalization */
84: Mat Abuffer; /* auxiliary seqdense matrix that wraps the buffer */
85: Vec Bx; /* result of matrix times a vector x */
86: PetscObjectId xid; /* object id of vector x */
87: PetscObjectState xstate; /* state of vector x */
88: Vec cv[2]; /* column vectors obtained with BVGetColumn() */
89: PetscInt ci[2]; /* column indices of obtained vectors */
90: PetscObjectState st[2]; /* state of obtained vectors */
91: PetscObjectId id[2]; /* object id of obtained vectors */
92: PetscScalar *h,*c; /* orthogonalization coefficients */
93: Vec omega; /* signature matrix values for indefinite case */
94: PetscBool defersfo; /* deferred call to setfromoptions */
95: BV cached; /* cached BV to store result of matrix times BV */
96: PetscObjectState bvstate; /* state of BV when BVApplyMatrixBV() was called */
97: BV L,R; /* BV objects obtained with BVGetSplit() */
98: PetscObjectState lstate,rstate;/* state of L and R when BVGetSplit() was called */
99: PetscInt lsplit; /* the value of l when BVGetSplit() was called */
100: PetscInt issplit; /* >0 if this BV has been created by splitting (1=left, 2=right) */
101: BV splitparent; /* my parent if I am a split BV */
102: PetscRandom rand; /* random number generator */
103: Mat Acreate; /* matrix given at BVCreateFromMat() */
104: Mat Aget; /* matrix returned for BVGetMat() */
105: PetscBool cuda; /* true if GPU must be used */
106: PetscBool sfocalled; /* setfromoptions has been called */
107: PetscScalar *work;
108: PetscInt lwork;
109: void *data;
110: };
112: /*
113: BV_SafeSqrt - Computes the square root of a scalar value alpha, which is
114: assumed to be z'*B*z. The result is
115: if definite inner product: res = sqrt(alpha)
116: if indefinite inner product: res = sgn(alpha)*sqrt(abs(alpha))
117: */
118: static inline PetscErrorCode BV_SafeSqrt(BV bv,PetscScalar alpha,PetscReal *res)
119: {
120: PetscReal absal,realp;
122: PetscFunctionBegin;
123: absal = PetscAbsScalar(alpha);
124: realp = PetscRealPart(alpha);
125: if (PetscUnlikely(absal<PETSC_MACHINE_EPSILON)) PetscCall(PetscInfo(bv,"Zero norm %g, either the vector is zero or a semi-inner product is being used\n",(double)absal));
126: #if defined(PETSC_USE_COMPLEX)
127: PetscCheck(PetscAbsReal(PetscImaginaryPart(alpha))<bv->deftol || PetscAbsReal(PetscImaginaryPart(alpha))/absal<10*bv->deftol,PetscObjectComm((PetscObject)bv),PETSC_ERR_USER_INPUT,"The inner product is not well defined: nonzero imaginary part %g",(double)PetscImaginaryPart(alpha));
128: #endif
129: if (PetscUnlikely(bv->indef)) {
130: *res = (realp<0.0)? -PetscSqrtReal(-realp): PetscSqrtReal(realp);
131: } else {
132: PetscCheck(realp>-bv->deftol,PetscObjectComm((PetscObject)bv),PETSC_ERR_USER_INPUT,"The inner product is not well defined: indefinite matrix %g",(double)realp);
133: *res = (realp<0.0)? 0.0: PetscSqrtReal(realp);
134: }
135: PetscFunctionReturn(PETSC_SUCCESS);
136: }
138: /*
139: BV_IPMatMult - Multiply a vector x by the inner-product matrix, cache the
140: result in Bx.
141: */
142: static inline PetscErrorCode BV_IPMatMult(BV bv,Vec x)
143: {
144: PetscFunctionBegin;
145: if (((PetscObject)x)->id != bv->xid || ((PetscObject)x)->state != bv->xstate) {
146: if (PetscUnlikely(!bv->Bx)) PetscCall(MatCreateVecs(bv->matrix,&bv->Bx,NULL));
147: PetscCall(MatMult(bv->matrix,x,bv->Bx));
148: PetscCall(PetscObjectGetId((PetscObject)x,&bv->xid));
149: PetscCall(PetscObjectStateGet((PetscObject)x,&bv->xstate));
150: }
151: PetscFunctionReturn(PETSC_SUCCESS);
152: }
154: /*
155: BV_IPMatMultBV - Multiply BV by the inner-product matrix, cache the
156: result internally in bv->cached.
157: */
158: static inline PetscErrorCode BV_IPMatMultBV(BV bv)
159: {
160: PetscFunctionBegin;
161: PetscCall(BVGetCachedBV(bv,&bv->cached));
162: if (((PetscObject)bv)->state != bv->bvstate || bv->l != bv->cached->l || bv->k != bv->cached->k) {
163: PetscCall(BVSetActiveColumns(bv->cached,bv->l,bv->k));
164: if (bv->matrix) PetscCall(BVMatMult(bv,bv->matrix,bv->cached));
165: else PetscCall(BVCopy(bv,bv->cached));
166: bv->bvstate = ((PetscObject)bv)->state;
167: }
168: PetscFunctionReturn(PETSC_SUCCESS);
169: }
171: /*
172: BV_AllocateCoeffs - Allocate orthogonalization coefficients if not done already.
173: */
174: static inline PetscErrorCode BV_AllocateCoeffs(BV bv)
175: {
176: PetscFunctionBegin;
177: if (!bv->h) PetscCall(PetscMalloc2(bv->nc+bv->m,&bv->h,bv->nc+bv->m,&bv->c));
178: PetscFunctionReturn(PETSC_SUCCESS);
179: }
181: /*
182: BV_AllocateSignature - Allocate signature coefficients if not done already.
183: */
184: static inline PetscErrorCode BV_AllocateSignature(BV bv)
185: {
186: PetscFunctionBegin;
187: if (bv->indef && !bv->omega) {
188: if (bv->cuda) {
189: #if defined(PETSC_HAVE_CUDA)
190: PetscCall(VecCreateSeqCUDA(PETSC_COMM_SELF,bv->nc+bv->m,&bv->omega));
191: #else
192: SETERRQ(PetscObjectComm((PetscObject)bv),PETSC_ERR_PLIB,"Something wrong happened");
193: #endif
194: } else PetscCall(VecCreateSeq(PETSC_COMM_SELF,bv->nc+bv->m,&bv->omega));
195: PetscCall(VecSet(bv->omega,1.0));
196: }
197: PetscFunctionReturn(PETSC_SUCCESS);
198: }
200: /*
201: BV_SetMatrixDiagonal - sets the inner product matrix for BV as a diagonal matrix
202: with the diagonal specified by vector vomega, using the same matrix type as matrix M
203: */
204: static inline PetscErrorCode BV_SetMatrixDiagonal(BV bv,Vec vomega,Mat M)
205: {
206: Mat Omega;
207: MatType Mtype;
209: PetscFunctionBegin;
210: PetscCall(MatGetType(M,&Mtype));
211: PetscCall(MatCreate(PetscObjectComm((PetscObject)bv),&Omega));
212: PetscCall(MatSetSizes(Omega,bv->n,bv->n,bv->N,bv->N));
213: PetscCall(MatSetType(Omega,Mtype));
214: PetscCall(MatSetUp(Omega));
215: PetscCall(MatDiagonalSet(Omega,vomega,INSERT_VALUES));
216: PetscCall(BVSetMatrix(bv,Omega,PETSC_TRUE));
217: PetscCall(MatDestroy(&Omega));
218: PetscFunctionReturn(PETSC_SUCCESS);
219: }
221: /*
222: BVAvailableVec: First (0) or second (1) vector available for
223: getcolumn operation (or -1 if both vectors already fetched).
224: */
225: #define BVAvailableVec (((bv->ci[0]==-bv->nc-1)? 0: (bv->ci[1]==-bv->nc-1)? 1: -1))
227: /*
228: Macros to test valid BV arguments
229: */
230: #if !defined(PETSC_USE_DEBUG)
232: #define BVCheckSizes(h,arg) do {(void)(h);} while (0)
233: #define BVCheckOp(h,arg,op) do {(void)(h);} while (0)
235: #else
237: #define BVCheckSizes(h,arg) \
238: do { \
239: PetscCheck((h)->m,PetscObjectComm((PetscObject)(h)),PETSC_ERR_ARG_WRONGSTATE,"BV sizes have not been defined: Parameter #%d",arg); \
240: } while (0)
242: #define BVCheckOp(h,arg,op) \
243: do { \
244: PetscCheck((h)->ops->op,PetscObjectComm((PetscObject)(h)),PETSC_ERR_SUP,"Operation not implemented in this BV type: Parameter #%d",arg); \
245: } while (0)
247: #endif
249: SLEPC_INTERN PetscErrorCode BVView_Vecs(BV,PetscViewer);
251: SLEPC_INTERN PetscErrorCode BVAllocateWork_Private(BV,PetscInt);
253: SLEPC_INTERN PetscErrorCode BVMult_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
254: SLEPC_INTERN PetscErrorCode BVMultVec_BLAS_Private(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar,PetscScalar*);
255: SLEPC_INTERN PetscErrorCode BVMultInPlace_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,PetscInt,PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscBool);
256: SLEPC_INTERN PetscErrorCode BVMultInPlace_Vecs_Private(BV,PetscInt,PetscInt,PetscInt,Vec*,const PetscScalar*,PetscBool);
257: SLEPC_INTERN PetscErrorCode BVAXPY_BLAS_Private(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
258: SLEPC_INTERN PetscErrorCode BVDot_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar*,PetscInt,PetscBool);
259: SLEPC_INTERN PetscErrorCode BVDotVec_BLAS_Private(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar*,PetscBool);
260: SLEPC_INTERN PetscErrorCode BVScale_BLAS_Private(BV,PetscInt,PetscScalar*,PetscScalar);
261: SLEPC_INTERN PetscErrorCode BVNorm_LAPACK_Private(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,NormType,PetscReal*,PetscBool);
262: SLEPC_INTERN PetscErrorCode BVNormalize_LAPACK_Private(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,PetscScalar*,PetscBool);
263: SLEPC_INTERN PetscErrorCode BVGetMat_Default(BV,Mat*);
264: SLEPC_INTERN PetscErrorCode BVRestoreMat_Default(BV,Mat*);
265: SLEPC_INTERN PetscErrorCode BVMatCholInv_LAPACK_Private(BV,Mat,Mat);
266: SLEPC_INTERN PetscErrorCode BVMatTriInv_LAPACK_Private(BV,Mat,Mat);
267: SLEPC_INTERN PetscErrorCode BVMatSVQB_LAPACK_Private(BV,Mat,Mat);
268: SLEPC_INTERN PetscErrorCode BVOrthogonalize_LAPACK_TSQR(BV,PetscInt,PetscInt,PetscScalar*,PetscInt,PetscScalar*,PetscInt);
269: SLEPC_INTERN PetscErrorCode BVOrthogonalize_LAPACK_TSQR_OnlyR(BV,PetscInt,PetscInt,PetscScalar*,PetscInt,PetscScalar*,PetscInt);
271: /* reduction operations used in BVOrthogonalize and BVNormalize */
272: SLEPC_EXTERN MPI_Op MPIU_TSQR, MPIU_LAPY2;
273: SLEPC_EXTERN void MPIAPI SlepcGivensPacked(void*,void*,PetscMPIInt*,MPI_Datatype*);
274: SLEPC_EXTERN void MPIAPI SlepcPythag(void*,void*,PetscMPIInt*,MPI_Datatype*);
276: /*
277: BV_CleanCoefficients_Default - Sets to zero all entries of column j of the bv buffer
278: */
279: static inline PetscErrorCode BV_CleanCoefficients_Default(BV bv,PetscInt j,PetscScalar *h)
280: {
281: PetscScalar *hh=h,*a;
282: PetscInt i;
284: PetscFunctionBegin;
285: if (!h) {
286: PetscCall(VecGetArray(bv->buffer,&a));
287: hh = a + j*(bv->nc+bv->m);
288: }
289: for (i=0;i<bv->nc+j;i++) hh[i] = 0.0;
290: if (!h) PetscCall(VecRestoreArray(bv->buffer,&a));
291: PetscFunctionReturn(PETSC_SUCCESS);
292: }
294: /*
295: BV_AddCoefficients_Default - Add the contents of the scratch (0-th column) of the bv buffer
296: into column j of the bv buffer
297: */
298: static inline PetscErrorCode BV_AddCoefficients_Default(BV bv,PetscInt j,PetscScalar *h,PetscScalar *c)
299: {
300: PetscScalar *hh=h,*cc=c;
301: PetscInt i;
303: PetscFunctionBegin;
304: if (!h) {
305: PetscCall(VecGetArray(bv->buffer,&cc));
306: hh = cc + j*(bv->nc+bv->m);
307: }
308: for (i=0;i<bv->nc+j;i++) hh[i] += cc[i];
309: if (!h) PetscCall(VecRestoreArray(bv->buffer,&cc));
310: PetscCall(PetscLogFlops(1.0*(bv->nc+j)));
311: PetscFunctionReturn(PETSC_SUCCESS);
312: }
314: /*
315: BV_SetValue_Default - Sets value in row j (counted after the constraints) of column k
316: of the coefficients array
317: */
318: static inline PetscErrorCode BV_SetValue_Default(BV bv,PetscInt j,PetscInt k,PetscScalar *h,PetscScalar value)
319: {
320: PetscScalar *hh=h,*a;
322: PetscFunctionBegin;
323: if (!h) {
324: PetscCall(VecGetArray(bv->buffer,&a));
325: hh = a + k*(bv->nc+bv->m);
326: }
327: hh[bv->nc+j] = value;
328: if (!h) PetscCall(VecRestoreArray(bv->buffer,&a));
329: PetscFunctionReturn(PETSC_SUCCESS);
330: }
332: /*
333: BV_SquareSum_Default - Returns the value h'*h, where h represents the contents of the
334: coefficients array (up to position j)
335: */
336: static inline PetscErrorCode BV_SquareSum_Default(BV bv,PetscInt j,PetscScalar *h,PetscReal *sum)
337: {
338: PetscScalar *hh=h;
339: PetscInt i;
341: PetscFunctionBegin;
342: *sum = 0.0;
343: if (!h) PetscCall(VecGetArray(bv->buffer,&hh));
344: for (i=0;i<bv->nc+j;i++) *sum += PetscRealPart(hh[i]*PetscConj(hh[i]));
345: if (!h) PetscCall(VecRestoreArray(bv->buffer,&hh));
346: PetscCall(PetscLogFlops(2.0*(bv->nc+j)));
347: PetscFunctionReturn(PETSC_SUCCESS);
348: }
350: /*
351: BV_ApplySignature_Default - Computes the pointwise product h*omega, where h represents
352: the contents of the coefficients array (up to position j) and omega is the signature;
353: if inverse=TRUE then the operation is h/omega
354: */
355: static inline PetscErrorCode BV_ApplySignature_Default(BV bv,PetscInt j,PetscScalar *h,PetscBool inverse)
356: {
357: PetscScalar *hh=h;
358: PetscInt i;
359: const PetscScalar *omega;
361: PetscFunctionBegin;
362: if (PetscUnlikely(!(bv->nc+j))) PetscFunctionReturn(PETSC_SUCCESS);
363: if (!h) PetscCall(VecGetArray(bv->buffer,&hh));
364: PetscCall(VecGetArrayRead(bv->omega,&omega));
365: if (inverse) for (i=0;i<bv->nc+j;i++) hh[i] /= PetscRealPart(omega[i]);
366: else for (i=0;i<bv->nc+j;i++) hh[i] *= PetscRealPart(omega[i]);
367: PetscCall(VecRestoreArrayRead(bv->omega,&omega));
368: if (!h) PetscCall(VecRestoreArray(bv->buffer,&hh));
369: PetscCall(PetscLogFlops(1.0*(bv->nc+j)));
370: PetscFunctionReturn(PETSC_SUCCESS);
371: }
373: /*
374: BV_SquareRoot_Default - Returns the square root of position j (counted after the constraints)
375: of the coefficients array
376: */
377: static inline PetscErrorCode BV_SquareRoot_Default(BV bv,PetscInt j,PetscScalar *h,PetscReal *beta)
378: {
379: PetscScalar *hh=h;
381: PetscFunctionBegin;
382: if (!h) PetscCall(VecGetArray(bv->buffer,&hh));
383: PetscCall(BV_SafeSqrt(bv,hh[bv->nc+j],beta));
384: if (!h) PetscCall(VecRestoreArray(bv->buffer,&hh));
385: PetscFunctionReturn(PETSC_SUCCESS);
386: }
388: /*
389: BV_StoreCoefficients_Default - Copy the contents of the coefficients array to an array dest
390: provided by the caller (only values from l to j are copied)
391: */
392: static inline PetscErrorCode BV_StoreCoefficients_Default(BV bv,PetscInt j,PetscScalar *h,PetscScalar *dest)
393: {
394: PetscScalar *hh=h,*a;
395: PetscInt i;
397: PetscFunctionBegin;
398: if (!h) {
399: PetscCall(VecGetArray(bv->buffer,&a));
400: hh = a + j*(bv->nc+bv->m);
401: }
402: for (i=bv->l;i<j;i++) dest[i-bv->l] = hh[bv->nc+i];
403: if (!h) PetscCall(VecRestoreArray(bv->buffer,&a));
404: PetscFunctionReturn(PETSC_SUCCESS);
405: }
407: /*
408: BV_GetEigenvector - retrieves k-th eigenvector from basis vectors V.
409: The argument eigi is the imaginary part of the corresponding eigenvalue.
410: */
411: static inline PetscErrorCode BV_GetEigenvector(BV V,PetscInt k,PetscScalar eigi,Vec Vr,Vec Vi)
412: {
413: PetscFunctionBegin;
414: #if defined(PETSC_USE_COMPLEX)
415: if (Vr) PetscCall(BVCopyVec(V,k,Vr));
416: if (Vi) PetscCall(VecSet(Vi,0.0));
417: #else
418: if (eigi > 0.0) { /* first value of conjugate pair */
419: if (Vr) PetscCall(BVCopyVec(V,k,Vr));
420: if (Vi) PetscCall(BVCopyVec(V,k+1,Vi));
421: } else if (eigi < 0.0) { /* second value of conjugate pair */
422: if (Vr) PetscCall(BVCopyVec(V,k-1,Vr));
423: if (Vi) {
424: PetscCall(BVCopyVec(V,k,Vi));
425: PetscCall(VecScale(Vi,-1.0));
426: }
427: } else { /* real eigenvalue */
428: if (Vr) PetscCall(BVCopyVec(V,k,Vr));
429: if (Vi) PetscCall(VecSet(Vi,0.0));
430: }
431: #endif
432: PetscFunctionReturn(PETSC_SUCCESS);
433: }
435: /*
436: BV_OrthogonalizeColumn_Safe - this is intended for cases where we know that
437: the resulting vector is going to be numerically zero, so normalization or
438: iterative refinement may cause problems in parallel (collective operation
439: not being called by all processes)
440: */
441: static inline PetscErrorCode BV_OrthogonalizeColumn_Safe(BV bv,PetscInt j,PetscScalar *H,PetscReal *norm,PetscBool *lindep)
442: {
443: BVOrthogRefineType orthog_ref;
445: PetscFunctionBegin;
446: PetscCall(PetscInfo(bv,"Orthogonalizing column %" PetscInt_FMT " without refinement\n",j));
447: orthog_ref = bv->orthog_ref;
448: bv->orthog_ref = BV_ORTHOG_REFINE_NEVER; /* avoid refinement */
449: PetscCall(BVOrthogonalizeColumn(bv,j,H,NULL,NULL));
450: bv->orthog_ref = orthog_ref; /* restore refinement setting */
451: if (norm) *norm = 0.0;
452: if (lindep) *lindep = PETSC_TRUE;
453: PetscFunctionReturn(PETSC_SUCCESS);
454: }
456: /*
457: BV_SetDefaultLD - set the default value of the leading dimension, based on
458: the local size.
459: */
460: static inline PetscErrorCode BV_SetDefaultLD(BV bv,PetscInt nloc)
461: {
462: size_t bytes,align;
464: PetscFunctionBegin;
465: if (bv->ld) { /* set by user */
466: PetscCheck(bv->ld>=nloc,PetscObjectComm((PetscObject)bv),PETSC_ERR_USER_INPUT,"The leading dimension %" PetscInt_FMT " should be larger or equal to the local number of rows %" PetscInt_FMT,bv->ld,nloc);
467: } else {
468: align = PetscMax(PETSC_MEMALIGN,16); /* assume that CUDA requires 16-byte alignment */
469: bytes = (nloc*sizeof(PetscScalar) + align - 1) & ~(align - 1);
470: bv->ld = bytes/sizeof(PetscScalar);
471: }
472: PetscFunctionReturn(PETSC_SUCCESS);
473: }
475: #if defined(PETSC_HAVE_CUDA)
476: /*
477: BV_MatDenseCUDAGetArrayRead - if Q is MATSEQDENSE it will allocate memory on the
478: GPU and copy the contents; otherwise, calls MatDenseCUDAGetArrayRead()
479: */
480: static inline PetscErrorCode BV_MatDenseCUDAGetArrayRead(BV bv,Mat Q,const PetscScalar **d_q)
481: {
482: const PetscScalar *q;
483: PetscInt ldq,mq;
484: PetscCuBLASInt ldq_=0;
485: PetscBool matiscuda;
487: PetscFunctionBegin;
488: (void)bv; // avoid unused parameter warning
489: PetscCall(MatGetSize(Q,NULL,&mq));
490: PetscCall(MatDenseGetLDA(Q,&ldq));
491: PetscCall(PetscCuBLASIntCast(ldq,&ldq_));
492: PetscCall(PetscObjectTypeCompare((PetscObject)Q,MATSEQDENSECUDA,&matiscuda));
493: if (matiscuda) PetscCall(MatDenseCUDAGetArrayRead(Q,d_q));
494: else {
495: PetscCall(MatDenseGetArrayRead(Q,&q));
496: PetscCallCUDA(cudaMalloc((void**)d_q,ldq*mq*sizeof(PetscScalar)));
497: PetscCallCUDA(cudaMemcpy((void*)*d_q,q,ldq*mq*sizeof(PetscScalar),cudaMemcpyHostToDevice));
498: PetscCall(PetscLogCpuToGpu(ldq*mq*sizeof(PetscScalar)));
499: }
500: PetscFunctionReturn(PETSC_SUCCESS);
501: }
503: /*
504: BV_MatDenseCUDARestoreArrayRead - restores the pointer obtained with BV_MatDenseCUDAGetArrayRead(),
505: freeing the GPU memory in case of MATSEQDENSE
506: */
507: static inline PetscErrorCode BV_MatDenseCUDARestoreArrayRead(BV bv,Mat Q,const PetscScalar **d_q)
508: {
509: PetscBool matiscuda;
511: PetscFunctionBegin;
512: (void)bv; // avoid unused parameter warning
513: PetscCall(PetscObjectTypeCompare((PetscObject)Q,MATSEQDENSECUDA,&matiscuda));
514: if (matiscuda) PetscCall(MatDenseCUDARestoreArrayRead(Q,d_q));
515: else {
516: PetscCall(MatDenseRestoreArrayRead(Q,NULL));
517: PetscCallCUDA(cudaFree((void*)*d_q));
518: *d_q = NULL;
519: }
520: PetscFunctionReturn(PETSC_SUCCESS);
521: }
523: SLEPC_INTERN PetscErrorCode BVMult_BLAS_CUDA(BV,PetscInt,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
524: SLEPC_INTERN PetscErrorCode BVMultVec_BLAS_CUDA(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar,PetscScalar*);
525: SLEPC_INTERN PetscErrorCode BVMultInPlace_BLAS_CUDA(BV,PetscInt,PetscInt,PetscInt,PetscInt,PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscBool);
526: SLEPC_INTERN PetscErrorCode BVAXPY_BLAS_CUDA(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
527: SLEPC_INTERN PetscErrorCode BVDot_BLAS_CUDA(BV,PetscInt,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar*,PetscInt,PetscBool);
528: SLEPC_INTERN PetscErrorCode BVDotVec_BLAS_CUDA(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar*,PetscBool);
529: SLEPC_INTERN PetscErrorCode BVScale_BLAS_CUDA(BV,PetscInt,PetscScalar*,PetscScalar);
531: SLEPC_INTERN PetscErrorCode BV_ConjugateCUDAArray(PetscScalar*,PetscInt);
533: SLEPC_INTERN PetscErrorCode BV_CleanCoefficients_CUDA(BV,PetscInt,PetscScalar*);
534: SLEPC_INTERN PetscErrorCode BV_AddCoefficients_CUDA(BV,PetscInt,PetscScalar*,PetscScalar*);
535: SLEPC_INTERN PetscErrorCode BV_SetValue_CUDA(BV,PetscInt,PetscInt,PetscScalar*,PetscScalar);
536: SLEPC_INTERN PetscErrorCode BV_SquareSum_CUDA(BV,PetscInt,PetscScalar*,PetscReal*);
537: SLEPC_INTERN PetscErrorCode BV_ApplySignature_CUDA(BV,PetscInt,PetscScalar*,PetscBool);
538: SLEPC_INTERN PetscErrorCode BV_SquareRoot_CUDA(BV,PetscInt,PetscScalar*,PetscReal*);
539: SLEPC_INTERN PetscErrorCode BV_StoreCoefficients_CUDA(BV,PetscInt,PetscScalar*,PetscScalar*);
540: #define BV_CleanCoefficients(a,b,c) ((a)->cuda?BV_CleanCoefficients_CUDA:BV_CleanCoefficients_Default)((a),(b),(c))
541: #define BV_AddCoefficients(a,b,c,d) ((a)->cuda?BV_AddCoefficients_CUDA:BV_AddCoefficients_Default)((a),(b),(c),(d))
542: #define BV_SetValue(a,b,c,d,e) ((a)->cuda?BV_SetValue_CUDA:BV_SetValue_Default)((a),(b),(c),(d),(e))
543: #define BV_SquareSum(a,b,c,d) ((a)->cuda?BV_SquareSum_CUDA:BV_SquareSum_Default)((a),(b),(c),(d))
544: #define BV_ApplySignature(a,b,c,d) ((a)->cuda?BV_ApplySignature_CUDA:BV_ApplySignature_Default)((a),(b),(c),(d))
545: #define BV_SquareRoot(a,b,c,d) ((a)->cuda?BV_SquareRoot_CUDA:BV_SquareRoot_Default)((a),(b),(c),(d))
546: #define BV_StoreCoefficients(a,b,c,d) ((a)->cuda?BV_StoreCoefficients_CUDA:BV_StoreCoefficients_Default)((a),(b),(c),(d))
547: #else
548: #define BV_CleanCoefficients(a,b,c) BV_CleanCoefficients_Default((a),(b),(c))
549: #define BV_AddCoefficients(a,b,c,d) BV_AddCoefficients_Default((a),(b),(c),(d))
550: #define BV_SetValue(a,b,c,d,e) BV_SetValue_Default((a),(b),(c),(d),(e))
551: #define BV_SquareSum(a,b,c,d) BV_SquareSum_Default((a),(b),(c),(d))
552: #define BV_ApplySignature(a,b,c,d) BV_ApplySignature_Default((a),(b),(c),(d))
553: #define BV_SquareRoot(a,b,c,d) BV_SquareRoot_Default((a),(b),(c),(d))
554: #define BV_StoreCoefficients(a,b,c,d) BV_StoreCoefficients_Default((a),(b),(c),(d))
555: #endif /* PETSC_HAVE_CUDA */